I'm currently trying to use some images from the SUN dataset. They vary in shape, roughly around (1000, 400, 1). Because the shapes vary, my approach was to create a NumPy array that contains one NumPy array per image, so that I don't have to define a fixed shape for it. What I want to do is train a basic CNN on these pictures. The problem is that I don't think my CNN really understands how my input data is laid out. In my implementation, self.X_train[0], for example, contains one image (with the corresponding target in self.Y_train[0], and so on). My code currently looks like this:
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
# Collects .jpg images from a hard-coded folder into arrays and defines a small
# Keras CNN on top of them.
# NOTE(review): this snippet has lost its indentation and several statements
# are truncated or missing (see the notes below), so it does not run as shown.
class network:
def __init__(self):
# Build the training inputs and targets once, at construction time.
self.X_train, self.Y_train = self.generate_targets()
def generate_targets(self):
# Walks the image folder and returns (X, Y) built from `inputs` and `targets`.
path = 'C:\\Users\\joaki\\PycharmProjects\\project\\project dl\\'
folder = os.fsencode(path)
targets = []
inputs = []
for file in os.listdir(folder):
filename = os.fsdecode(file)
if filename.endswith(('.jpg')):
# NOTE(review): the next two lines are truncated; presumably they opened the
# file with PIL and converted it to 'RGB' and 'L' respectively - confirm
# against the original code.
img ='RGB')
img2 ='L')
arr2 = np.array(img2)
# Give the array built from img2 an explicit trailing axis of size 1.
arr2 = arr2.reshape((arr2.shape[0], arr2.shape[1], 1))
arr = np.array(img)
# NOTE(review): no visible line appends arr / arr2 to `targets` / `inputs`;
# as shown both lists stay empty. The append calls are presumably missing.
Y = np.array(targets)
X = np.array(inputs)
return X, Y
def plotting(self, type):
# Lays out one subplot per training sample on a 2x2 grid, picking either the
# target or the input image depending on `type` ('targets' or 'inputs').
# NOTE(review): `type` shadows the builtin, and a 2x2 grid only has room for
# four samples.
plt.figure(figsize=(20, 10))
for i in range(self.X_train.shape[0]):
plt.subplot(2, 2, i+1)
if type == 'targets':
lum_img = self.Y_train[i][:, :, :] # [:,:,:] for color
if type == 'inputs':
lum_img = self.X_train[i][:, :, 0] # [:,:,:] for color
# NOTE(review): lum_img is never drawn in the visible code; the imshow/show
# calls are presumably missing.
def train_network(self):
# Two Conv2D layers followed by a 10-way softmax Dense layer; the input shape
# leaves height and width unspecified and fixes one channel.
model = Sequential()
# add model layers
model.add(Conv2D(64, kernel_size=3, activation='relu', input_shape = (None, None, 1)))
model.add(Conv2D(32, kernel_size=3, activation='relu'))
model.add(Dense(10, activation='softmax'))
# NOTE(review): the line below is truncated - the text after compile(...) looks
# like the tail of a model.fit(self.X_train, ...) call whose head was lost.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']), self.Y_train, batch_size = 1, validation_data=(self.X_train, self.Y_train), epochs=3)
# Instantiating the class triggers generate_targets() via __init__.
network1 = network()
Is there a solution to this problem? If so, can someone provide information or point me to a source that I should follow? Thanks in advance!


Keras to ONNX model export for Barracuda in Unity3D

this is my neural network model:
The input is an example of 10000 features. Each feature is a number (0 or 1).
The output is a number between 0 and 1.
from tensorflow.keras.datasets import imdb
# Load the IMDB reviews as lists of word indices.
# NOTE(review): the call was truncated in the snippet; num_words=10000 is
# chosen to match the 10000-dimensional encoding used below - confirm.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000)
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences.

    Returns a float array of shape ``(len(sequences), dimension)`` in which
    row ``i`` holds 1.0 at every index that occurs in ``sequences[i]`` and
    0.0 everywhere else.
    """
    encoded = np.zeros((len(sequences), dimension))
    # Iterating a 2-D array yields row views, so writes land in `encoded`.
    for row, indices in zip(encoded, sequences):
        for index in indices:
            row[index] = 1.0
    return encoded
# Multi-hot encode the reviews and cast the 0/1 labels to float32.
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
y_train = np.asarray(train_labels).astype("float32")
y_test = np.asarray(test_labels).astype("float32")

from tensorflow import keras
from tensorflow.keras import layers

# Two hidden layers and a single sigmoid unit, so the output lies in [0, 1].
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])

# NOTE(review): the compile call was missing from the snippet. Binary
# cross-entropy follows from the single sigmoid output; the optimizer is an
# assumed choice - confirm.
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Hold out the first 10000 training samples for validation.
x_val = x_train[:10000]
partial_x_train = x_train[10000:]
y_val = y_train[:10000]
partial_y_train = y_train[10000:]

# NOTE(review): only the tail of the fit call survived in the snippet; epochs
# and batch_size below are assumed values - confirm.
model.fit(partial_x_train,
          partial_y_train,
          epochs=20,
          batch_size=512,
          validation_data=(x_val, y_val))
I exported the model in this way:
import tensorflow as tf
import tf2onnx

# Describe the model input explicitly so the exported graph gets a float32
# input carrying the chosen name.
spec = (tf.TensorSpec(model.inputs[0].shape, tf.float32, name="my input"),)

# The inputs_as_nchw option is intentionally not passed: it asks tf2onnx to
# transpose image inputs from NHWC to NCHW, which is only meaningful for
# rank-4 tensors, whereas this model takes a flat feature vector. The old list
# also referred to the Keras input name rather than the name given in `spec`.
model_proto, _ = tf2onnx.convert.from_keras(
    model,
    input_signature=spec,
    custom_ops=None,
    opset=9,
    output_path="example.onnx",
)
And when I import it in Unity:
Can someone please help me to export a simple model from Keras to ONNX and import it in the right way in Unity3D?
Thank you.

How to test my own image on a MNIST trained network?

This is my first time trying to train a network and use PyTorch, so please forgive me if this is considered simple.
I have a pretrained AlexNet network that was modified to classify 3 classes, which I've already trained on MNIST that I mapped to 3 different labels.
class Net(nn.Module):
    """Pretrained AlexNet adapted to single-channel input and three outputs."""

    def __init__(self):
        super().__init__()
        backbone = models.alexnet(pretrained=True)
        # First convolution takes 1 input channel instead of 3 because the
        # images are black and white.
        backbone.features[0] = nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2)
        # Narrow the classifier head so it ends in 3 out_features.
        backbone.classifier[4] = nn.Linear(4096, 1024)
        backbone.classifier[6] = nn.Linear(1024, 3)
        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped AlexNet and return its output."""
        return self.model(x)


model = Net().to(device)
I want to test this on a single .png image that I drew, which is already 255x255, and in black and white. I would like the predicted label. This is the code I have so far for preprocessing the image:
from PIL import Image
import matplotlib.pyplot as plt
import cv2

# Load the drawing and force 8-bit grayscale ('L' mode).
image_8 = Image.open("eight.png").convert('L')
width_8, height_8 = image_8.size

# Invert and scale the pixel values to [0, 1].
normalized_8 = [(255 - x) * 1.0 / 255.0 for x in image_8.getdata()]

# The first layer is a Conv2d, which expects a 4-D batch shaped
# [batch, channels, height, width]. getdata() is flattened row by row, so the
# flat list is reshaped rather than passed on as a 1-D tensor.
tensor_8 = torch.FloatTensor(normalized_8).reshape(1, 1, height_8, width_8)
# Put the input on whichever device the model's weights live on.
tensor_8 = tensor_8.to(next(model.parameters()).device)

# Inference only: disable dropout and gradient tracking.
model.eval()
with torch.no_grad():
    pred = model(tensor_8)

# Index of the highest score is the predicted label.
label_8 = pred.argmax(dim=1).item()
from which I got the following error: Expected 4-dimensional input for 4-dimensional weight [64, 1, 11, 11], but got 1-dimensional input of size [50176] instead. So this is clearly the wrong way to do things, but I'm not sure how to proceed.
Change your inference code to the following. Images are not intended to be flattened into 1d.
import matplotlib.pyplot as plt
import cv2
import torch

image_8 = cv2.imread("eight.png")
if image_8 is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError("eight.png could not be read")
# imread returns a 3-channel BGR array by default; reduce it to one channel
image_8 = cv2.cvtColor(image_8, cv2.COLOR_BGR2GRAY)
# Convert to float before scaling: dividing the uint8 array in place by a
# float is rejected by NumPy's casting rules.
image_8 = image_8.astype("float32") / 255.
# The grayscale image is 2-D [height, width]; add the batch and channel axes
# to get the 4-D shape [1, 1, height, width] that the network expects.
image_8 = torch.from_numpy(image_8).unsqueeze(0).unsqueeze(0)
pred = model(image_8)
If the tensor is still 3-D at this point (shape [1, height, width]), add a second .unsqueeze(0) so that the model receives a 4-D batch of shape [1, 1, height, width].

Seemingly inconsistent tensor sizes in pytorch

I'm building a convolutional autoencoder, but want the encoding to be in a linear form so I can more easily feed it as input into an MLP. I have two convolutional layers on the encoder along with a linear inner layer to reduce dimension. This encoding is then fed into the corresponding decoder.
When I flatten the output of the second convolutional layer, based on my calculation (using the standard formula: Calculate the Output size in Convolution layer) should come out to a 1x100352 rank 1 tensor. However, when I set the input dimension of the linear layer to be 100352, the flattened rank 1 tensor has dimension 1x50176. Then comes the weird part.
I tried changing the input dimension of the linear layer to be 50176, assuming I had miscalculated. When I do this, the reshaped rank 1 tensor confusingly becomes 1x100352, and then the aforementioned weight matrix becomes 50176x256 as expected.
This response to modifying the linear layer's input dimension doesn't make sense to me. That hyperparameter controls the weight matrix correctly, but I guess I'm uncertain why it has any bearing on the linear layer's input since that's just a reshaped tensor output from a convolutional layer whose hyperparameters are unrelated to the hyperparameter in question.
I apologize if I'm just missing something obvious. I'm very new to pytorch, and I couldn't find any other posts which discussed this sort of issue.
Here's what I believe to be the minimal reproducible example:
import os

import matplotlib.pyplot as plt
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.utils import save_image
class convAutoEncoder(nn.Module):
    """Convolutional autoencoder with a linear bottleneck.

    Keyword arguments (all required):
        inputChannels: channels of the input image.
        channelsEncoderMid: output channels of the first encoder convolution.
        channelsEncoderInner: output channels of the second encoder convolution.
        intoLinear: length of the flattened encoder output fed to the linear
            layer. The flattening covers the WHOLE batch (see ``forward``), so
            this must equal batch * channelsEncoderInner * height * width.
        linearEncoded: size of the linear code.
        channelsDecoderMid: output channels of the first decoder convolution.
        kernelSize: kernel size shared by every (transposed) convolution.
    """

    def __init__(self, **kwargs):
        # nn.Module must be initialised before sub-modules are assigned;
        # otherwise the first `self.enc1 = ...` raises an AttributeError.
        super().__init__()
        # Creating network structure
        # Encoder portion of autoencoder
        self.enc1 = nn.Conv2d(in_channels=kwargs["inputChannels"],
                              out_channels=kwargs["channelsEncoderMid"],
                              kernel_size=kwargs["kernelSize"])
        self.enc2 = nn.Conv2d(in_channels=kwargs["channelsEncoderMid"],
                              out_channels=kwargs["channelsEncoderInner"],
                              kernel_size=kwargs["kernelSize"])
        self.enc3 = nn.Linear(in_features=kwargs["intoLinear"],
                              out_features=kwargs["linearEncoded"])
        # Decoder portion of autoencoder
        self.dec1 = nn.Linear(in_features=kwargs["linearEncoded"],
                              out_features=kwargs["intoLinear"])
        self.dec2 = nn.ConvTranspose2d(in_channels=kwargs["channelsEncoderInner"],
                                       out_channels=kwargs["channelsDecoderMid"],
                                       kernel_size=kwargs["kernelSize"])
        self.dec3 = nn.ConvTranspose2d(in_channels=kwargs["channelsDecoderMid"],
                                       out_channels=kwargs["inputChannels"],
                                       kernel_size=kwargs["kernelSize"])

    def forward(self, x):
        """Encode ``x`` to a single linear code and decode it back.

        NOTE: the whole batch is flattened into ONE vector before the linear
        layers, so the vector length depends on the batch size. A batch whose
        size differs from the one ``intoLinear`` was computed for will not fit
        ``enc3``.
        """
        x = F.relu(self.enc1(x))
        x = F.relu(self.enc2(x))
        # Remember the convolutional shape so the decoder can restore it,
        # instead of hard-coding [32, 4, 28, 28].
        conv_shape = x.shape
        # reshape(-1) always yields a 1-D vector, even for a single element.
        x = x.reshape(-1)
        x = F.relu(self.enc3(x))
        x = F.relu(self.dec1(x))
        x = x.reshape(conv_shape)
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        return x
def encodeDecodeConv(numEpochs = 20, input_Channels = 3, batchSize = 32,
                     channels_Encoder_Inner = 4, channels_Encoder_Mid = 8, into_Linear = 100352,
                     linear_Encoded = 256, channels_Decoder_Mid = 8, kernel_Size = 3,
                     learningRate = 1e-3):
    """Train a ``convAutoEncoder`` on CIFAR-10 and return the per-epoch losses.

    The keyword arguments are forwarded to ``convAutoEncoder``; ``into_Linear``
    must match the flattened size of one full batch after the two encoder
    convolutions. Downloads CIFAR-10 into ``./data`` if it is not present.

    Returns:
        list[float]: mean training loss of each epoch.
    """
    #Pick a device. If GPU available, use that. Otherwise, use CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #Define data transforms
    transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    #Define training dataset
    trainSet = datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)

    #Define testing dataset
    testSet = datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)

    #Define data loaders
    # drop_last=True: the model flattens the whole batch into one vector, so a
    # shorter final batch (the dataset size is not a multiple of 32) would no
    # longer match into_Linear.
    trainLoader = DataLoader(trainSet, batch_size = batchSize, shuffle = True, drop_last = True)
    testLoader = DataLoader(testSet, batch_size = batchSize, shuffle = True, drop_last = True)

    #Initialize neural network
    model = convAutoEncoder(inputChannels = input_Channels, channelsEncoderMid = channels_Encoder_Mid, channelsEncoderInner = channels_Encoder_Inner, intoLinear = into_Linear, linearEncoded = linear_Encoded, channelsDecoderMid = channels_Decoder_Mid, kernelSize = kernel_Size)
    # Model and data must live on the same device.
    model = model.to(device)

    #Optimization setup
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(),lr = learningRate)

    lossTracker = []
    for epoch in range(numEpochs):
        loss = 0
        for data,_ in trainLoader:
            data = data.to(device)
            # Standard update: clear old gradients, forward, backward, step.
            optimizer.zero_grad()
            outputs = model(data)
            train_loss = criterion(outputs,data)
            train_loss.backward()
            optimizer.step()
            loss += train_loss.item()
        loss = loss/len(trainLoader)
        lossTracker.append(loss)
        print('Epoch {} of {}, Train loss: {:.3f}'.format(epoch+1,numEpochs,loss))
    return lossTracker
Edit2: Somewhere in the CIFAR10 dataset, the data appears to change dimension. After playing around with print statements more, I discovered that setting the relevant hyperparameter to 100352 works great for many entries, but then seemingly one image pops up that has a different size. Not sure why that would occur, though.

plot Roc curve using keras

I have a neural network model wrapped in KerasClassifier, and I use KFold for cross-validation. I am now having trouble plotting the ROC curve. I have tried a few code samples, but most of them fail with an error saying that the multilabel format is not supported. The code below covers everything up to the point where my network produces the accuracy. I would be thankful if anyone could help me with the remaining part of the code.
import numpy as np
import pandas as pd
from keras.layers import Dense, Input
from keras.models import Model, Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import LabelEncoder, MinMaxScaler,StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
seed = 7

# The first 12600 columns are used as features and column 12600 as the label.
dataset = pd.read_csv('lukemia_2003.csv')
X_train = dataset.values[:,0:12600]
Y_train = dataset.values[:,12600]

# Scale every feature with MinMaxScaler, then keep 10 principal components.
scalar = MinMaxScaler()
scaled_data = scalar.fit_transform(X_train)
pca = PCA(n_components=10)
X_train_pca = pca.fit_transform(scaled_data)

# The encoder has to learn the label set before it can transform it; calling
# transform() on an unfitted LabelEncoder raises NotFittedError.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y_train)
# One-hot encode the integer labels for the softmax output.
dummy_Y = np_utils.to_categorical(encoded_Y)

# Layer widths used by my_model().
hid_layer1 = 4
hid_layer2 = 4
output_layer = 4
def my_model():
    """Build and compile the tanh MLP with a softmax output layer.

    Layer widths come from the module-level ``hid_layer1``, ``hid_layer2`` and
    ``output_layer``; the network takes 10 input features.
    """
    network = Sequential([
        Dense(hid_layer1, input_dim = 10, activation='tanh'),
        Dense(hid_layer2, activation='tanh'),
        Dense(output_layer, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network
# Repeat 10-fold cross-validation 30 times and collect the mean and standard
# deviation of the fold scores of every repetition.
result_mean_list = []
std_list = []
for i in range(30):
    estimator = KerasClassifier(build_fn=my_model, epochs=1500, batch_size=5, verbose=2)
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    results = cross_val_score(estimator, X_train_pca, dummy_Y, cv=kfold)
    # Record this repetition; without these appends the lists printed below
    # would always be empty.
    result_mean_list.append(results.mean())
    std_list.append(results.std())
print ("Result mean: ", result_mean_list)
print ("Standard Deviation List: ", std_list)
Here is the link to the dataset.
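In your case the target is multiclass, and a ROC curve is defined for a binary decision, so you cannot plot a single ROC curve for the classifier directly; you would have to binarize the labels (one class versus the rest) and draw one curve per class. For a binary classifier, this link shows how to draw a ROC curve.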

How to use Keras ImageDataGenerator with Siamese or triplet networks

I'm trying to build both a Siamese neural network and a triplet neural network on a large custom dataset.
Keras has ImageDataGenerator, which makes generating input data for a regular neural network very easy.
I'm interested in using ImageDataGenerator, or something similar, to train networks with 2 (Siamese) and 3 (triplet) inputs.
In the Keras MNIST Siamese example, the input is generated in a pre-processing stage by the create_pairs method. I don't think this approach is suitable for a large dataset.
Is it possible to use ImageDataGenerator in this case? What are my other options assuming the data-set is very big?
The idea of a data generator is to give fit_generator a stream of data in batches. This leaves you in control of how the data is produced, i.e. whether you load it from files or apply some data augmentation, as ImageDataGenerator does.
Here I am posting a modified version of the MNIST Siamese example with a custom DataGenerator; you can work it out from here.
import numpy as np
np.random.seed(1337) # for reproducibility
import random
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, Lambda
from keras.optimizers import SGD, RMSprop
from keras import backend as K
class DataGenerator(object):
    """Serve batches of MNIST image pairs for training a siamese network.

    All pairs are built once in ``__init__``; ``next_train`` and ``next_val``
    are endless generators that yield ``([left_batch, right_batch], labels)``.
    """

    def __init__(self, batch_sz):
        """Load MNIST, build the positive/negative pairs and set up batching.

        Args:
            batch_sz: number of pairs per yielded batch.
        """
        # the data, shuffled and split between train and test sets
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        X_train = X_train.reshape(60000, 784)
        X_test = X_test.reshape(10000, 784)
        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        X_train /= 255
        X_test /= 255

        # create training+test positive and negative pairs
        digit_indices = [np.where(y_train == i)[0] for i in range(10)]
        self.tr_pairs, self.tr_y = self.create_pairs(X_train, digit_indices)

        digit_indices = [np.where(y_test == i)[0] for i in range(10)]
        self.te_pairs, self.te_y = self.create_pairs(X_test, digit_indices)

        # Split every pair into its left and right element.
        self.tr_pairs_0 = self.tr_pairs[:, 0]
        self.tr_pairs_1 = self.tr_pairs[:, 1]
        self.te_pairs_0 = self.te_pairs[:, 0]
        self.te_pairs_1 = self.te_pairs[:, 1]

        self.batch_sz = batch_sz
        # Largest multiple of batch_sz that fits; floor division keeps the
        # counts integers (plain `/` yields a float on Python 3).
        self.samples_per_train = (self.tr_pairs.shape[0] // self.batch_sz) * self.batch_sz
        self.samples_per_val = (self.te_pairs.shape[0] // self.batch_sz) * self.batch_sz

        # Batch cursors; the generators advance these before every yield.
        self.cur_train_index = 0
        self.cur_val_index = 0

    def create_pairs(self, x, digit_indices):
        '''Positive and negative pair creation.
        Alternates between positive and negative pairs.

        Args:
            x: array of samples, indexed by the entries of ``digit_indices``.
            digit_indices: ten index arrays, one per digit class.

        Returns:
            (pairs, labels) as arrays; labels alternate 1 (same class) and
            0 (different class).
        '''
        pairs = []
        labels = []
        # Limit by the smallest class so every class contributes equally.
        n = min([len(digit_indices[d]) for d in range(10)]) - 1
        for d in range(10):
            for i in range(n):
                # Positive pair: two consecutive samples of the same digit.
                z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
                pairs += [[x[z1], x[z2]]]
                # Negative pair: an offset of 1..9 guarantees another digit.
                inc = random.randrange(1, 10)
                dn = (d + inc) % 10
                z1, z2 = digit_indices[d][i], digit_indices[dn][i]
                pairs += [[x[z1], x[z2]]]
                labels += [1, 0]
        return np.array(pairs), np.array(labels)

    def next_train(self):
        """Yield training batches forever, wrapping around at the end."""
        while 1:
            self.cur_train_index += self.batch_sz
            if self.cur_train_index >= self.samples_per_train:
                self.cur_train_index = 0
            start = self.cur_train_index
            stop = start + self.batch_sz
            yield ([self.tr_pairs_0[start:stop],
                    self.tr_pairs_1[start:stop]],
                   self.tr_y[start:stop])

    def next_val(self):
        """Yield validation batches forever, wrapping around at the end."""
        while 1:
            self.cur_val_index += self.batch_sz
            if self.cur_val_index >= self.samples_per_val:
                self.cur_val_index = 0
            start = self.cur_val_index
            stop = start + self.batch_sz
            yield ([self.te_pairs_0[start:stop],
                    self.te_pairs_1[start:stop]],
                   self.te_y[start:stop])
def euclidean_distance(vects):
    """Return the row-wise Euclidean distance between two tensors.

    Args:
        vects: a pair ``(x, y)`` of tensors of equal shape.

    Returns:
        Tensor with one distance per row (axis 1 reduced, kept as size 1).
    """
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    # Clamp away from zero: the gradient of sqrt is unbounded at 0, which
    # can produce NaNs when the two inputs are identical.
    return K.sqrt(K.maximum(sum_square, K.epsilon()))
def eucl_dist_output_shape(shapes):
    """Return ``(n, 1)`` where ``n`` is the first entry of the first shape.

    ``shapes`` must contain exactly two shapes; the second one is ignored.
    """
    first_shape, _ = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06

    Args:
        y_true: pair labels, 1 for pairs to pull together and 0 for pairs to
            push apart.
        y_pred: predicted distance of each pair.

    Returns:
        Mean over the batch of ``y * d^2 + (1 - y) * max(margin - d, 0)^2``.
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
def create_base_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).

    Args:
        input_dim: length of the flat input vector.

    Returns:
        A Sequential model of three 128-unit ReLU Dense layers.
    '''
    seq = Sequential()
    seq.add(Dense(128, input_shape=(input_dim,), activation='relu'))
    seq.add(Dense(128, activation='relu'))
    seq.add(Dense(128, activation='relu'))
    return seq
def compute_accuracy(predictions, labels):
    '''Compute classification accuracy with a fixed threshold on distances.

    Selects the pairs whose predicted distance is below 0.5 and returns the
    mean of their labels.

    Args:
        predictions: array-like of distances, one per pair (any shape; it is
            flattened).
        labels: array-like of 0/1 labels, one per pair.
    '''
    # np.asarray lets plain lists be passed as well as arrays.
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    return labels[predictions.ravel() < 0.5].mean()
input_dim = 784
nb_epoch = 20
# NOTE(review): batch_size was used below without ever being defined; 128 is
# an assumed value - adjust as needed.
batch_size = 128

datagen = DataGenerator(batch_size)

# network definition
base_network = create_base_network(input_dim)

input_a = Input(shape=(input_dim,))
input_b = Input(shape=(input_dim,))

# because we re-use the same instance `base_network`,
# the weights of the network
# will be shared across the two branches
processed_a = base_network(input_a)
processed_b = base_network(input_b)

# The model output is the distance between the two embedded inputs.
distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])

model = Model(input=[input_a, input_b], output=distance)

# train
# NOTE(review): the keyword names used here (input/output, samples_per_epoch,
# nb_epoch, nb_val_samples) look like the older Keras spelling - confirm
# against the installed Keras version.
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms)
model.fit_generator(generator=datagen.next_train(), samples_per_epoch=datagen.samples_per_train, nb_epoch=nb_epoch, validation_data=datagen.next_val(), nb_val_samples=datagen.samples_per_val)