How can I get every layers output value with keras? - neural-network

I want use keras Lstm to get the time series features, then use the features to Kmeans. But now I can not get the layers output values. How can I get the layers output values?
This is my lstm network
Layer (type) Output Shape Param #
lstm_66 (LSTM) (None, None, 50) 10400
lstm_67 (LSTM) (None, 100) 60400
dense_19 (Dense) (None, 1) 101
activation_19 (Activation) (None, 1) 0
I want to get the lstm_67 output values,my code is:
import keras.backend as K
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
sess = tf.Session()
sess.run(tf.global_variables_initializer())
import numpy as np
statesAll=[]
layers = model.layers
print layers[1].output,type(layers[1].output[1]),sess.run(layers[1].output)
and the result is:
Tensor("lstm_61/TensorArrayReadV3:0", shape=(?, 100), dtype=float32)
So, how can I get the layers output value?
Thanks!
But it not work,my code is:
def load_data(file_name, sequence_length=10, split=0.8):
df = pd.read_csv(file_name, sep=',', usecols=[1])
data_all = np.array(df).astype(float)
scaler = MinMaxScaler()
data_all = scaler.fit_transform(data_all)
data = []
print len(data_all)
for i in range(len(data_all) - sequence_length - 1):
data.append(data_all[i: i + sequence_length + 1])
reshaped_data = np.array(data).astype('float64')
np.random.shuffle(reshaped_data)
x = reshaped_data[:, :-1]
y = reshaped_data[:, -1]
split_boundary = int(reshaped_data.shape[0] * split)
train_x = x[: split_boundary]
test_x = x[split_boundary:]
train_y = y[: split_boundary]
test_y = y[split_boundary:]
return train_x, train_y, test_x, test_y, scaler
def build_model(n_samples, time_steps, input_dim):
model = Sequential()
model.add(LSTM(input_dim=1, output_dim=50,return_sequences=True))
model.add(LSTM(100, return_sequences=False))
model.add(Dense(output_dim=1))
model.add(Activation('linear'))
model.compile(loss='mse', optimizer='rmsprop')
print(model.layers)
return model
def train_model(train_x, train_y, test_x, test_y):
model = build_model()
model.fit(train_x, train_y, batch_size=128, nb_epoch=30,validation_split=0.1)
return model
train_x, train_y, test_x, test_y, scaler = load_data(file path)
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
model = train_model(train_x, train_y, test_x, test_y)
from keras import backend as K
layers = model.layers
K.eval(layers[1].output)

In TensorFlow 2.x, you can do like this:
from tensorflow.python.keras import backend as K
model = build_model()
# lstm_67 is the second layer.
lstm = K.function([model.layers[0].input], [model.layers[1].output])
lstm_output = lstm([test_x])[0]

keras.backend.eval() should do.
Look at the documentation here and here

First of all, this is a tensor, you need to use the tf. Print () method to see the specific value. If you use Spyder, you will not see this information in the console. You need to execute this program in the command line.

Related

How to solve "TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not tuple"?

I'm trying to run the DeepCrack model, a CNN to find a pavement crack detection model. But I'm getting this error. I found the following error. I understood that the problem is in Outputs = model(image) here because my model is returning a tuple here instead of tensors. I tried to convert the output to tensor but it is not working. So, How can I solve this? I added my full code here. please help me to get rid of this.
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# License: BSD
# Author: Sasank Chilamkurthy
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import random_split
import matplotlib.pyplot as plt
import time
import os
import copy
cudnn.benchmark = True
plt.ion() # interactive mode
from random import *
from tqdm.notebook import tqdm, trange
from time import sleep
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from warnings import filterwarnings
filterwarnings('ignore')
# functions to show an image
def imshow(img):
img = img / 2 + 0.5 # unnormalize
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
plt.show()
plt.savefig('labels.JPG')
## codes for data augmentation
train_trans = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(p=0.5), ## tamim: image will move left and right
transforms.RandomVerticalFlip(p=0.5), ## tamim: image will come to eye vertically
transforms.RandomRotation(degrees=(.5, 5)), ## very small rotation of the cracks
transforms.ToTensor(),
transforms.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]
)
])
test_trans = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(p=0.5), ## tamim: image will move left and right
transforms.RandomVerticalFlip(p=0.5), ## tamim: image will come to eye vertically
transforms.RandomRotation(degrees=(.5, 5)), ## very small rotation of the cracks
transforms.ToTensor(),
transforms.Normalize(
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]
)
])
## Load data
from torchvision.datasets import ImageFolder
data = ImageFolder('../Data/Data_Structure(Annotated)', transform=train_trans , )
test_folder= ImageFolder("../Data/DATA_iPhone_13_Pro_Max", transform=test_trans, )
batch_size = 32
num_classes = 4
learning_rate = 0.01
num_epochs = 10
print("Follwing classes are there : \n",data.classes)
classes = ('Alligator Cracks', 'Delamination', 'Longitudinal Cracks', 'Transverse Cracks')
len(data)
##Splitting Data and Prepare Batches:
## Source: https://medium.com/thecyphy/train-cnn-model-with-pytorch-21dafb918f48
val_size = 127 ## Tamim:30% data for validation ##
train_size = len(data) - val_size
train_loader,val_loader = random_split(data,[train_size,val_size]) ## To randomly split the images into training and testing, PyTorch provides random_split()
print(f"Length of Train Data : {len(train_loader)}") ## changed the folder names
print(f"Length of Validation Data : {len(val_loader)}")
# Splitting train and validation data on batches
train_loader = torch.utils.data.DataLoader(train_loader, shuffle=True, batch_size=batch_size) ## defined train data & val data
val_loader = torch.utils.data.DataLoader(val_loader, shuffle=True, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_folder, shuffle=False, batch_size=batch_size)
# visualize images of a single batch
dataiter = iter(train_loader)
images, labels = next(dataiter)
# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
# print(model)
from torch import nn
import torch
import torch.nn.functional as F
def Conv3X3(in_, out):
return torch.nn.Conv2d(in_, out, 3, padding=1)
class ConvRelu(nn.Module):
def __init__(self, in_, out):
super().__init__()
self.conv = Conv3X3(in_, out)
self.activation = torch.nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.activation(x)
return x
class Down(nn.Module):
def __init__(self, nn):
super(Down,self).__init__()
self.nn = nn
self.maxpool_with_argmax = torch.nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)
def forward(self,inputs):
down = self.nn(inputs)
unpooled_shape = down.size()
outputs, indices = self.maxpool_with_argmax(down)
return outputs, down, indices, unpooled_shape
class Up(nn.Module):
def __init__(self, nn):
super().__init__()
self.nn = nn
self.unpool=torch.nn.MaxUnpool2d(2,2)
def forward(self,inputs,indices,output_shape):
outputs = self.unpool(inputs, indices=indices, output_size=output_shape)
outputs = self.nn(outputs)
return outputs
class Fuse(nn.Module):
def __init__(self, nn, scale):
super().__init__()
self.nn = nn
self.scale = scale
self.conv = Conv3X3(64,1)
def forward(self,down_inp,up_inp):
outputs = torch.cat([down_inp, up_inp], 1)
outputs = F.interpolate(outputs, scale_factor=self.scale, mode='bilinear')
outputs = self.nn(outputs)
return self.conv(outputs)
class DeepCrack(nn.Module):
def __init__(self, num_classes=1000):
super(DeepCrack, self).__init__()
self.down1 = Down(torch.nn.Sequential(
ConvRelu(3,64),
ConvRelu(64,64),
))
self.down2 = Down(torch.nn.Sequential(
ConvRelu(64,128),
ConvRelu(128,128),
))
self.down3 = Down(torch.nn.Sequential(
ConvRelu(128,256),
ConvRelu(256,256),
ConvRelu(256,256),
))
self.down4 = Down(torch.nn.Sequential(
ConvRelu(256, 512),
ConvRelu(512, 512),
ConvRelu(512, 512),
))
self.down5 = Down(torch.nn.Sequential(
ConvRelu(512, 512),
ConvRelu(512, 512),
ConvRelu(512, 512),
))
self.up1 = Up(torch.nn.Sequential(
ConvRelu(64, 64),
ConvRelu(64, 64),
))
self.up2 = Up(torch.nn.Sequential(
ConvRelu(128, 128),
ConvRelu(128, 64),
))
self.up3 = Up(torch.nn.Sequential(
ConvRelu(256, 256),
ConvRelu(256, 256),
ConvRelu(256, 128),
))
self.up4 = Up(torch.nn.Sequential(
ConvRelu(512, 512),
ConvRelu(512, 512),
ConvRelu(512, 256),
))
self.up5 = Up(torch.nn.Sequential(
ConvRelu(512, 512),
ConvRelu(512, 512),
ConvRelu(512, 512),
))
self.fuse5 = Fuse(ConvRelu(512 + 512, 64), scale=16)
self.fuse4 = Fuse(ConvRelu(512 + 256, 64), scale=8)
self.fuse3 = Fuse(ConvRelu(256 + 128, 64), scale=4)
self.fuse2 = Fuse(ConvRelu(128 + 64, 64), scale=2)
self.fuse1 = Fuse(ConvRelu(64 + 64, 64), scale=1)
self.final = Conv3X3(5,1)
def forward(self,inputs):
# encoder part
out, down1, indices_1, unpool_shape1 = self.down1(inputs)
out, down2, indices_2, unpool_shape2 = self.down2(out)
out, down3, indices_3, unpool_shape3 = self.down3(out)
out, down4, indices_4, unpool_shape4 = self.down4(out)
out, down5, indices_5, unpool_shape5 = self.down5(out)
# decoder part
up5 = self.up5(out, indices=indices_5, output_shape=unpool_shape5)
up4 = self.up4(up5, indices=indices_4, output_shape=unpool_shape4)
up3 = self.up3(up4, indices=indices_3, output_shape=unpool_shape3)
up2 = self.up2(up3, indices=indices_2, output_shape=unpool_shape2)
up1 = self.up1(up2, indices=indices_1, output_shape=unpool_shape1)
fuse5 = self.fuse5(down_inp=down5,up_inp=up5)
fuse4 = self.fuse4(down_inp=down4, up_inp=up4)
fuse3 = self.fuse3(down_inp=down3, up_inp=up3)
fuse2 = self.fuse2(down_inp=down2, up_inp=up2)
fuse1 = self.fuse1(down_inp=down1, up_inp=up1)
output = self.final(torch.cat([fuse5,fuse4,fuse3,fuse2,fuse1],1))
return output, fuse5, fuse4, fuse3, fuse2, fuse1
if __name__ == '__main__':
inp = torch.randn((1,3,512,512))
model = DeepCrack()
out = model(inp)
model = DeepCrack()
print(model)
# specify loss function
criterion = nn.CrossEntropyLoss()
# specify loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# number of epochs to train the model
n_epochs = 10
for epoch in range(1, n_epochs+1):
# monitor training loss
train_loss = 0.0
###################
# train the model #
###################
for data in train_loader:
# _ stands in for labels, here
# no need to flatten images
images, _ = data
# clear the gradients of all optimized variables
optimizer.zero_grad()
# forward pass: compute predicted outputs by passing inputs to the model
outputs = model(images)
# calculate the loss
loss = criterion(outputs, images)
# backward pass: compute gradient of the loss with respect to model parameters
loss.backward()
# perform a single optimization step (parameter update)
optimizer.step()
# update running training loss
train_loss += loss.item()*images.size(0)
# print avg training statistics
train_loss = train_loss/len(train_loader)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
epoch,
train_loss
)) ```
Traceback (most recent call last):
File "test_deepcrack.py", line 320, in <module>
loss = criterion(outputs, images)
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-
packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-
packages/torch/nn/modules/loss.py", line 1150, in forward
> return F.cross_entropy(input, target, weight=self.weight,
> File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/functional.py", > line 2846, in cross_entropy
> return torch._C._nn.cross_entropy_loss(input, target, weight,
> _Reduction.get_enum(reduction), ignore_index, label_smoothing)
>TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not tuple

predicting time series: my python code prints out a (very long) list rather than a (small) array

I am learning neural network modeling and its uses in time series prediction.
First, thank you for reading this post and for your help :)
On this page there are various NN models (LSTM, CNN etc.) for predicting "traffic volume":
https://michael-fuchs-python.netlify.app/2020/11/01/time-series-analysis-neural-networks-for-forecasting-univariate-variables/#train-validation-split
I got inspired and decided to use/shorten/adapt the code in there for a problem of my own: predicting the bitcoin price.
I have the bitcoin daily prices starting 1.1.2017
in total 2024 daily prices
I use the first 85% of the data for the training data, and the rest as the validation (except the last 10 observation, which I would like to use as test data to see how good my model is)
I would like to use a Feedforward model
My goal is merely having a code that runs.
I have managed so far to have most of my code run. However, I get a strange format for my test forecast results: It should be simply an array of 10 numbers (i.e. predicted prices corresponding to the 10 day at the end of my data). To my surprise what is printed out is a long list of numbers. I need help to find out what changes I need to make to make to the code to make it run.
Thank you for helping me :)
The code is pasted down there, followed by the error:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing #import MinMaxScaler
from sklearn import metrics #import mean_squared_error
import seaborn as sns
sns.set()
import tensorflow as tf
from tensorflow import keras
from keras.layers import Input, Dense, Flatten
from keras.optimizers import Adam
from keras.models import Sequential
from keras.callbacks import EarlyStopping
tf.__version__
df = pd.read_csv('/content/BTC-USD.csv')
def mean_absolute_percentage_error_func(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
def timeseries_evaluation_metrics_func(y_true, y_pred):
print('Evaluation metric results: ')
print(f'MSE is : {metrics.mean_squared_error(y_true, y_pred)}')
print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
print(f'RMSE is : {np.sqrt(metrics.mean_squared_error(y_true, y_pred))}')
print(f'MAPE is : {mean_absolute_percentage_error_func(y_true, y_pred)}')
print(f'R2 is : {metrics.r2_score(y_true, y_pred)}',end='\n\n')
def univariate_data_prep_func(dataset, start, end, window, horizon):
X = []
y = []
start = start + window
if end is None:
end = len(dataset) - horizon
for i in range(start, end):
indicesx = range(i-window, i)
X.append(np.reshape(dataset[indicesx], (window, 1)))
indicesy = range(i,i+horizon)
y.append(dataset[indicesy])
return np.array(X), np.array(y)
# Generating the test set
test_data = df['close'].tail(10)
df = df.drop(df['close'].tail(10).index)
df.shape
# Defining the target variable
uni_data = df['close']
uni_data.index = df['formatted_date']
uni_data.head()
#scaling
from sklearn import preprocessing
uni_data = uni_data.values
scaler_x = preprocessing.MinMaxScaler()
x_scaled = scaler_x.fit_transform(uni_data.reshape(-1, 1))
# Single Step Style (sss) modeling
univar_hist_window_sss = 50
horizon_sss = 1
# 2014 observations in total
# 2014*0.85=1710 should be part of the training (304 validation)
train_split_sss = 1710
x_train_uni_sss, y_train_uni_sss = univariate_data_prep_func(x_scaled, 0, train_split_sss,
univar_hist_window_sss, horizon_sss)
x_val_uni_sss, y_val_uni_sss = univariate_data_prep_func(x_scaled, train_split_sss, None,
univar_hist_window_sss, horizon_sss)
print ('Length of first Single Window:')
print (len(x_train_uni_sss[0]))
print()
print ('Target horizon:')
print (y_train_uni_sss[0])
BATCH_SIZE_sss = 32
BUFFER_SIZE_sss = 150
train_univariate_sss = tf.data.Dataset.from_tensor_slices((x_train_uni_sss, y_train_uni_sss))
train_univariate_sss = train_univariate_sss.cache().shuffle(BUFFER_SIZE_sss).batch(BATCH_SIZE_sss).repeat()
validation_univariate_sss = tf.data.Dataset.from_tensor_slices((x_val_uni_sss, y_val_uni_sss))
validation_univariate_sss = validation_univariate_sss.batch(BATCH_SIZE_sss).repeat()
n_steps_per_epoch = 55
n_validation_steps = 10
n_epochs = 100
#FFNN architecture
model = tf.keras.models.Sequential([
tf.keras.layers.Dense(8, input_shape=x_train_uni_sss.shape[-2:]),
tf.keras.layers.Dense(units=horizon_sss)])
model.compile(loss='mse',
optimizer='adam')
#fit the model
model_path = '/content/FFNN_model_sss.h5'
keras_callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss',
min_delta=0, patience=10,
verbose=1, mode='min'),
tf.keras.callbacks.ModelCheckpoint(model_path,monitor='val_loss',
save_best_only=True,
mode='min', verbose=0)]
history = model.fit(train_univariate_sss, epochs=n_epochs, steps_per_epoch=n_steps_per_epoch,
validation_data=validation_univariate_sss, validation_steps=n_validation_steps, verbose =1,
callbacks = keras_callbacks)
#validation
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
# Testing our model
trained_ffnn_model_sss = tf.keras.models.load_model(model_path)
df_temp = df['close']
test_horizon = df_temp.tail(univar_hist_window_sss)
test_history = test_horizon.values
result = []
# Define Forecast length here
window_len = len(test_data)
test_scaled = scaler_x.fit_transform(test_history.reshape(-1, 1))
for i in range(1, window_len+1):
test_scaled = test_scaled.reshape((1, test_scaled.shape[0], 1))
# Inserting the model
predicted_results = trained_ffnn_model_sss.predict(test_scaled)
print(f'predicted : {predicted_results}')
result.append(predicted_results[0])
test_scaled = np.append(test_scaled[:,1:],[[predicted_results]])
result_inv_trans = scaler_x.inverse_transform(result)
result_inv_trans
I believe the problem might have to do with the shapes of data. How exactly I do not yet know.
Data:
click here
Traceback:
click here

Accuracy from sess.run(() is returning the value in bytes. How can I change to value?

I am new to CNN and tried to train the CNN model. However when I try to print the accuracies returned from cnn it gives me results in bytes format like b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'. However when I try to print the values from the loss_train obtained from the same sess.run I get value of 1419.06. Why is this happening.
########################################################################################################################
#IMPORT PACKAGES
import math
import shutil
import pywt
import sys
import random
import numpy as np
import h5py
import pip
import os
from os import system
import tensorflow as tf
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import skimage.io as io
import matplotlib.image as mpimg
import time
np.random.seed(1)
slim = tf.contrib.slim
########################################################################################################################
########################################################################################################################
#The FLAGS are used to assign constant values to several paths as well as variables that will be constantly used.
flags = tf.app.flags
flags.DEFINE_string('dataset_dir','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\')
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')
tf.app.flags.DEFINE_integer('target_image_height', 256, 'train input image height')
tf.app.flags.DEFINE_integer('target_image_width', 256, 'train input image width')
tf.app.flags.DEFINE_integer('batch_size', 128, 'batch size of training.')
tf.app.flags.DEFINE_integer('num_epochs', 30, 'epochs of training.')
tf.app.flags.DEFINE_float('learning_rate', 0.001, 'learning rate of training.')
FLAGS = flags.FLAGS
img_size = 256
num_channels=3
num_classes=45
########################################################################################################################
########################################################################################################################
datapath_train = 'E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\train\\None_train_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
features={
'image/encoded': tf.FixedLenFeature([], tf.string),
'image/format': tf.FixedLenFeature([], tf.string),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/channels': tf.FixedLenFeature([],tf.int64)
}
parsed_example = tf.parse_single_example(tfrecord, features)
image_de = tf.io.decode_raw(parsed_example['image/encoded'],tf.uint8)
img_height = tf.cast(parsed_example['image/height'],tf.int32)
img_width = tf.cast(parsed_example['image/width'],tf.int32)
img_channel = tf.cast(parsed_example['image/channels'],tf.int32)
img_shape = tf.stack([img_height,img_width,img_channel])
label = tf.cast(parsed_example['image/class/label'],tf.int64)
image = tf.reshape(image_de,img_shape)
#label = parsed_example['image/class/label']
return image, img_shape, label
########################################################################################################################
#########################################################################################################################
"""
# Pipeline of dataset and iterator
dataset = tf.data.TFRecordDataset(datapath)
# Parse the record into tensors.
dataset = dataset.map(_extract_fn)
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
try:
print(sess.run(img_shape))
image_batch=sess.run(image)
print(image_batch)
img_bas=tf.cast(image_batch,tf.uint8)
plt.imshow(image_batch[0,:,:,:]*255)
plt.show()
except tf.errors.OutOfRangeError:
pass"""
########################################################################################################################
########################################################################################################################
#INITIALIZATION FOR THE CNN ARCHITECTURE
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
#PLACEHOLDERS
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
########################################################################################################################
########################################################################################################################
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
with tf.variable_scope(name) as scope:
# Shape of the filter-weights for the convolution
shape = [filter_size, filter_size, num_input_channels, num_filters]
# Create new weights (filters) with the given shape
weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
# Create new biases, one for each filter
biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
# TensorFlow operation for convolution
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
# Add the biases to the results of the convolution.
layer += biases
return layer, weights
def new_pool_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
return layer
def new_relu_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.relu(input)
return layer
def new_fc_layer(input, num_inputs, num_outputs, name):
with tf.variable_scope(name) as scope:
# Create new weights and biases.
weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
# Multiply the input and weights, and then add the bias-values.
layer = tf.matmul(input, weights) + biases
return layer
# CONVOLUTIONAL LAYER 1
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
# CONVOLUTIONAL LAYER 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_relu1, num_input_channels=32, filter_size=5, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
# FLATTEN LAYER
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_relu2, [-1, num_features])
# FULLY-CONNECTED LAYER 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
# FULLY-CONNECTED LAYER 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension=1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_one_hot)
cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train = tf.data.TFRecordDataset(datapath_train)
# Parse the record into tensors.
dataset_train = dataset_train.map(_extract_fn)
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer_train = tf.summary.FileWriter("Training_FileWriter/")
writer_val = tf.summary.FileWriter("Validation_FileWriter/")
#summary
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
# Merge all summaries together
merged_summary = tf.summary.merge_all()
#PERFORM THE CNN OPERATIONS
with tf.Session() as sess:
sess.run(init_op)
sess.run(iterator_train.initializer)
# Add the model graph to TensorBoard
writer_train.add_graph(sess.graph)
writer_val.add_graph(sess.graph)
# Loop over number of epochs
print('\nTraining')
for epoch in range(FLAGS.num_epochs):
sess.run(iterator_train.initializer)
start_time = time.time()
train_accuracy = 0
validation_accuracy = 0
acc_train_avg = 0
val_acc_avg = 0
for batch in range(0, int(25200/FLAGS.batch_size)):
img_train, shp_train, lbl_train = sess.run(next_element_train)
_, loss_train, acc_train, acc_summ = sess.run([optimizer, cost, accuracy, merged_summary], feed_dict = {x: img_train, y: lbl_train})
print(loss_train)
print(acc_train)
train_accuracy+=acc_train
end_time = time.time()
#acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
#TRAINING
print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
print("\tAccuracy:")
print("\t- Training Loss:\t{}", loss_train)
print ("\t- Training Accuracy:\t{}",acc_train)
writer_train.add_summary(acc_summ,epoch+1)
#######################################################################################################################
The error is obtained as
Training
1427.1069
b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'
Traceback (most recent call last):
File "train_trial.py", line 302, in <module>
train_accuracy+=acc_train
TypeError: unsupported operand type(s) for +=: 'int' and 'bytes'
You are overwriting your loss and accuracy operations here:
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
Then when you run accuracy you get the protobuf bytes of the summary, instead of just running the op. You should rename these variables to prevent overwriting/name clashes.

One-hot vector prediction always returns the same value

My deep neural network returns the same output for every input. I tried (with no luck) different variations of:
loss
optimizer
network topology / layers types
number of epochs (1-100)
I have 3 outputs (one-hot) and for every input output they are like (it changes after every training):
4.701869785785675049e-01 4.793547391891479492e-01 2.381391078233718872e-01
This problem happens probably because of highly random nature of my training data (stock prediction).
The data set is also heavily shifted towards one of the answers (that's why I used sample_weight - calculated proportionally).
I think I can rule out overfitting (it happens even for 1 epoch and I have dropout layers).
One of the examples of my network:
xs_conv = xs.reshape(xs.shape[0], xs.shape[1], 1)
model_conv = Sequential()
model_conv.add(Conv1D(128, 15, input_shape=(input_columns,1), activation='relu'))
model_conv.add(MaxPooling1D(pool_size=3))
model_conv.add(Dropout(0.4))
model_conv.add(Conv1D(64, 15, input_shape=(input_columns,1), activation='relu'))
model_conv.add(MaxPooling1D(pool_size=3))
model_conv.add(Dropout(0.4))
model_conv.add(Flatten())
model_conv.add(Dense(128, activation='relu'))
model_conv.add(Dropout(0.4))
model_conv.add(Dense(3, activation='sigmoid'))
model_conv.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])
model_conv.fit(xs_conv, ys, epochs=10, batch_size=16, sample_weight=sample_weight, validation_split=0.3, shuffle=True)
I would understand if the outputs were random, but what happens seems very peculiar. Any ideas?
Data: computed.csv
Whole code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Input, Dense, Conv1D, Dropout, MaxPooling1D, Flatten
from keras.models import Model, Sequential
from keras import backend as K
import random
DATA_DIR = '../../Data/'
INPUT_DATA_FILE = DATA_DIR + 'computed.csv'
def get_y(row):
profit = 0.010
hot_one = [0,0,0]
hot_one[0] = int(row.close_future_5 >= profit)
hot_one[1] = int(row.close_future_5 <= -profit)
hot_one[2] = int(row.close_future_5 < profit and row.close_future_10 > -profit)
return hot_one
def rolling_window(window, arr):
return [np.array(arr[i:i+window]).transpose().flatten().tolist() for i in range(0, len(arr))][0:-window+1]
def prepare_data(data, widnow, test_split):
xs1 = data.iloc[:,1:26].as_matrix()
ys1 = [get_y(row) for row in data.to_records()]
xs = np.array(rolling_window(window, xs1)).tolist()
ys = ys1[0:-window+1]
zipped = list(zip(xs, ys))
random.shuffle(zipped)
train_size = int((1.0 - test_split) * len(data))
xs, ys = zip(*zipped[0:train_size])
xs_test, ys_test = zip(*zipped[train_size:])
return np.array(xs), np.array(ys), np.array(xs_test), np.array(ys_test)
def get_sample_weight(y):
if(y[0]): return ups_w
elif(y[1]): return downs_w
else: return flats_w
data = pd.read_csv(INPUT_DATA_FILE)
window = 30
test_split = .9
xs, ys, xs_test, ys_test = prepare_data(data, window, test_split)
ups_cnt = sum(y[0] for y in ys)
downs_cnt = sum(y[1] for y in ys)
flats_cnt = sum(y[0] == False and y[1] == False for y in ys)
total_cnt = ups_cnt + downs_cnt + flats_cnt
ups_w = total_cnt/ups_cnt
downs_w = total_cnt/downs_cnt
flats_w = total_cnt/flats_cnt
sample_weight = np.array([get_sample_weight(y) for y in ys])
_, input_columns = xs.shape
xs_conv = xs.reshape(xs.shape[0], xs.shape[1], 1)
model_conv = Sequential()
model_conv.add(Conv1D(128, 15, input_shape=(input_columns,1), activation='relu'))
model_conv.add(MaxPooling1D(pool_size=3))
model_conv.add(Dropout(0.4))
model_conv.add(Conv1D(64, 15, input_shape=(input_columns,1), activation='relu'))
model_conv.add(MaxPooling1D(pool_size=3))
model_conv.add(Dropout(0.4))
model_conv.add(Flatten())
model_conv.add(Dense(128, activation='relu'))
model_conv.add(Dropout(0.4))
model_conv.add(Dense(3, activation='sigmoid'))
model_conv.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])
model_conv.fit(xs_conv, ys, epochs=1, batch_size=16, sample_weight=sample_weight, validation_split=0.3, shuffle=True)
xs_test_conv = xs_test.reshape(xs_test.shape[0], xs_test.shape[1], 1)
res = model_conv.predict(xs_test_conv)
plotdata = pd.concat([pd.DataFrame(res, columns=['res_up','res_down','res_flat']), pd.DataFrame(ys_test, columns=['ys_up','ys_down','y_flat'])], axis = 1)
plotdata[['res_up', 'ys_up']][3000:3500].plot(figsize=(20,4))
plotdata[['res_down', 'ys_down']][3000:3500].plot(figsize=(20,4))
I have run your model with the attached data and so far can say that the biggest problem is lack of data cleaning.
For instance, there's a inf value in .csv at line 623. After I've filtered them all out with
xs1 = xs1[np.isfinite(xs1).all(axis=1)]
... I collected some statistics over xs, namely min, max and mean. They turned out pretty remarkable:
-43.0049723138
32832.3333333 # !!!
0.213126234391
On average, the values are close to 0, but some are 6 orders of magnitude higher. These particular rows definitely hurt the neural network, so you should either filter them as well or come up with a clever way to normalize the features.
But even with them, the model ended up with 71-79% validation accuracy. The result distribution is a bit skewed towards the 3rd class, but in general pretty diverse to name it peculiar: 19% for class 1, 7% for class 2, 73% for class 3. Example test output:
[[ 1.93120316e-02 4.47684433e-04 9.97518778e-01]
[ 1.40607255e-02 2.45630667e-02 9.74113524e-01]
[ 3.07740629e-01 4.80920941e-01 2.28664145e-01]
...,
[ 5.72797097e-02 9.45571139e-02 8.07634115e-01]
[ 1.05512664e-01 8.99530351e-02 6.70437515e-01]
[ 5.24505274e-03 1.46622911e-01 9.42657173e-01]]

Using the deep neural network package "Chainer" to train a simple dataset

I'm trying to use the chainer package for a large project I'm working on. I have read through the tutorial on their website which gives an example of applying it to the MNIST dataset, but it doesn't seem to scale easily to other examples, and there's simply not enough documentation otherwise.
Their example code is as follows:
class MLP(Chain):
def __init__(self, n_units, n_out):
super(MLP, self).__init__(
# the size of the inputs to each layer will be inferred
l1=L.Linear(None, n_units), # n_in -> n_units
l2=L.Linear(None, n_units), # n_units -> n_units
l3=L.Linear(None, n_out), # n_units -> n_out
)
def __call__(self, x):
h1 = F.relu(self.l1(x))
h2 = F.relu(self.l2(h1))
y = self.l3(h2)
return y
train, test = datasets.get_mnist()
train_iter = iterators.SerialIterator(train, batch_size=5, shuffle=True)
test_iter = iterators.SerialIterator(test, batch_size=2, repeat=False, shuffle=False)
model = L.Classifier(MLP(100, 10)) # the input size, 784, is inferred
optimizer = optimizers.SGD()
optimizer.setup(model)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (4, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(test_iter, model))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())
trainer.run()
Could someone point me in the direction of how to simple fit a straight line to a few data points in 2D? If I can understand a simple fit such as this I should be able to scale appropriately.
Thanks for the help!
I pasted simple regression modeling here.
You can use original train data and test data as tuple.
train = (data, label)
Here, data.shape = (Number of data, Number of data dimesion)
And, label.shape = (Number of data,)
Both of their data type should be numpy.float32.
import chainer
from chainer.functions import *
from chainer.links import *
from chainer.optimizers import *
from chainer import training
from chainer.training import extensions
from chainer import reporter
from chainer import datasets
import numpy
class MyNet(chainer.Chain):
def __init__(self):
super(MyNet, self).__init__(
l0=Linear(None, 30, nobias=True),
l1=Linear(None, 1, nobias=True),
)
def __call__(self, x, t):
l0 = self.l0(x)
f0 = relu(l0)
l1 = self.l1(f0)
f1 = flatten(l1)
self.loss = mean_squared_error(f1, t)
reporter.report({'loss': self.loss}, self)
return self.loss
def get_optimizer():
return Adam()
def training_main():
model = MyNet()
optimizer = get_optimizer()
optimizer.setup(model)
train, test = datasets.get_mnist(label_dtype=numpy.float32)
train_iter = chainer.iterators.SerialIterator(train, 50)
test_iter = chainer.iterators.SerialIterator(test, 50,
repeat=False,
shuffle=False)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (10, 'epoch'))
trainer.extend(extensions.ProgressBar())
trainer.extend(extensions.Evaluator(test_iter, model))
trainer.extend(
extensions.PlotReport(['main/loss', 'validation/main/loss'],
'epoch'))
trainer.run()
if __name__ == '__main__':
training_main()