the model learned, but cannot predict? - linear-regression

I follow the tutorial to write some linear regression code about boston price, it worked very well and the loss became smaller, when I wanted to paint the graph in matplotlib, I found the graph not showed as what in my mind.
I searched,but could not solve my question.
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
if __name__ == '__main__':
boston = load_boston()
col_names = ['feature_{}'.format(i) for i in range(boston['data'].shape[1])]
df_full = pd.DataFrame(boston['data'], columns=col_names)
scalers_dict = {}
for col in col_names:
scaler = StandardScaler()
df_full[col] = scaler.fit_transform(df_full[col].values.reshape(-1, 1))
scalers_dict[col] = scaler
x_train, x_test, y_train, y_test = train_test_split(df_full.values, boston['target'], test_size=0.2, random_state=2)
model = torch.nn.Sequential(torch.nn.Linear(x_train.shape[1], 1), torch.nn.ReLU())
criterion = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
n_epochs = 2000
train_loss = []
test_loss = []
x_train = Variable(torch.from_numpy(x_train).float(), requires_grad=True)
y_train = Variable(torch.from_numpy(y_train).float())
for epoch in range(n_epochs):
y_hat = model(x_train)
loss = criterion(y_hat, y_train)
optimizer.zero_grad()
loss.backward()
optimizer.step()
epoch_loss = loss.data ** (1/2)
train_loss.append(epoch_loss)
if (epoch + 1) % 250 == 0:
print("{}:loss = {}".format(epoch + 1, epoch_loss))
order = y_train.argsort()
y_train = y_train[order]
x_train = x_train[order, :]
model.eval()
predicted = model(x_train).detach().numpy()
actual = y_train.numpy()
print('predicted:", predicted[:5].flatten(), actual[:5])
plt.plot(predicted.flatten(), 'r-', label='predicted')
plt.plot(actual, 'g-', label='actual')
plt.show()
why the predict were the same result like [22.4413, 22.4413, ...],
in the picture, it's a horizontal line.
I'm a very beginner to deeplearning, thank you very much for your help!

Related

Pytorch-GPU what am I forgetting to move over to the GPU?

I'm getting this error. What am I leaving out, I feel that I have tired everything.
Also is there not a easy way to just use the GPU and not the CPU I feel like I have tried all those options as well. As in not using .cuda() everywhere
This is one of my first neutral networks so please go easy on me. (most of it is from Class)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking arugment for argument target in method wrapper_nll_loss_forward)
import torch.cuda
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
torch.cuda.set_device(0)
def load_data():
num_workers = 0
load_data.batch_size = 20
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)
load_data.train_loader = torch.utils.data.DataLoader(train_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
def visualize():
dataiter = iter(load_data.train_loader)
visualize.images, labels = dataiter.next()
visualize.images = visualize.images.numpy()
fig = plt.figure(figsize=(25, 4))
for idx in np.arange(load_data.batch_size):
ax = fig.add_subplot(2, load_data.batch_size/2, idx+1, xticks=[], yticks=[])
ax.imshow(np.squeeze(visualize.images[idx]), cmap='gray')
ax.set_title(str(labels[idx].item()))
#plt.show()
def fig_values():
img = np.squeeze(visualize.images[1])
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(111)
ax.imshow(img, cmap='gray')
width, height = img.shape
thresh = img.max()/2.5
for x in range(width):
for y in range(height):
val = round(img[x][y],2) if img[x][y] !=0 else 0
ax.annotate(str(val), xy=(y,x),
horizontalalignment='center',
verticalalignment='center',
color='white' if img[x][y]<thresh else 'black')
#plt.show()
load_data()
visualize()
fig_values()
class NeuralNet(nn.Module):
def __init__(self, gpu = True):
super(NeuralNet, self ).__init__()
self.fc1 = nn.Linear(28 * 28, 16).cuda()
self.fc2 = nn.Linear(16, 10).cuda()
def forward(self, x):
x = x.view(-1, 28 * 28).cuda()
x = F.relu(self.fc1(x)).cuda()
x = self.fc2(x).cuda()
return x
def training():
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 100
model.train().cuda()
for epoch in range(n_epochs):
train_loss = 0.0
for data, target in load_data.train_loader:
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
train_loss = train_loss/len(load_data.train_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
epoch+1,
train_loss
))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet().to(device)
summary(model, input_size=(1, 28, 28))
training()
Your data, and target are not in GPU (considering the removal of repeated cuda calls).
You are also doing of unnecessary cuda() which is not needed. Simply, see where your data and model are. Take the model to GPU, take the data and label to GPU, finally feed data to the model.
Don't use, cuda(), use to.device(), it's safer in the long run and easily customizable in multi-gpu setup.
import torch.cuda
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
torch.cuda.set_device(0)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_data():
num_workers = 0
load_data.batch_size = 20
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)
load_data.train_loader = torch.utils.data.DataLoader(train_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
def visualize():
dataiter = iter(load_data.train_loader)
visualize.images, labels = dataiter.next()
visualize.images = visualize.images.numpy()
fig = plt.figure(figsize=(25, 4))
for idx in np.arange(load_data.batch_size):
ax = fig.add_subplot(2, load_data.batch_size/2, idx+1, xticks=[], yticks=[])
ax.imshow(np.squeeze(visualize.images[idx]), cmap='gray')
ax.set_title(str(labels[idx].item()))
#plt.show()
def fig_values():
img = np.squeeze(visualize.images[1])
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(111)
ax.imshow(img, cmap='gray')
width, height = img.shape
thresh = img.max()/2.5
for x in range(width):
for y in range(height):
val = round(img[x][y],2) if img[x][y] !=0 else 0
ax.annotate(str(val), xy=(y,x),
horizontalalignment='center',
verticalalignment='center',
color='white' if img[x][y]<thresh else 'black')
#plt.show()
load_data()
visualize()
fig_values()
class NeuralNet(nn.Module):
def __init__(self, gpu = True):
super(NeuralNet, self ).__init__()
self.fc1 = nn.Linear(28 * 28, 16)
self.fc2 = nn.Linear(16, 10)
def forward(self, x):
x = x.view(-1, 28 * 28)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
def training():
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 100
model.train()
for epoch in range(n_epochs):
train_loss = 0.0
for data, target in load_data.train_loader:
optimizer.zero_grad()
###################################
data = data.to(device)
target = target.to(device)
###################################
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
train_loss = train_loss/len(load_data.train_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
epoch+1,
train_loss
))
model = NeuralNet().to(device)
summary(model, input_size=(1, 28, 28))
training()
Clearly your target variable is not on GPU.
Also, its a bad idea to call .cuda() inside forward()
def forward(self, x):
x = x.view(-1, 28 * 28).cuda() # BAD
x = F.relu(self.fc1(x)).cuda() # BAD
x = self.fc2(x).cuda() #BAD
return x
Rather, remove all .cuda() inside forward and do this is main loop
for data, target in load_data.train_loader:
data = data.cuda()
target = target.cuda()

Coefficient of determination is close to -1

I am down loading stock data from you finance.
I am trying to generate stock trading signal using ANN.
I am getting Coefficient of determination is close to -1 and prediction is seem to be mirror image.
Can someone suggest.
I am getting similar Coefficient of determination with others ML.
It seems to be strange.
import numpy as np
import yfinance as yf
import talib as ta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')
import random
random.seed(42)
# YYYY-MM-DD
start_date = '2010-01-01'
end_date = '2020-08-03'
#df = yf.download(tickers="^NSEI", start=start_date, end=end_date, interval="1d", progress=False)
df = yf.download("SBIN.NS", start=start_date, end=end_date, interval="1d", progress=False)
def create_trading_condition(df):
df['RSI'] = ta.RSI(df['Adj Close'].values, timeperiod = 9)
df['MACD'] = ta.MACD(df['Adj Close'].values, fastperiod=12, slowperiod=26, signalperiod=9)[0]
df['Williams %R'] = ta.WILLR(df['High'].values, df['Low'].values, df['Adj Close'].values, 7)
df['C-O'] = df['Adj Close'] - df['Open']
df['H-L'] = df['High'] - df['Low']
df['STDEV']= df['Adj Close'].rolling(5).std()
df['TARGET'] = np.where(df['Adj Close'].shift(-1) > df['Adj Close'], 1, 0)
df = df.dropna()
X = df[['RSI', 'MACD', 'Williams %R', 'C-O', 'H-L', 'STDEV']]
Y = df['TARGET']
#print(df)
return (df, X, Y)
df_new, X, Y = create_trading_condition(df)
X_train,X_test,Y_train,Y_test = train_test_split(X, Y, shuffle=False, train_size=0.8)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
clf = Sequential()
clf.add(Dense(units = 128, kernel_initializer = 'uniform', activation= 'relu', input_dim = X.shape[1]))
clf.add(Dense(units = 128, kernel_initializer = 'uniform', activation= 'relu'))
clf.add(Dense(units = 1, kernel_initializer = 'uniform', activation= 'sigmoid'))
clf.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics= ['accuracy'])
clf.fit(X_train, Y_train, batch_size = 10, epochs = 100)
Y_pred = clf.predict(X_test)
#print(Y_pred)
Y_pred = (Y_pred > 0.5)
df_new['Y_pred'] = np.NaN
df_new.iloc[(len(df_new) - len(Y_pred)):,-1:] = Y_pred
trade_df = df_new.dropna()
trade_df['Tomorrows Returns'] = 0.
trade_df['Tomorrows Returns'] = np.log(trade_df['Adj Close']/trade_df['Adj Close'].shift(1))
trade_df['Tomorrows Returns'] = trade_df['Tomorrows Returns'].shift(-1)
# Y_pred = true for long position otherwise short position
trade_df['Strategy Returns'] = 0.
trade_df['Strategy Returns'] = np.where(trade_df['Y_pred'] == True, trade_df['Tomorrows Returns'], -trade_df['Tomorrows Returns'])
trade_df['Cumulative Market Returns'] = np.cumsum(trade_df['Tomorrows Returns'])
trade_df['Cumulative Strategy Returns'] = np.cumsum(trade_df['Strategy Returns'])
#accuracy_test = accuracy_score(Y_test, clf.predict(X_test))
# mean squared error
mse_test = mean_squared_error(Y_test, clf.predict(X_test))
# Coefficient of determination
#r2_test = r2_score(Y_test, clf.predict(X_test))
r2_test = r2_score(Y_test, Y_pred)
#report_test = classification_report(Y_test, clf.predict(X_test))
#print("Test accuracy score: %.2f" % accuracy_test)
print("Test mean squared error: %.2f" % mse_test)
print('Test R-square: %.2f\n' % r2_test)
#print(report_test)
import matplotlib.pyplot as plt
plt.figure(figsize=(10,5))
plt.plot(trade_df['Cumulative Market Returns'], color='r', label='Market Returns')
plt.plot(trade_df['Cumulative Strategy Returns'], color='g', label='Strategy Returns')
plt.legend()
plt.show()

How to calculate gini index in pyspark classification model using spark ML?

I am trying to calculate the gini index for a classification model done using GBTClassifier from the pyspark ml models. I cant seem to find a metrics which gives the roc_auc_score like the one in python sklearn.
Below is the code that I have used so far on databricks. I am currently using a dataset from the databricks
%fs ls databricks-datasets/adult/adult.data
from pyspark.sql.functions import *
from pyspark.ml.classification import RandomForestClassifier, GBTClassifier
from pyspark.ml.feature import StringIndexer, OneHotEncoderEstimator, VectorAssembler, VectorSlicer
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import BinaryClassificationEvaluator,MulticlassClassificationEvaluator
from pyspark.mllib.evaluation import BinaryClassificationMetrics
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import ParamGridBuilder, TrainValidationSplit
dataset = spark.table("adult")
# spliting the train and test data frames
splits = dataset.randomSplit([0.7, 0.3])
train_df = splits[0]
test_df = splits[1]
def churn_predictions(train_df,
target_col,
# algorithm,
# model_parameters = conf['model_parameters']
):
"""
#Function attributes
dataframe - training df
target - target varibale in the model
Algorithm - Algorithm used
model_parameters - model parameters used to fine tune the model
"""
# one hot encoding and assembling
encoding_var = [i[0] for i in train_df.dtypes if (i[1]=='string') & (i[0]!=target_col)]
num_var = [i[0] for i in train_df.dtypes if ((i[1]=='int') | (i[1]=='double')) & (i[0]!=target_col)]
string_indexes = [StringIndexer(inputCol = c, outputCol = 'IDX_' + c, handleInvalid = 'keep') for c in encoding_var]
onehot_indexes = [OneHotEncoderEstimator(inputCols = ['IDX_' + c], outputCols = ['OHE_' + c]) for c in encoding_var]
label_indexes = StringIndexer(inputCol = target_col, outputCol = 'label', handleInvalid = 'keep')
assembler = VectorAssembler(inputCols = num_var + ['OHE_' + c for c in encoding_var], outputCol = "features")
gbt = GBTClassifier(featuresCol = 'features', labelCol = 'label',
maxDepth = 5,
maxBins = 45,
maxIter = 20)
pipe = Pipeline(stages = string_indexes + onehot_indexes + [assembler, label_indexes, gbt])
model = pipe.fit(train_df)
return model
gbt_model = churn_predictions(train_df = train_df,
target_col = 'income')
#### prediction in test sample ####
gbt_predictions = gbt_model.transform(test_df)
# display(gbt_predictions)
gbt_evaluator = MulticlassClassificationEvaluator(
labelCol="label", predictionCol="prediction", metricName="accuracy")
accuracy = gbt_evaluator.evaluate(gbt_predictions) * 100
print("Accuracy on test data = %g" % accuracy)
gini_train = 2 * metrics.roc_auc_score(Y, pred_prob) - 1
as you can see in the last line of code there is clearly no metric called roc_auc_score to calculate the gini.
Really appreciate any help on this.
Normally Gini is used to evaluate a binary classification model.
You can calculate it in pyspark in the next way:
from pyspark.ml.evaluation import BinaryClassificationEvaluator
evaluator = BinaryClassificationEvaluator()
auc = evaluator.evaluate(gbt_predictions, {evaluator.metricName: "areaUnderROC"})
gini = 2 * auc - 1.0

How to plot ROC curve in pyspark for GBTClassifier?

I am trying to plot the ROC curve for a gradient boosting model. I have come across this post but it doesn't seem to work for the GBTclassifier model. pyspark extract ROC curve?
I am using a dataset in databricks and below is my code. It gives the following error
AttributeError: 'PipelineModel' object has no attribute 'summary'
%fs ls databricks-datasets/adult/adult.data
from pyspark.sql.functions import *
from pyspark.ml.classification import RandomForestClassifier, GBTClassifier
from pyspark.ml.feature import StringIndexer, OneHotEncoderEstimator, VectorAssembler, VectorSlicer
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import BinaryClassificationEvaluator,MulticlassClassificationEvaluator
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import ParamGridBuilder, TrainValidationSplit
import pandas as pd
dataset = spark.table("adult")
# spliting the train and test data frames
splits = dataset.randomSplit([0.7, 0.3])
train_df = splits[0]
test_df = splits[1]
def predictions(train_df,
target_col,
):
"""
#Function attributes
dataframe - training df
target - target varibale in the model
"""
# one hot encoding and assembling
encoding_var = [i[0] for i in train_df.dtypes if (i[1]=='string') & (i[0]!=target_col)]
num_var = [i[0] for i in train_df.dtypes if ((i[1]=='int') | (i[1]=='double')) & (i[0]!=target_col)]
string_indexes = [StringIndexer(inputCol = c, outputCol = 'IDX_' + c, handleInvalid = 'keep') for c in encoding_var]
onehot_indexes = [OneHotEncoderEstimator(inputCols = ['IDX_' + c], outputCols = ['OHE_' + c]) for c in encoding_var]
label_indexes = StringIndexer(inputCol = target_col, outputCol = 'label', handleInvalid = 'keep')
assembler = VectorAssembler(inputCols = num_var + ['OHE_' + c for c in encoding_var], outputCol = "features")
gbt = GBTClassifier(featuresCol = 'features', labelCol = 'label',
maxDepth = 5,
maxBins = 45,
maxIter = 20)
pipe = Pipeline(stages = string_indexes + onehot_indexes + [assembler, label_indexes, gbt])
model = pipe.fit(train_df)
return model
gbt_model = predictions(train_df = train_df,
target_col = 'income')
import matplotlib.pyplot as plt
plt.figure(figsize=(5,5))
plt.plot([0, 1], [0, 1], 'r--')
plt.plot(gbt_model.summary.roc.select('FPR').collect(),
gbt_model.summary.roc.select('TPR').collect())
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.show()
Based on your error, have a look at PipelineModel in this doc:
https://spark.apache.org/docs/2.4.3/api/python/pyspark.ml.html#pyspark.ml.PipelineModel
There is not attribute summary in on an object of this class. Instead, I believe you need to access the stages of the PipelineModel individually, such as gbt_model.stages[-1] (which should give access to your last stage - the GBTClassifier. Then try and play around with the attributes there, such as:
gbt_model.stages[-1].summary
And if your GBTClassifier has a summary, you'll find it there. Hope this helps.

TensorFlow restoring from NN does not work

I am struggling with restoring values from NN in tensorflow. I tried to follow the examples on net, and here is my code:
import tensorflow as tf
import numpy as np
import math, random
import matplotlib.pyplot as plt
np.random.seed(1000) # for repro
function_to_learn = lambda x: np.sin(x) + 0.1*np.random.randn(*x.shape)
NUM_HIDDEN_NODES = 2
NUM_EXAMPLES = 1000
TRAIN_SPLIT = .8
MINI_BATCH_SIZE = 100
NUM_EPOCHS = 500
all_x = np.float32(np.random.uniform(-2*math.pi, 2*math.pi, (1, NUM_EXAMPLES))).T
np.random.shuffle(all_x)
train_size = int(NUM_EXAMPLES*TRAIN_SPLIT)
trainx = all_x[:train_size]
validx = all_x[train_size:]
trainy = function_to_learn(trainx)
validy = function_to_learn(validx)
plt.figure()
plt.scatter(trainx, trainy, c='green', label='train')
plt.scatter(validx, validy, c='red', label='validation')
plt.legend()
X = tf.placeholder(tf.float32, [None, 1], name="X")
Y = tf.placeholder(tf.float32, [None, 1], name="Y")
w_h = tf.Variable(tf.zeros([1, NUM_HIDDEN_NODES],name="w_h"))
b_h = tf.Variable(tf.zeros([1, NUM_HIDDEN_NODES],name="b_h"))
w_o = tf.Variable(tf.zeros([NUM_HIDDEN_NODES,1],name="w_o"))
b_o = tf.Variable(tf.zeros([1, 1],name="b_o"))
def init_weights(shape, init_method='xavier', xavier_params = (None, None)):
if init_method == 'zeros':
return tf.Variable(tf.zeros(shape, dtype=tf.float32))
elif init_method == 'uniform':
return tf.Variable(tf.random_normal(shape, stddev=0.01, dtype=tf.float32))
def model(X, num_hidden = NUM_HIDDEN_NODES):
w_h = init_weights([1, num_hidden], 'uniform' )
b_h = init_weights([1, num_hidden], 'zeros')
h = tf.nn.sigmoid(tf.matmul(X, w_h) + b_h)
w_o = init_weights([num_hidden, 1], 'xavier', xavier_params=(num_hidden, 1))
b_o = init_weights([1, 1], 'zeros')
return tf.matmul(h, w_o) + b_o
yhat = model(X, NUM_HIDDEN_NODES)
train_op = tf.train.AdamOptimizer().minimize(tf.nn.l2_loss(yhat - Y))
plt.figure()
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
for v in tf.all_variables():
print v.name
saver = tf.train.Saver()
errors = []
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
for i in range(NUM_EPOCHS):
for start, end in zip(range(0, len(trainx), MINI_BATCH_SIZE), range(MINI_BATCH_SIZE, len(trainx), MINI_BATCH_SIZE)):
sess.run(train_op, feed_dict={X: trainx[start:end], Y: trainy[start:end]})
mse = sess.run(tf.nn.l2_loss(yhat - validy), feed_dict={X:validx})
errors.append(mse)
if i%100 == 0:
print "epoch %d, validation MSE %g" % (i, mse)
print sess.run(w_h)
saver.save(sess,"/Python/tensorflow/res/save_net.ckpt", global_step = i)
print " ******* AFTR *******"
for v in tf.all_variables():
print v.name
plt.plot(errors)
plt.xlabel('#epochs')
plt.ylabel('MSE')
******* to get the restore values, I tried:**
import tensorflow as tf
import numpy as np
import math, random
import matplotlib.pyplot as plt
NUM_HIDDEN_NODES = 2
#SECOND PART TO GET THE STORED VALUES
w_h = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(1, NUM_HIDDEN_NODES), dtype=tf.float32, name='w_h')
b_h = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(1, NUM_HIDDEN_NODES), dtype=tf.float32, name='b_h')
w_o = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(NUM_HIDDEN_NODES, 1), dtype=tf.float32, name='w_o')
b_o = tf.Variable(np.arange(1).reshape(1, 1), dtype=tf.float32, name='b_o')
saver = tf.train.Saver()
with tf.Session() as sess:
ckpt = tf.train.get_checkpoint_state("/Python/tensorflow/res/")
if ckpt and ckpt.model_checkpoint_path:
# Restores from checkpoint
saver.restore(sess, "/Python/tensorflow/res/save_net.ckpt-400")
print "Model loaded"
else:
print "No checkpoint file found"
print("weights:", sess.run(w_h))
print("biases:", sess.run(b_h))
Your help is greatly appreciated and I am almost giving up on this.
Thanks a lot again
It seems the checkpoint file you want to restore your variables from is different from the current variable/shape of existing code.
Save: (if substitute it with constants from definitions above)
w_h = tf.Variable(tf.zeros([1, 5],name="w_h"))
b_h = tf.Variable(tf.zeros([1, 5],name="b_h"))
w_o = tf.Variable(tf.zeros([5,1],name="w_o"))
b_o = tf.Variable(tf.zeros([1, 1],name="b_o"))
Restore:
w_h = tf.Variable(np.arange(10).reshape(1, 10), dtype=tf.float32, name='w_h')
b_h = tf.Variable(np.arange(10).reshape(1, 10), dtype=tf.float32, name='b_h')
w_o = tf.Variable(np.arange(10).reshape(10, 1), dtype=tf.float32, name='w_o')
b_o = tf.Variable(np.arange(1).reshape(1, 1), dtype=tf.float32, name='b_o')
To prevent these types of problems, try to use functions for training and inference so all your code will same variables and constants.
You are creating two sets of weights, once globally and second time when you call init_weights. The second set of variables is the one that's getting optimized, but both sets are saved.
In your eval code, you are creating this set of variables once, so your restore only restores the first set, which has not been modified after initialization.
The solution is to either factor out model creation code so that exactly same graph is created during training and during eval, or to use meta_graph which will recreate graph structure during restore.