Modify training function of 3 nets with shared classifier - neural-network

I have 3 VGG: VGGA, VGGB and VGG*, trained with the following training function:
def train(nets, loaders, optimizer, criterion, epochs=20, dev=None, save_param=False, model_name="valerio"):
# try:
nets = [n.to(dev) for n in nets]
model_a = module_unwrap(nets[0], True)
model_b = module_unwrap(nets[1], True)
model_c = module_unwrap(nets[2], True)
reg_loss = nn.MSELoss()
criterion.to(dev)
reg_loss.to(dev)
# Initialize history
history_loss = {"train": [], "val": [], "test": []}
history_accuracy = {"train": [], "val": [], "test": []}
# Store the best val accuracy
best_val_accuracy = 0
# Process each epoch
for epoch in range(epochs):
# Initialize epoch variables
sum_loss = {"train": 0, "val": 0, "test": 0}
sum_accuracy = {"train": [0,0,0], "val": [0,0,0], "test": [0,0,0]}
progbar = None
# Process each split
for split in ["train", "val", "test"]:
if split == "train":
for n in nets:
n.train()
widgets = [
' [', pb.Timer(), '] ',
pb.Bar(),
' [', pb.ETA(), '] ', pb.Variable('ta','[Train Acc: {formatted_value}]')
]
progbar = pb.ProgressBar(max_value=len(loaders[split][0]),widgets=widgets,redirect_stdout=True)
else:
for n in nets:
n.eval()
# Process each batch
for j,((input_a, labels_a),(input_b, labels_b)) in enumerate(zip(loaders[split][0],loaders[split][1])):
input_a = input_a.to(dev)
input_b = input_b.to(dev)
labels_a = labels_a.long().to(dev)
labels_b = labels_b.long().to(dev)
#print(labels_a.shape)
#labels_a = labels_a.squeeze()
#labels_b = labels_b.squeeze()
#labels_a = labels_a.unsqueeze(1)
#labels_b = labels_b.unsqueeze(1)
#print(labels_a.shape)
#labels_a = labels_a.argmax(-1)
#labels_b = labels_b.argmax(-1)
inputs = torch.cat([input_a,input_b],axis=0)
labels = torch.cat([labels_a, labels_b])
#labels = labels.squeeze()
#print(labels.shape)
#labels = labels.argmax(-1)
# Reset gradients
optimizer.zero_grad()
# Compute output
features_a = nets[0](input_a)
features_b = nets[1](input_b)
features_c = nets[2](inputs)
pred_a = torch.squeeze(nets[3](features_a))
pred_b = torch.squeeze(nets[3](features_b))
pred_c = torch.squeeze(nets[3](features_c))
loss = criterion(pred_a, labels_a) + criterion(pred_b, labels_b) + criterion(pred_c, labels)
for n in model_a:
layer_a = model_a[n]
layer_b = model_b[n]
layer_c = model_c[n]
if (isinstance(layer_a,nn.Conv2d)):
loss += lambda_reg * reg_loss(combo_fn(layer_a.weight,layer_b.weight),layer_c.weight)
if (layer_a.bias is not None):
loss += lambda_reg * reg_loss(combo_fn(layer_a.bias, layer_b.bias), layer_c.bias)
# Update loss
sum_loss[split] += loss.item()
# Check parameter update
if split == "train":
# Compute gradients
loss.backward()
# Optimize
optimizer.step()
# Compute accuracy
#https://discuss.pytorch.org/t/bcewithlogitsloss-and-model-accuracy-calculation/59293/ 2
#pred_labels_a = (pred_a >= 0.0).long() # Binarize predictions to 0 and 1
#pred_labels_b = (pred_b >= 0.0).long() # Binarize predictions to 0 and 1
#pred_labels_c = (pred_c >= 0.0).long() # Binarize predictions to 0 and 1
#print(pred_a.shape)
_,pred_label_a = torch.max(pred_a, dim = 1)
pred_labels_a = (pred_label_a == labels_a).float()
_,pred_label_b = torch.max(pred_b, dim = 1)
pred_labels_b = (pred_label_b == labels_b).float()
_,pred_label_c = torch.max(pred_c, dim = 1)
pred_labels_c = (pred_label_c == labels).float()
batch_accuracy_a = pred_labels_a.sum().item() / len(labels_a)
batch_accuracy_b = pred_labels_b.sum().item() / len(labels_b)
batch_accuracy_c = pred_labels_c.sum().item() / len(labels)
# Update accuracy
sum_accuracy[split][0] += batch_accuracy_a
sum_accuracy[split][1] += batch_accuracy_b
sum_accuracy[split][2] += batch_accuracy_c
if (split=='train'):
progbar.update(j, ta=batch_accuracy_c)
if (progbar is not None):
progbar.finish()
# Compute epoch loss/accuracy
epoch_loss = {split: sum_loss[split] / len(loaders[split][0]) for split in ["train", "val", "test"]}
epoch_accuracy = {split: [sum_accuracy[split][i] / len(loaders[split][0]) for i in range(len(sum_accuracy[split])) ] for split in ["train", "val", "test"]}
# # Store params at the best validation accuracy
# if save_param and epoch_accuracy["val"] > best_val_accuracy:
# # torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
# torch.save(net.state_dict(), f"{model_name}_best_val.pth")
# best_val_accuracy = epoch_accuracy["val"]
print(f"Epoch {epoch + 1}:")
# Update history
for split in ["train", "val", "test"]:
history_loss[split].append(epoch_loss[split])
history_accuracy[split].append(epoch_accuracy[split])
# Print info
print(f"\t{split}\tLoss: {epoch_loss[split]:0.5}\tVGG 1:{epoch_accuracy[split][0]:0.5}"
f"\tVGG 2:{epoch_accuracy[split][1]:0.5}\tVGG *:{epoch_accuracy[split][2]:0.5}")
if save_param:
torch.save({'vgg_a':nets[0].state_dict(),'vgg_b':nets[1].state_dict(),'vgg_star':nets[2].state_dict(),'classifier':nets[3].state_dict()},f'{model_name}.pth')
For each epoch of training the result is this:
Then, I have a combined model which sums the weights of VGGA and VGGB:
DO = 'TEST'
if (DO=='TRAIN'):
train(nets, loaders, optimizer, criterion, epochs=50, dev=dev,save_param=True)
else:
state_dicts = torch.load('valerio.pth')
model1.load_state_dict(state_dicts['vgg_a']) #questi state_dict vengono dalla funzione di training
model2.load_state_dict(state_dicts['vgg_b'])
model3.load_state_dict(state_dicts['vgg_star'])
classifier.load_state_dict(state_dicts['classifier'])
test(model1,classifier,test_loader_all)
test(model2, classifier, test_loader_all)
test(model3, classifier, test_loader_all)
summed_state_dict = OrderedDict()
for key in state_dicts['vgg_star']:
if key.find('conv') >=0:
print(key)
summed_state_dict[key] = combo_fn(state_dicts['vgg_a'][key],state_dicts['vgg_b'][key])
else:
summed_state_dict[key] = state_dicts['vgg_star'][key]
model3.load_state_dict(summed_state_dict)
test(model3, classifier, test_loader_all)
where the test function is this:
def test(net,classifier, loader):
net.to(dev)
classifier.to(dev)
net.eval()
sum_accuracy = 0
# Process each batch
for j, (input, labels) in enumerate(loader):
input = input.to(dev)
labels = labels.float().to(dev)
features = net(input)
pred = torch.squeeze(classifier(features))
# https://discuss.pytorch.org/t/bcewithlogitsloss-and-model-accuracy-calculation/59293/ 2
#pred_labels = (pred >= 0.0).long() # Binarize predictions to 0 and 1
_,pred_label = torch.max(pred, dim = 1)
pred_labels = (pred_label == labels).float()
batch_accuracy = pred_labels.sum().item() / len(labels)
# Update accuracy
sum_accuracy += batch_accuracy
epoch_accuracy = sum_accuracy / len(loader)
print(f"Accuracy after sum: {epoch_accuracy:0.5}")
And the result of this aggregation is the following:
I want to modify my training function in order to print the same things of the first image, plus the accuracy of the aggregated model (the highlighted part in red of the second picture). So basically, for each epoch, accuracies of VGGA, VGGB, VGG* and combined VGG, print these accuracies and continue with the training. I tried to add this model combo but I failed, because I did not able to insert into each epoch, but only at the end of the training. I was trying to add in the training function, between print(f"Epoch {epoch + 1}:")and
# Update history
for split in ["train", "val", "test"]:
the code with the part of state_dict, but i am doing something wrong, i do not know what.
Can I reuse the code of the test, or I have to write new code?
Do you think i have to save the state_dict for each epoch, or i can do something else? Like model_c.parameters()=model_a.parameters()+model_b.parameters() (which does not work, already tried)

I solved, here is the solution of how I modified my training function:
def train(nets, loaders, optimizer, criterion, epochs=20, dev=None, save_param=False, model_name="valerio"):
# try:
nets = [n.to(dev) for n in nets]
model_a = module_unwrap(nets[0], True)
model_b = module_unwrap(nets[1], True)
model_c = module_unwrap(nets[2], True)
reg_loss = nn.MSELoss()
criterion.to(dev)
reg_loss.to(dev)
# Initialize history
history_loss = {"train": [], "val": [], "test": []}
history_accuracy = {"train": [], "val": [], "test": []}
history_test = 0
# Store the best val accuracy
best_val_accuracy = 0
# Process each epoch
for epoch in range(epochs):
# Initialize epoch variables
sum_loss = {"train": 0, "val": 0, "test": 0}
sum_accuracy = {"train": [0,0,0], "val": [0,0,0], "test": [0,0,0]}
progbar = None
# Process each split
for split in ["train", "val", "test"]:
if split == "train":
for n in nets:
n.train()
widgets = [
' [', pb.Timer(), '] ',
pb.Bar(),
' [', pb.ETA(), '] ', pb.Variable('ta','[Train Acc: {formatted_value}]')
]
progbar = pb.ProgressBar(max_value=len(loaders[split][0]),widgets=widgets,redirect_stdout=True)
else:
for n in nets:
n.eval()
# Process each batch
for j,((input_a, labels_a),(input_b, labels_b)) in enumerate(zip(loaders[split][0],loaders[split][1])):
input_a = input_a.to(dev)
input_b = input_b.to(dev)
labels_a = labels_a.long().to(dev)
labels_b = labels_b.long().to(dev)
#print(labels_a.shape)
#labels_a = labels_a.squeeze()
#labels_b = labels_b.squeeze()
#labels_a = labels_a.unsqueeze(1)
#labels_b = labels_b.unsqueeze(1)
#print(labels_a.shape)
#labels_a = labels_a.argmax(-1)
#labels_b = labels_b.argmax(-1)
inputs = torch.cat([input_a,input_b],axis=0)
labels = torch.cat([labels_a, labels_b])
#labels = labels.squeeze()
#print(labels.shape)
#labels = labels.argmax(-1)
# Reset gradients
optimizer.zero_grad()
# Compute output
features_a = nets[0](input_a)
features_b = nets[1](input_b)
features_c = nets[2](inputs)
pred_a = torch.squeeze(nets[3](features_a))
pred_b = torch.squeeze(nets[3](features_b))
pred_c = torch.squeeze(nets[3](features_c))
loss = criterion(pred_a, labels_a) + criterion(pred_b, labels_b) + criterion(pred_c, labels)
for n in model_a:
layer_a = model_a[n]
layer_b = model_b[n]
layer_c = model_c[n]
if (isinstance(layer_a,nn.Conv2d)):
loss += lambda_reg * reg_loss(combo_fn(layer_a.weight,layer_b.weight),layer_c.weight)
if (layer_a.bias is not None):
loss += lambda_reg * reg_loss(combo_fn(layer_a.bias, layer_b.bias), layer_c.bias)
# Update loss
sum_loss[split] += loss.item()
# Check parameter update
if split == "train":
# Compute gradients
loss.backward()
# Optimize
optimizer.step()
# Compute accuracy
#https://discuss.pytorch.org/t/bcewithlogitsloss-and-model-accuracy-calculation/59293/ 2
#pred_labels_a = (pred_a >= 0.0).long() # Binarize predictions to 0 and 1
#pred_labels_b = (pred_b >= 0.0).long() # Binarize predictions to 0 and 1
#pred_labels_c = (pred_c >= 0.0).long() # Binarize predictions to 0 and 1
#print(pred_a.shape)
_,pred_label_a = torch.max(pred_a, dim = 1)
pred_labels_a = (pred_label_a == labels_a).float()
_,pred_label_b = torch.max(pred_b, dim = 1)
pred_labels_b = (pred_label_b == labels_b).float()
_,pred_label_c = torch.max(pred_c, dim = 1)
pred_labels_c = (pred_label_c == labels).float()
batch_accuracy_a = pred_labels_a.sum().item() / len(labels_a)
batch_accuracy_b = pred_labels_b.sum().item() / len(labels_b)
batch_accuracy_c = pred_labels_c.sum().item() / len(labels)
# Update accuracy
sum_accuracy[split][0] += batch_accuracy_a
sum_accuracy[split][1] += batch_accuracy_b
sum_accuracy[split][2] += batch_accuracy_c
if (split=='train'):
progbar.update(j, ta=batch_accuracy_c)
if (progbar is not None):
progbar.finish()
# Compute epoch loss/accuracy
epoch_loss = {split: sum_loss[split] / len(loaders[split][0]) for split in ["train", "val", "test"]}
epoch_accuracy = {split: [sum_accuracy[split][i] / len(loaders[split][0]) for i in range(len(sum_accuracy[split])) ] for split in ["train", "val", "test"]}
# # Store params at the best validation accuracy
# if save_param and epoch_accuracy["val"] > best_val_accuracy:
# # torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
# torch.save(net.state_dict(), f"{model_name}_best_val.pth")
# best_val_accuracy = epoch_accuracy["val"]
print(f"Epoch {epoch + 1}:")
# Update history
for split in ["train", "val", "test"]:
history_loss[split].append(epoch_loss[split])
history_accuracy[split].append(epoch_accuracy[split])
# Print info
print(f"\t{split}\tLoss: {epoch_loss[split]:0.5}\tVGG 1:{epoch_accuracy[split][0]:0.5}"
f"\tVGG 2:{epoch_accuracy[split][1]:0.5}\tVGG *:{epoch_accuracy[split][2]:0.5}")
if save_param:
torch.save({'vgg_a':nets[0].state_dict(),'vgg_b':nets[1].state_dict(),'vgg_star':nets[2].state_dict(),'classifier':nets[3].state_dict()},f'{model_name}.pth')
test(nets[0], nets[3], test_loader_all)
test(nets[1], nets[3], test_loader_all)
test(nets[2], nets[3], test_loader_all)
summed_state_dict = OrderedDict()
for key in nets[2].state_dict():
if key.find('conv') >=0:
#print(key)
summed_state_dict[key] = combo_fn(nets[0].state_dict()[key],nets[1].state_dict()[key])
else:
summed_state_dict[key] = nets[2].state_dict()[key]
nets[2].load_state_dict(summed_state_dict)
test(nets[2], nets[3], test_loader_all)
The edited parts are the last rows.

Related

Machine Translation FFN : Dimension problem due to window size

this is my first time creating a FFN to train it to translate French to English using word prediction:
Input are two arrays of size 2 x window_size + 1 from source language and window_size target language. And the label of size 1
For e.g for window_size = 2:
["je","mange", "la", "pomme","avec"]
and
["I", "eat"]
So the input of size [5] and [2] after concatenating => 7
Label: "the" (refering to "la" in French)
The label is changed to one-hot-encoding before comparing with yHat
I'm using unique index for each word ( 1 to len(vocab) ) and train using the index (not the words)
The output of the FFN is a probability of the size of the vocab of the target language
The problem is that the FFN doesn't learn and the accuracy stays at 0.
When I print the size of y_final (target probability) and yHat (Model Hypo) they have different dimensions:
yHat.size()=[512, 7, 10212]
with 64 batch_size, 7 is the concatenated input size and 10212 size of target vocab, while
y_final.size()= [512, 10212]
And over all the forward method I have these sizes:
torch.Size([512, 5, 32])
torch.Size([512, 5, 64])
torch.Size([512, 5, 64])
torch.Size([512, 2, 256])
torch.Size([512, 2, 32])
torch.Size([512, 2, 64])
torch.Size([512, 2, 64])
torch.Size([512, 7, 64])
torch.Size([512, 7, 128])
torch.Size([512, 7, 10212])
Since the accuracy augments when yHat = y_final then I thought that it is never the case because they don't even have the same shapes (2D vs 3D). Is this the problem ?
Please refer to the code and if you need any other info please tell me.
The code is working fine, no errors.
trainingData = TensorDataset(encoded_source_windows, encoded_target_windows, encoded_labels)
# print(trainingData)
batchsize = 512
trainingLoader = DataLoader(trainingData, batch_size=batchsize, drop_last=True)
def ffnModel(vocabSize1,vocabSize2, learningRate=0.01):
class ffNetwork(nn.Module):
def __init__(self):
super().__init__()
self.embeds_src = nn.Embedding(vocabSize1, 256)
self.embeds_target = nn.Embedding(vocabSize2, 256)
# input layer
self.inputSource = nn.Linear(256, 32)
self.inputTarget = nn.Linear(256, 32)
# hidden layer 1
self.fc1 = nn.Linear(32, 64)
self.bnormS = nn.BatchNorm1d(5)
self.bnormT = nn.BatchNorm1d(2)
# Layer(s) afer Concatenation:
self.fc2 = nn.Linear(64,128)
self.output = nn.Linear(128, vocabSize2)
self.softmaaax = nn.Softmax(dim=0)
# forward pass
def forward(self, xSource, xTarget):
xSource = self.embeds_src(xSource)
xSource = F.relu(self.inputSource(xSource))
xSource = F.relu(self.fc1(xSource))
xSource = self.bnormS(xSource)
xTarget = self.embeds_target(xTarget)
xTarget = F.relu(self.inputTarget(xTarget))
xTarget = F.relu(self.fc1(xTarget))
xTarget = self.bnormT(xTarget)
xCat = torch.cat((xSource, xTarget), dim=1)#dim=128 or 1 ?
xCat = F.relu(self.fc2(xCat))
print(xCat.size())
xCat = self.softmaaax(self.output(xCat))
return xCat
# creating instance of the class
net = ffNetwork()
# loss function
lossfun = nn.CrossEntropyLoss()
# lossfun = nn.NLLLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learningRate)
return net, lossfun, optimizer
def trainModel(vocabSize1,vocabSize2, learningRate):
# number of epochs
numepochs = 64
# create a new Model instance
net, lossfun, optimizer = ffnModel(vocabSize1,vocabSize2, learningRate)
# initialize losses
losses = torch.zeros(numepochs)
trainAcc = []
# loop over training data batches
batchAcc = []
batchLoss = []
for epochi in range(numepochs):
#Switching on training mode
net.train()
# loop over training data batches
batchAcc = []
batchLoss = []
for A, B, y in tqdm(trainingLoader):
# forward pass and loss
final_y = []
for i in range(y.size(dim=0)):
yy = [0] * target_vocab_length
yy[y[i]] = 1
final_y.append(yy)
final_y = torch.tensor(final_y)
yHat = net(A, B)
loss = lossfun(yHat, final_y)
################
print("\n yHat.size()")
print(yHat.size())
print("final_y.size()")
print(final_y.size())
# backprop
optimizer.zero_grad()
loss.backward()
optimizer.step()
# loss from this batch
batchLoss.append(loss.item())
print(f'batchLoss: {loss.item()}')
#Accuracy calculator:
matches = torch.argmax(yHat) == final_y # booleans (false/true)
matchesNumeric = matches.float() # convert to numbers (0/1)
accuracyPct = 100 * torch.mean(matchesNumeric) # average and x100
batchAcc.append(accuracyPct) # add to list of accuracies
print(f'accuracyPct: {accuracyPct}')
trainAcc.append(np.mean(batchAcc))
losses[epochi] = np.mean(batchLoss)
return trainAcc,losses,net
trainAcc,losses,net = trainModel(len(source_vocab),len(target_vocab), 0.01)
print(trainAcc)

OR-tools VRP solver returning one job per vehicle

I am using Google OR-Tools in python to solve a capacitated VRP with pickup/delivery. In many cases the solver works well and returns reasonable solutions, but we have found that for some data sets the solver will always return one job per truck regardless of the time involved for the route.
I have the model set up as follows:
My initial vehicle count is equal to the number of jobs in the data-set, and we allow OR-Tools to automatically minimize the truck count.
Each job's pickup location has a demand of 1 and each job's dropoff location has a demand of -1, to enforce delivery immediately after pickup.
We set the maximum drive time per vehicle to 8 hours.
Then, each job has an associated quantity attached for pickup, and we separate this job into multiple deliveries based on a truck's capacity. For instance, if a job requires 60 tons delivered, we represent that as three jobs at 20 tons each (the maximum a vehicle is allowed to carry on an interstate in the U.S)
Now, we have a simple data set with a pickup location at: 698 Longtown Rd, Columbia, SC and a dropoff location at: 121 Chappell Creek Rd Hopkins, SC. This is a drive time of 32 minutes, or a total trip time of 64 minutes. This job has an associated quantity of 60 tons, which will require 3 truck loads.
The results we receive from or-tools shows one load per truck, and this result does not change regardless of how long we allow the solver to run. The optimal solution would allow one truck to complete all loads, as this is still drastically under the 8 hour drive time limit.
Here is my code:
import json
import math
import traceback
import urllib
import redis
import requests
import boto3
from signal import signal, SIGINT, SIGTERM
from ortools.constraint_solver import pywrapcp, routing_enums_pb2
url = 'https://test-api.truckit.com/api/2/signin'
api_data = {"password": "", "username": ""}
response = requests.post(url, json=api_data)
api_data = response.json()
def build_auth_header(token):
header = {'Authorization': f'Token {token}'}
return header
class SignalHandler:
def __init__(self):
self.received_signal = False
signal(SIGINT, self._signal_handler)
signal(SIGTERM, self._signal_handler)
def _signal_handler(self, signal, frame):
print(f"handling signal {signal}, exiting gracefully")
self.received_signal = True
sqs = boto3.resource("sqs")
queue = sqs.get_queue_by_name(QueueName="")
redisClient = redis.Redis(host='', port=6379,
password='')
def create_distance_matrix(data):
addresses = data["addresses"]
API_key = data["API_key"]
origin_addresses = []
dest_addresses = addresses
distance_matrix = []
responses = {}
responses['destination_addresses'] = []
responses['origin_addresses'] = []
responses['rows'] = []
# Send q requests, returning max_rows rows per request.
for i in range(0, len(addresses)):
origin_addresses.clear()
origin_addresses.append(addresses[i])
for j in range(0, len(addresses), 25):
dest_addresses_request = addresses[j:j + 25]
response = send_request(origin_addresses, dest_addresses_request, API_key)
responses['origin_addresses'] = response['origin_addresses']
for destination_address in response['destination_addresses']:
responses['destination_addresses'].append(destination_address)
for row in response['rows']:
if len(responses['rows']) == 0:
responses['rows'].append(row)
else:
for element in row['elements']:
responses['rows'][0]['elements'].append(element)
distance_matrix += build_distance_matrix(responses)
responses['origin_addresses'].clear()
responses['destination_addresses'].clear()
responses['rows'].clear()
return distance_matrix
def send_request(origin_addresses, dest_addresses, API_key):
""" Build and send request for the given origin and destination addresses."""
def build_address_str(addresses):
# Build a pipe-separated string of addresses
address_str = ''
for i in range(len(addresses) - 1):
address_str += addresses[i] + '|'
address_str += addresses[-1]
return address_str
request = 'https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial'
origin_address_str = build_address_str(origin_addresses)
dest_address_str = build_address_str(dest_addresses)
request = request + '&origins=' + origin_address_str + '&destinations=' + \
dest_address_str + '&key=' + API_key
jsonResult = urllib.request.urlopen(request).read()
response = json.loads(jsonResult)
return response
def build_distance_matrix(response):
distance_matrix = []
for row in response['rows']:
row_list = [row['elements'][j]['duration']['value'] for j in range(len(row['elements']))]
distance_matrix.append(row_list)
return distance_matrix
def process_message(message_body):
print(f"processing message: {message_body}")
data = json.loads(message_body)
data_matrix = {}
data_matrix['problem_id'] = data['problemId']
data_matrix["addresses"] = []
data_matrix["pickups_deliveries"] = []
data_matrix["demands"] = []
data_matrix["jobOrderIDs"] = []
depot_address = str(data["depot"]["latitude"]) + "," + str(data["depot"]["longitude"])
data_matrix["jobOrderIDs"].append(0)
data_matrix["addresses"].append(depot_address)
hash_key = data["hashKey"]
for location in data["locationList"]:
pick_lat = location["PickupLatitude"]
pick_long = location["PickupLongitude"]
drop_lat = location["DropoffLatitude"]
drop_long = location["DropoffLongitude"]
jobOrderId = location["jobOrderID"]
demand = math.ceil(float(int(location["totalQuantity"]) / 20))
for i in range(0, demand):
data_matrix["addresses"].append(str(pick_lat) + ',' + str(pick_long))
data_matrix["addresses"].append(str(drop_lat) + ',' + str(drop_long))
data_matrix["jobOrderIDs"].append(str(jobOrderId))
data_matrix["jobOrderIDs"].append(str(jobOrderId))
data_matrix["demands"].append(0)
for i in range(1, len(data_matrix["addresses"]) - 1, 2):
data_matrix["pickups_deliveries"].append([i, i + 1])
data_matrix["demands"].append(1)
data_matrix["demands"].append(-1)
data_matrix["num_vehicles"] = int(len(data_matrix["addresses"]) / 2)
data_matrix["vehicle_capacities"] = []
for i in range(0, data_matrix["num_vehicles"]):
data_matrix["vehicle_capacities"].append(1)
data_matrix["depot"] = 0
data_matrix["API_key"] = ''
data_matrix["distance_matrix"] = create_distance_matrix(data_matrix)
# Create the routing index manager.
manager = pywrapcp.RoutingIndexManager(len(data_matrix['distance_matrix']),
data_matrix['num_vehicles'], data_matrix['depot'])
# Create Routing Model.
routing = pywrapcp.RoutingModel(manager)
# Define cost of each arc.
def distance_callback(from_index, to_index):
"""Returns the manhattan distance between the two nodes."""
# Convert from routing variable Index to distance matrix NodeIndex.
from_node = manager.IndexToNode(from_index)
to_node = manager.IndexToNode(to_index)
return data_matrix['distance_matrix'][from_node][to_node]*1000
transit_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
# Add Distance constraint.
dimension_name = 'Duration'
routing.AddDimension(
transit_callback_index,
0, # no slack
28800*1000, # vehicle maximum travel hours
True, # start cumul to zero
dimension_name)
distance_dimension = routing.GetDimensionOrDie(dimension_name)
distance_dimension.SetGlobalSpanCostCoefficient(100)
def demand_callback(from_index):
"""Returns the demand of the node."""
# Convert from routing variable Index to demands NodeIndex.
from_node = manager.IndexToNode(from_index)
return data_matrix['demands'][from_node]
demand_callback_index = routing.RegisterUnaryTransitCallback(
demand_callback)
routing.AddDimensionWithVehicleCapacity(
demand_callback_index,
0, # null capacity slack
data_matrix['vehicle_capacities'], # vehicle maximum capacities
True, # start cumul to zero
'Capacity')
# Define Transportation Requests.
for request in data_matrix['pickups_deliveries']:
pickup_index = manager.NodeToIndex(request[0])
delivery_index = manager.NodeToIndex(request[1])
routing.AddPickupAndDelivery(pickup_index, delivery_index)
routing.solver().Add(
routing.VehicleVar(pickup_index) == routing.VehicleVar(
delivery_index))
routing.solver().Add(
distance_dimension.CumulVar(pickup_index) <=
distance_dimension.CumulVar(delivery_index))
# Setting first solution heuristic.
search_parameters = pywrapcp.DefaultRoutingSearchParameters()
search_parameters.local_search_metaheuristic = (
routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
search_parameters.time_limit.seconds = 1200
search_parameters.log_search = True
search_parameters.first_solution_strategy = (
routing_enums_pb2.FirstSolutionStrategy.AUTOMATIC)
search_parameters.use_full_propagation = True
# Solve the problem.
solution = routing.SolveWithParameters(search_parameters)
if solution:
solution_dict = {}
for vehicle_id in range(data_matrix['num_vehicles']):
index = routing.Start(vehicle_id)
plan_output = ''
route_distance = 0
route_load = 0
while not routing.IsEnd(index):
node_index = manager.IndexToNode(index)
plan_output += '{0},'.format(data_matrix['jobOrderIDs'][node_index])
previous_index = index
index = solution.Value(routing.NextVar(index))
plan_output += '{0},'.format(data_matrix['jobOrderIDs'][manager.IndexToNode(index)])
plan_output = plan_output[:-1]
plan_words = plan_output.split(",")
plan_output = ''
for i in range(len(plan_words)):
if (i % 2 == 0):
plan_output += plan_words[i] + ","
plan_output = plan_output[:-1]
plan_output += ",0"
if plan_output != 0 and plan_output != str(0) and plan_output != str('0,0'):
print(plan_output)
solution_dict[vehicle_id] = plan_output
# trucks_url = 'https://test-api.truckit.com/api/2/trucks'
trucks_url = 'https://test-api.truckit.com/api/2/job-orders/smart-dispatch/' + str(data_matrix['problem_id'])
head = build_auth_header(api_data["authToken"])
status = {}
ride_list = []
dummy_location_dict = {}
dummy_id_dict = {}
dummy_id_dict["id"] = 0
dummy_id_dict["name"] = ""
dummy_location_dict["location"] = dummy_id_dict
dummy_location_dict["timestamp"] = 0
ride_list.append(dummy_location_dict)
redisClient.hset(hash_key, "solution", json.dumps(solution_dict))
redisClient.hset(hash_key, "ride_list", json.dumps(ride_list))
json_data = {"status": "completed"}
api_response = requests.post(trucks_url, headers=head, json=json_data)
print_solution(data_matrix, manager, routing, solution)
def print_solution(data, manager, routing, solution):
"""Prints solution on console."""
print(f'Objective: {solution.ObjectiveValue()}')
total_distance = 0
total_load = 0
for vehicle_id in range(data['num_vehicles']):
index = routing.Start(vehicle_id)
plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
route_distance = 0
route_load = 0
while not routing.IsEnd(index):
node_index = manager.IndexToNode(index)
plan_output += ' {0} -> '.format(node_index)
previous_index = index
index = solution.Value(routing.NextVar(index))
try:
distance = data['distance_matrix'][previous_index][index]
route_distance += distance
except:
distance = distance
plan_output += ' {0}\n'.format(manager.IndexToNode(index))
plan_output += 'Time of the route: {} hours\n'.format(str(float(route_distance / (60 * 60))))
print(plan_output)
total_distance += route_distance
print('Total distance of all routes: {}m'.format(total_distance))
if __name__ == "__main__":
signal_handler = SignalHandler()
while not signal_handler.received_signal:
messages = queue.receive_messages(
MaxNumberOfMessages=1,
WaitTimeSeconds=1
)
for message in messages:
try:
process_message(message.body)
message.delete()
except Exception as e:
print(f"exception while processing message: {repr(e)}")
traceback.print_exc()
continue
message.delete()
IF anyone has any suggestions as to what the problem may be, your help is greatly appreciated.

Using zero_grad() after loss.backward(), but still receives RuntimeError: "Trying to backward through the graph a second time..."

Below is my implementation of a2c using PyTorch. Upon learning about backpropagation in PyTorch, I have known to zero_grad() the optimizer after each update iteration. However, there is still a RunTime error on second-time backpropagation.
def torchworker(number, model):
worker_env = gym.make("Taxi-v3").env
max_steps_per_episode = 2000
worker_opt = optim.Adam(lr=5e-4, params=model.parameters())
p_history = []
val_history = []
r_history = []
running_reward = 0
episode_count = 0
under = 0
start = time.time()
for i in range(2):
state = worker_env.reset()
episode_reward = 0
penalties = 0
drop = 0
print("Episode {} begins ({})".format(episode_count, number))
worker_env.render()
criterion = nn.SmoothL1Loss()
time_solve = 0
for _ in range(1, max_steps_per_episode):
#worker_env.render()
state = torch.tensor(state, dtype=torch.long)
action_probs = model.forward(state)[0]
critic_value = model.forward(state)[1]
val_history.append((state, critic_value[0]))
# Choose action
action = np.random.choice(6, p=action_probs.detach().numpy())
p_history.append(torch.log(action_probs[action]))
# Apply chosen action
state, reward, done, _ = worker_env.step(action)
r_history.append(reward)
episode_reward += reward
time_solve += 1
if reward == -10:
penalties += 1
elif reward == 20:
drop += 1
if done:
break
# Update running reward to check condition for solving
running_reward = (running_reward * (episode_count) + episode_reward) / (episode_count + 1)
# Calculate discounted returns
returns = deque(maxlen=3500)
discounted_sum = 0
for r in r_history[::-1]:
discounted_sum = r + gamma * discounted_sum
returns.appendleft(discounted_sum)
# Calculate actor losses and critic losses
loss_actor_value = 0
loss_critic_value = 0
history = zip(p_history, val_history, returns)
for log_prob, value, ret in history:
diff = ret - value[1]
loss_actor_value += -log_prob * diff
ret_tensor = torch.tensor(ret, dtype=torch.float32)
loss_critic_value += criterion(value[1], ret_tensor)
loss = loss_actor_value + 0.1 * loss_critic_value
print(loss)
# Update params
loss.backward()
worker_opt.step()
worker_opt.zero_grad()
# Log details
end = time.time()
episode_count += 1
if episode_count % 1 == 0:
worker_env.render()
if running_reward > -50: # Condition to consider the task solved
under += 1
if under > 5:
print("Solved at episode {} !".format(episode_count))
break
I believe there may be something to do with the architecture of my AC model, so I also include it here for reference.
class ActorCriticNetwork(nn.Module):
def __init__(self, num_inputs, num_hidden, num_actions):
super(ActorCriticNetwork, self).__init__()
self.embed = nn.Embedding(500, 10)
self.fc1 = nn.Linear(10, num_hidden * 2)
self.fc2 = nn.Linear(num_hidden * 2, num_hidden)
self.c = nn.Linear(num_hidden, 1)
self.fc3 = nn.Linear(num_hidden, num_hidden)
self.a = nn.Linear(num_hidden, num_actions)
def forward(self, x):
out = F.relu(self.embed(x))
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
critic = self.c(out)
out = F.relu(self.fc3(out.detach()))
actor = F.softmax(self.a(out), dim=-1)
return actor, critic
Would you please tell me what the mistake here is? Thank you in advance.
SOLVED: I forgot to clear the history of probabilities, action-values and rewards after iterations. It is clear why that would cause the issue, as the older elements would cause propagating through old dcgs.

Keras shape error when checking input

I am trying to train a simple MLP model that maps input questions (using a 300D word embedding) and image features extracted using a pretrained VGG16 model to a feature vector of fixed length. However, I can't figure out how to fix the error mentioned below. Here is the code I'm trying to run at the moment:
parser = argparse.ArgumentParser()
parser.add_argument('-num_hidden_units', type=int, default=1024)
parser.add_argument('-num_hidden_layers', type=int, default=3)
parser.add_argument('-dropout', type=float, default=0.5)
parser.add_argument('-activation', type=str, default='tanh')
parser.add_argument('-language_only', type=bool, default= False)
parser.add_argument('-num_epochs', type=int, default=10) #default=100
parser.add_argument('-model_save_interval', type=int, default=10)
parser.add_argument('-batch_size', type=int, default=128)
args = parser.parse_args()
questions_train = open('data/qa/preprocess/questions_train2014.txt', 'r').read().splitlines()
answers_train = open('data/qa/preprocess/answers_train2014_modal.txt', 'r').read().splitlines()
images_train = open('data/qa/preprocess/images_train2014.txt', 'r').read().splitlines()
vgg_model_path = 'data/coco/vgg_feats.mat'
maxAnswers = 1000
questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)
#encode the remaining answers
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(answers_train)
nb_classes = len(list(labelencoder.classes_))
joblib.dump(labelencoder,'models/labelencoder.pkl')
features_struct = scipy.io.loadmat(vgg_model_path)
VGGfeatures = features_struct['feats']
print ('loaded vgg features')
image_ids = open('data/coco/coco_vgg_IDMap.txt').read().splitlines()
id_map = {}
for ids in image_ids:
id_split = ids.split()
id_map[id_split[0]] = int(id_split[1])
nlp = English()
print ('loaded word2vec features...')
img_dim = 4096
word_vec_dim = 300
model = Sequential()
if args.language_only:
model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
else:
model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
model.add(Activation(args.activation))
if args.dropout>0:
model.add(Dropout(args.dropout))
for i in range(args.num_hidden_layers-1):
model.add(Dense(args.num_hidden_units, init='uniform'))
model.add(Activation(args.activation))
if args.dropout>0:
model.add(Dropout(args.dropout))
model.add(Dense(nb_classes, init='uniform'))
model.add(Activation('softmax'))
json_string = model.to_json()
if args.language_only:
model_file_name = 'models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
else:
model_file_name = 'models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
open(model_file_name + '.json', 'w').write(json_string)
print ('Compiling model...')
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
print ('Compilation done...')
print ('Training started...')
for k in range(args.num_epochs):
#shuffle the data points before going through them
index_shuf = list(range(len(questions_train)))
shuffle(index_shuf)
questions_train = [questions_train[i] for i in index_shuf]
answers_train = [answers_train[i] for i in index_shuf]
images_train = [images_train[i] for i in index_shuf]
progbar = generic_utils.Progbar(len(questions_train))
for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
if args.language_only:
X_batch = X_q_batch
else:
X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
X_batch = np.hstack((X_q_batch, X_i_batch))
Y_batch = get_answers_matrix(an_batch, labelencoder)
loss = model.train_on_batch(X_batch, Y_batch)
progbar.add(args.batch_size, values=[("train loss", loss)])
#print type(loss)
if k%args.model_save_interval == 0:
model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
And here is the error I get:
Keras: Error when checking input: expected dense_9_input to have shape
(4396,) but got array with shape (4096,)
I think that the error lies in what you pass in the else statement in the first layer of your model versus what you pass in training. In your first layer you specify:
model = Sequential()
if args.language_only:
model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
else:
model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
You clearly pass input_dim = img_dim + word_vec_dim = 4096 + 300 = 4396. During training you pass:
X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
if args.language_only:
X_batch = X_q_batch
else:
X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
X_batch = np.hstack((X_q_batch, X_i_batch))
So, in the else branch, X_batch will have X_q_batch or X_i_batch rows, which apparently = 4096.
By the way, for debugging purposes, it would be easier to give your layers a name, e.g.
x = Dense(64, activation='relu', name="dense_one")
I hope this helps.

In tensorflow divide label and features is not supported?

I want to make the data which divided label and features, beause tf.nn.softmax_cross_entropy_with_logits required.
queue = tf.RandomShuffleQueue(
capacity=capacity,
min_after_dequeue=min_after_dequeue,
dtypes=[tf.float32],
shapes=[[n_input+1]] #
)
make the queue and put the label and features.
after that I should divide label and features for cost function. but how to do that?
Thank you
import tensorflow as tf
import numpy as np
# Parameters
learning_rate = 0.003
training_epochs = 30
batch_size = 2
display_step = 1
min_after_dequeue = 5
capacity = 16246832
# Network Parameters
# feature size
n_input = 199
# 1st layer num features
n_hidden_1 = 150
# 2nd layer num features
n_hidden_2 = 100
# 3rd layer num features
n_hidden_3 = 50
# 4th layer num features
n_hidden_4 = 30
#class
n_classes = 3
#read csv, 0 index is label
filename_queue = tf.train.string_input_producer(["data.csv"])
record_default = [[0.0] for x in xrange(200)] # with a label and 199 features
#testfile
reader = tf.TextLineReader()
#file read
key, value = reader.read(filename_queue)
#decode
features = tf.decode_csv(value, record_defaults= record_default)
featurespack = tf.pack(features)
#xy = tf.map_fn(fn = lambda f: [f[1:],f[0]], elems=featurespack)
#for the batch
queue = tf.RandomShuffleQueue(
capacity=capacity,
min_after_dequeue=min_after_dequeue,
dtypes=[tf.float32],
shapes=[[n_input+1]]
)
#enqueue
enqueue_op = queue.enqueue(featurespack)
#dequeue
inputs = queue.dequeue_many(batch_size)
#threading
qr = tf.train.QueueRunner(queue, [enqueue_op] * 4)
#features n=199
x = tf.placeholder("float", [None, n_input])
# class 0,1,2
y = tf.placeholder("float", [None, n_classes])
#dropout
dropout_keep_prob = tf.placeholder("float")
# Create model
def multilayer_perceptron(_X, _weights, _biases, _keep_prob):
layer_1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])), _keep_prob)
layer_2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_1, _weights['h2']), _biases['b2'])), _keep_prob)
layer_3 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_2, _weights['h3']), _biases['b3'])), _keep_prob)
layer_4 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_3, _weights['h4']), _biases['b4'])), _keep_prob)
return tf.sigmoid(tf.matmul(layer_4, _weights['out']) + _biases['out'])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=0.1)),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=0.1)),
'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=0.1)),
'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], stddev=0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'b3': tf.Variable(tf.random_normal([n_hidden_3])),
'b4': tf.Variable(tf.random_normal([n_hidden_4])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases, dropout_keep_prob)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.8).minimize(cost) # Adam Optimizer
# Initializing the variables
print "1"
with tf.Session() as sess:
#init
tf.initialize_all_variables().run
#what is
coord = tf.train.Coordinator()
#queue start what is
tf.train.start_queue_runners (coord=coord)
#i dont know well
enqueue_threads = qr.create_threads(sess, coord=coord, start=True)
print sess.run(features)
print sess.run(features)
print sess.run(features)
print sess.run(features)
print sess.run(features)
#
#print sess.run(feature)
#Training cycle
for epoch in range(training_epochs):
print epoch
avg_cost = 0.
# Loop over all batches
for i in range(10):
print i
if coord.should_stop():
break
#get inputs
inputs_value = sess.run(inputs)
#THIS IS NOT WORK
batch_xs = np.ndarray([x[1:] for x in inputs_value])
batch_ys = np.ndarray([x[0] for x in inputs_value])
print 'batch', len(batch_ys), len(batch_xs)
#batch_xs, batch_ys = mnist.train.next_batch(batch_size)
# Fit training using batch data
#optimzierm put x and y
sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob: 0.5})
# Compute average loss
avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob: 0.5})/batch_size
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost))
# Test model
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
#print ("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels, dropout_keep_prob: 1.}))
coord.request_stop ()
coord.join (enqueue_threads)
print ("Optimization Finished!")