OR-Tools VRP solver returning one job per vehicle

I am using Google OR-Tools in Python to solve a capacitated VRP with pickup/delivery. In many cases the solver works well and returns reasonable solutions, but we have found that for some data sets the solver always returns one job per truck, regardless of the time involved for the route.
I have the model set up as follows:
My initial vehicle count is equal to the number of jobs in the data set, and we allow OR-Tools to automatically minimize the truck count.
Each job's pickup location has a demand of 1 and each job's dropoff location has a demand of -1, to enforce delivery immediately after pickup.
We set the maximum drive time per vehicle to 8 hours.
Then, each job has an associated quantity attached for pickup, and we separate this job into multiple deliveries based on a truck's capacity. For instance, if a job requires 60 tons delivered, we represent that as three jobs at 20 tons each (the maximum a vehicle is allowed to carry on an interstate in the U.S.).
Now, we have a simple data set with a pickup location at: 698 Longtown Rd, Columbia, SC and a dropoff location at: 121 Chappell Creek Rd Hopkins, SC. This is a drive time of 32 minutes, or a total trip time of 64 minutes. This job has an associated quantity of 60 tons, which will require 3 truck loads.
The results we receive from OR-Tools show one load per truck, and this result does not change regardless of how long we allow the solver to run. The optimal solution would allow one truck to complete all loads, as this is still drastically under the 8-hour drive time limit.
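To make the splitting concrete, here is a minimal sketch (an illustrative helper, not my production code) of how one 60-ton job expands into three pickup/delivery pairs with +1/-1 demands:

import math

TRUCK_CAPACITY_TONS = 20  # interstate legal maximum per load

def split_job(pickup, dropoff, total_tons, start_node=1):
    """Split one job into ceil(total/capacity) pickup/delivery pairs (+1/-1 demand)."""
    loads = math.ceil(total_tons / TRUCK_CAPACITY_TONS)
    addresses, demands, pairs = [], [], []
    for k in range(loads):
        node = start_node + 2 * k       # node indices after the depot (node 0)
        addresses += [pickup, dropoff]
        demands += [1, -1]              # +1 at pickup, -1 at dropoff
        pairs.append([node, node + 1])
    return addresses, demands, pairs

addresses, demands, pairs = split_job("698 Longtown Rd, Columbia, SC",
                                      "121 Chappell Creek Rd, Hopkins, SC", 60)
assert len(pairs) == 3  # 60 tons -> 3 truckloads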
Here is my code:
import json
import math
import traceback
import urllib.request  # urlopen is used below, so import the submodule
import redis
import requests
import boto3
from signal import signal, SIGINT, SIGTERM
from ortools.constraint_solver import pywrapcp, routing_enums_pb2

url = 'https://test-api.truckit.com/api/2/signin'
api_data = {"password": "", "username": ""}
response = requests.post(url, json=api_data)
api_data = response.json()

def build_auth_header(token):
    header = {'Authorization': f'Token {token}'}
    return header

class SignalHandler:
    def __init__(self):
        self.received_signal = False
        signal(SIGINT, self._signal_handler)
        signal(SIGTERM, self._signal_handler)

    def _signal_handler(self, signal, frame):
        print(f"handling signal {signal}, exiting gracefully")
        self.received_signal = True

sqs = boto3.resource("sqs")
queue = sqs.get_queue_by_name(QueueName="")
redisClient = redis.Redis(host='', port=6379, password='')
def create_distance_matrix(data):
    addresses = data["addresses"]
    API_key = data["API_key"]
    origin_addresses = []
    dest_addresses = addresses
    distance_matrix = []
    responses = {}
    responses['destination_addresses'] = []
    responses['origin_addresses'] = []
    responses['rows'] = []
    # One origin per outer iteration; destinations are requested in chunks
    # of 25 (the Distance Matrix API limit per request).
    for i in range(0, len(addresses)):
        origin_addresses.clear()
        origin_addresses.append(addresses[i])
        for j in range(0, len(addresses), 25):
            dest_addresses_request = addresses[j:j + 25]
            response = send_request(origin_addresses, dest_addresses_request, API_key)
            responses['origin_addresses'] = response['origin_addresses']
            for destination_address in response['destination_addresses']:
                responses['destination_addresses'].append(destination_address)
            for row in response['rows']:
                if len(responses['rows']) == 0:
                    responses['rows'].append(row)
                else:
                    for element in row['elements']:
                        responses['rows'][0]['elements'].append(element)
        distance_matrix += build_distance_matrix(responses)
        responses['origin_addresses'].clear()
        responses['destination_addresses'].clear()
        responses['rows'].clear()
    return distance_matrix
def send_request(origin_addresses, dest_addresses, API_key):
    """Build and send a request for the given origin and destination addresses."""
    def build_address_str(addresses):
        # Build a pipe-separated string of addresses
        address_str = ''
        for i in range(len(addresses) - 1):
            address_str += addresses[i] + '|'
        address_str += addresses[-1]
        return address_str

    request = 'https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial'
    origin_address_str = build_address_str(origin_addresses)
    dest_address_str = build_address_str(dest_addresses)
    request = request + '&origins=' + origin_address_str + '&destinations=' + \
              dest_address_str + '&key=' + API_key
    jsonResult = urllib.request.urlopen(request).read()
    response = json.loads(jsonResult)
    return response

def build_distance_matrix(response):
    # Note: the values extracted are durations in seconds, not distances.
    distance_matrix = []
    for row in response['rows']:
        row_list = [row['elements'][j]['duration']['value'] for j in range(len(row['elements']))]
        distance_matrix.append(row_list)
    return distance_matrix
def process_message(message_body):
    print(f"processing message: {message_body}")
    data = json.loads(message_body)
    data_matrix = {}
    data_matrix['problem_id'] = data['problemId']
    data_matrix["addresses"] = []
    data_matrix["pickups_deliveries"] = []
    data_matrix["demands"] = []
    data_matrix["jobOrderIDs"] = []
    depot_address = str(data["depot"]["latitude"]) + "," + str(data["depot"]["longitude"])
    data_matrix["jobOrderIDs"].append(0)
    data_matrix["addresses"].append(depot_address)
    hash_key = data["hashKey"]
    for location in data["locationList"]:
        pick_lat = location["PickupLatitude"]
        pick_long = location["PickupLongitude"]
        drop_lat = location["DropoffLatitude"]
        drop_long = location["DropoffLongitude"]
        jobOrderId = location["jobOrderID"]
        # One pickup/delivery pair per 20-ton truckload
        demand = math.ceil(float(int(location["totalQuantity"]) / 20))
        for i in range(0, demand):
            data_matrix["addresses"].append(str(pick_lat) + ',' + str(pick_long))
            data_matrix["addresses"].append(str(drop_lat) + ',' + str(drop_long))
            data_matrix["jobOrderIDs"].append(str(jobOrderId))
            data_matrix["jobOrderIDs"].append(str(jobOrderId))
    data_matrix["demands"].append(0)  # depot
    for i in range(1, len(data_matrix["addresses"]) - 1, 2):
        data_matrix["pickups_deliveries"].append([i, i + 1])
        data_matrix["demands"].append(1)
        data_matrix["demands"].append(-1)
    data_matrix["num_vehicles"] = int(len(data_matrix["addresses"]) / 2)
    data_matrix["vehicle_capacities"] = []
    for i in range(0, data_matrix["num_vehicles"]):
        data_matrix["vehicle_capacities"].append(1)
    data_matrix["depot"] = 0
    data_matrix["API_key"] = ''
    data_matrix["distance_matrix"] = create_distance_matrix(data_matrix)
    # Create the routing index manager.
    manager = pywrapcp.RoutingIndexManager(len(data_matrix['distance_matrix']),
                                           data_matrix['num_vehicles'], data_matrix['depot'])
    # Create Routing Model.
    routing = pywrapcp.RoutingModel(manager)

    # Define cost of each arc.
    def distance_callback(from_index, to_index):
        """Returns the travel time between the two nodes."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data_matrix['distance_matrix'][from_node][to_node] * 1000

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # Add Duration constraint.
    dimension_name = 'Duration'
    routing.AddDimension(
        transit_callback_index,
        0,  # no slack
        28800 * 1000,  # vehicle maximum travel time: 8 hours in seconds, scaled by 1000
        True,  # start cumul to zero
        dimension_name)
    distance_dimension = routing.GetDimensionOrDie(dimension_name)
    distance_dimension.SetGlobalSpanCostCoefficient(100)

    def demand_callback(from_index):
        """Returns the demand of the node."""
        # Convert from routing variable Index to demands NodeIndex.
        from_node = manager.IndexToNode(from_index)
        return data_matrix['demands'][from_node]

    demand_callback_index = routing.RegisterUnaryTransitCallback(demand_callback)
    routing.AddDimensionWithVehicleCapacity(
        demand_callback_index,
        0,  # null capacity slack
        data_matrix['vehicle_capacities'],  # vehicle maximum capacities
        True,  # start cumul to zero
        'Capacity')

    # Define Transportation Requests.
    for request in data_matrix['pickups_deliveries']:
        pickup_index = manager.NodeToIndex(request[0])
        delivery_index = manager.NodeToIndex(request[1])
        routing.AddPickupAndDelivery(pickup_index, delivery_index)
        routing.solver().Add(
            routing.VehicleVar(pickup_index) == routing.VehicleVar(delivery_index))
        routing.solver().Add(
            distance_dimension.CumulVar(pickup_index) <=
            distance_dimension.CumulVar(delivery_index))

    # Setting first solution heuristic.
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.local_search_metaheuristic = (
        routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
    search_parameters.time_limit.seconds = 1200
    search_parameters.log_search = True
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.AUTOMATIC)
    search_parameters.use_full_propagation = True

    # Solve the problem.
    solution = routing.SolveWithParameters(search_parameters)
    if solution:
        solution_dict = {}
        for vehicle_id in range(data_matrix['num_vehicles']):
            index = routing.Start(vehicle_id)
            plan_output = ''
            route_distance = 0
            route_load = 0
            while not routing.IsEnd(index):
                node_index = manager.IndexToNode(index)
                plan_output += '{0},'.format(data_matrix['jobOrderIDs'][node_index])
                previous_index = index
                index = solution.Value(routing.NextVar(index))
            plan_output += '{0},'.format(data_matrix['jobOrderIDs'][manager.IndexToNode(index)])
            plan_output = plan_output[:-1]
            # Keep every other entry (each pair pushes the job order ID twice)
            # and terminate the route string at the depot.
            plan_words = plan_output.split(",")
            plan_output = ''
            for i in range(len(plan_words)):
                if i % 2 == 0:
                    plan_output += plan_words[i] + ","
            plan_output = plan_output[:-1]
            plan_output += ",0"
            if plan_output not in ('0', '0,0'):
                print(plan_output)
                solution_dict[vehicle_id] = plan_output
        # trucks_url = 'https://test-api.truckit.com/api/2/trucks'
        trucks_url = 'https://test-api.truckit.com/api/2/job-orders/smart-dispatch/' + str(data_matrix['problem_id'])
        head = build_auth_header(api_data["authToken"])
        status = {}
        ride_list = []
        dummy_location_dict = {}
        dummy_id_dict = {}
        dummy_id_dict["id"] = 0
        dummy_id_dict["name"] = ""
        dummy_location_dict["location"] = dummy_id_dict
        dummy_location_dict["timestamp"] = 0
        ride_list.append(dummy_location_dict)
        redisClient.hset(hash_key, "solution", json.dumps(solution_dict))
        redisClient.hset(hash_key, "ride_list", json.dumps(ride_list))
        json_data = {"status": "completed"}
        api_response = requests.post(trucks_url, headers=head, json=json_data)
        print_solution(data_matrix, manager, routing, solution)
def print_solution(data, manager, routing, solution):
    """Prints solution on console."""
    print(f'Objective: {solution.ObjectiveValue()}')
    total_distance = 0
    total_load = 0
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
        route_distance = 0
        route_load = 0
        while not routing.IsEnd(index):
            node_index = manager.IndexToNode(index)
            plan_output += ' {0} -> '.format(node_index)
            previous_index = index
            index = solution.Value(routing.NextVar(index))
            # Convert routing indices to node indices before indexing the matrix;
            # raw routing indices can run past the end of the matrix.
            route_distance += data['distance_matrix'][manager.IndexToNode(previous_index)][manager.IndexToNode(index)]
        plan_output += ' {0}\n'.format(manager.IndexToNode(index))
        plan_output += 'Time of the route: {} hours\n'.format(str(float(route_distance / (60 * 60))))
        print(plan_output)
        total_distance += route_distance
    print('Total distance of all routes: {}m'.format(total_distance))
if __name__ == "__main__":
    signal_handler = SignalHandler()
    while not signal_handler.received_signal:
        messages = queue.receive_messages(
            MaxNumberOfMessages=1,
            WaitTimeSeconds=1
        )
        for message in messages:
            try:
                process_message(message.body)
                message.delete()
            except Exception as e:
                print(f"exception while processing message: {repr(e)}")
                traceback.print_exc()
                continue
            message.delete()
If anyone has any suggestions as to what the problem may be, your help is greatly appreciated.

Related

route optimization with precedences between pairs of nodes

For my routing application I have multiple pairs of nodes which must be visited. The order of the nodes within each pair is not important, but both nodes in a pair must be visited directly after each other. Also, I have a max_travel_distance constraint per vehicle, and it is not required to visit all nodes if no feasible solution exists; meaning that if one node in a pair needs to be dropped, both nodes in the pair must be dropped.
For instance I have three pairs: [A,B], [C,D], [E,F], two valid solutions for two vehicles could be:
(if all node pairs can be covered)
1) 0->A->B->C->D->0
2) 0->F->E->0
or
(if only [A,B] is within max_travel_distance)
1) 0->A->B->0
2) 0->0
From the pickup and deliveries example I see how I can make sure that both nodes in a pair are always included in the solution. The problem is that I don't see how I can enforce a constraint so that the two nodes from the same pair are always visited directly after each other, with the order not mattering (A->B and B->A are both OK).
import math
from ortools.constraint_solver import pywrapcp
from ortools.constraint_solver import routing_enums_pb2

def calc_dmat(pairs, depot=[0, 0]):
    # Distance matrix
    ncol = len(pairs) * 2
    nodes = [depot] + [p for pair in pairs for p in pair]  # was the global `Pairs`; use the argument
    dmat = [[math.dist(c, r) for c in nodes] for r in nodes]
    return dmat
# Nodes and pairs
A = [1, 1]  # Coordinates
B = [1, 5]
C = [3, 1]
D = [3, 8]
E = [6, 3]
F = [6, 19]
Pairs = [
    [A, B],
    [C, D],
    [E, F]
]
data = {}
data["distance_matrix"] = calc_dmat(Pairs)
data["depot"] = 0
data["num_vehicles"] = 2
data["pickups_deliveries"] = [
    [1, 2],
    [3, 4],
    [5, 6]
]
def print_solution(data, manager, routing, solution):
    if solution is None:
        print("No solution")
        return
    print(f'Objective: {solution.ObjectiveValue()}')
    # Display dropped nodes.
    dropped_nodes = 'Dropped nodes:'
    for node in range(routing.Size()):
        if routing.IsStart(node) or routing.IsEnd(node):
            continue
        if solution.Value(routing.NextVar(node)) == node:
            dropped_nodes += ' {}'.format(manager.IndexToNode(node))
    print(dropped_nodes)
    max_route_distance = 0
    max_route_time = 0
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
        route_distance = 0
        route_time = 0
        while not routing.IsEnd(index):
            plan_output += ' {} -> '.format(manager.IndexToNode(index))
            previous_index = index
            index = solution.Value(routing.NextVar(index))
            route_distance += routing.GetArcCostForVehicle(
                previous_index, index, vehicle_id)
        plan_output += '{}\n'.format(manager.IndexToNode(index))
        plan_output += 'Distance of the route: {}m\n'.format(route_distance)
        print(plan_output)
        max_route_distance = max(route_distance, max_route_distance)
        max_route_time = max(route_time, max_route_time)
    print('Maximum of the route distances: {}m'.format(max_route_distance))
    print('Maximum of the route time: {}m'.format(max_route_time))
manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']),
                                       data['num_vehicles'], data['depot'])
routing = pywrapcp.RoutingModel(manager)

def distance_callback(from_index, to_index):
    from_node = manager.IndexToNode(from_index)
    to_node = manager.IndexToNode(to_index)
    return data["distance_matrix"][from_node][to_node]

transit_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

# Add Distance constraint.
dimension_name = 'Distance'
routing.AddDimension(
    transit_callback_index,
    0,
    18,
    True,
    dimension_name)
distance_dimension = routing.GetDimensionOrDie(dimension_name)

# Define Transportation Requests.
for request in data['pickups_deliveries']:
    pickup_index = manager.NodeToIndex(request[0])
    delivery_index = manager.NodeToIndex(request[1])
    routing.AddPickupAndDelivery(pickup_index, delivery_index)
    routing.solver().Add(
        routing.VehicleVar(pickup_index) == routing.VehicleVar(delivery_index))
    routing.solver().Add(
        distance_dimension.CumulVar(pickup_index) <=
        distance_dimension.CumulVar(delivery_index))

# Omit some nodes
for node in range(1, len(data["distance_matrix"])):
    routing.AddDisjunction([manager.NodeToIndex(node)], 13)

# Setting first solution heuristic.
search_parameters = pywrapcp.DefaultRoutingSearchParameters()
search_parameters.first_solution_strategy = (
    routing_enums_pb2.FirstSolutionStrategy.PARALLEL_CHEAPEST_INSERTION)

# Solve the problem.
solution = routing.SolveWithParameters(search_parameters)
# Print solution on console.
print_solution(data, manager, routing, solution)
Output:
Objective: 25
Dropped nodes:
Route for vehicle 0:
 0 -> 3 -> 1 -> 2 -> 4 -> 0
Distance of the route: 9m
Route for vehicle 1:
 0 -> 5 -> 6 -> 0
Distance of the route: 16m
Maximum of the route distances: 16m
Maximum of the route time: 0m
I don't see how I can add a constraint making sure both nodes from the same pair are visited successively, independently of the order. A solution to this would be highly appreciated.
One approach:
- Set a dimension representing rank (transit is always 1)
- Link the vehicle variables of the two nodes in a pair to be equal
- Add each one in a disjunction of size 1 (maybe both in a unique disjunction of size 2)
- Add a precedence on the cumul variables of the 2 elements of a pair
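A rough, untested sketch of those steps (replacing the per-node disjunctions in the question; assumes `routing`, `manager` and `data` from the code above, and reuses the drop penalty of 13):

# 'Rank' dimension: every visited node advances the cumul by exactly 1.
rank_cb = routing.RegisterUnaryTransitCallback(lambda index: 1)
routing.AddDimension(rank_cb, 0, 1000, True, 'Rank')  # 1000 is just a loose bound
rank_dim = routing.GetDimensionOrDie('Rank')
solver = routing.solver()
for a, b in data['pickups_deliveries']:
    a_idx = manager.NodeToIndex(a)
    b_idx = manager.NodeToIndex(b)
    # One disjunction for the pair: keep both or drop both (single penalty).
    routing.AddDisjunction([a_idx, b_idx], 13, 2)
    solver.Add(routing.ActiveVar(a_idx) == routing.ActiveVar(b_idx))
    # Same vehicle for both nodes.
    solver.Add(routing.VehicleVar(a_idx) == routing.VehicleVar(b_idx))
    # Back-to-back in either order, whenever the pair is performed.
    before = rank_dim.CumulVar(a_idx) + 1 == rank_dim.CumulVar(b_idx)
    after = rank_dim.CumulVar(b_idx) + 1 == rank_dim.CumulVar(a_idx)
    solver.Add(solver.Sum([before, after]) >= routing.ActiveVar(a_idx))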
Here is another approach.
Create 2 fake nodes 'a->b' and 'b->a', do not create the nodes a and b.
Distance from any node c to 'a->b' is distance c to a. Distance from 'a->b' to any node d is distance from b to d.
Same thing for b->a.
Add 'a->b' and 'b->a' in a disjunction.
And solve.
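A sketch of that encoding (illustrative and untested; `dist(p, q)` is whatever base metric you use, and `penalty` is the drop cost):

# Each unordered pair (a, b) becomes two fake nodes, 'a->b' and 'b->a'.
# Node 0 is the depot; pair k yields nodes 2k+1 and 2k+2.
def fake_node_matrix(depot, pairs, dist):
    # For each fake node keep (entry_point, exit_point).
    nodes = [(depot, depot)]
    for a, b in pairs:
        nodes.append((a, b))  # 'a->b': enter at a, leave from b
        nodes.append((b, a))  # 'b->a': enter at b, leave from a
    n = len(nodes)
    # Arc i -> j: exit point of i to entry point of j, plus j's internal leg
    # (folding the a-to-b distance into the incoming arc keeps route lengths right).
    return [[0 if i == j else
             dist(nodes[i][1], nodes[j][0]) + dist(nodes[j][0], nodes[j][1])
             for j in range(n)] for i in range(n)]

# One disjunction per pair: visit one orientation, or drop both for `penalty`.
for k in range(len(pairs)):
    routing.AddDisjunction([manager.NodeToIndex(2 * k + 1),
                            manager.NodeToIndex(2 * k + 2)], penalty)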

Using zero_grad() after loss.backward(), but still receives RuntimeError: "Trying to backward through the graph a second time..."

Below is my implementation of A2C using PyTorch. Having learned about backpropagation in PyTorch, I know to call zero_grad() on the optimizer after each update iteration. However, I still get a RuntimeError on the second backward pass.
# Imports inferred from usage (gamma and the model are defined elsewhere):
import time
from collections import deque

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

def torchworker(number, model):
    worker_env = gym.make("Taxi-v3").env
    max_steps_per_episode = 2000
    worker_opt = optim.Adam(lr=5e-4, params=model.parameters())
    p_history = []
    val_history = []
    r_history = []
    running_reward = 0
    episode_count = 0
    under = 0
    start = time.time()

    for i in range(2):
        state = worker_env.reset()
        episode_reward = 0
        penalties = 0
        drop = 0
        print("Episode {} begins ({})".format(episode_count, number))
        worker_env.render()
        criterion = nn.SmoothL1Loss()
        time_solve = 0

        for _ in range(1, max_steps_per_episode):
            # worker_env.render()
            state = torch.tensor(state, dtype=torch.long)
            action_probs = model.forward(state)[0]
            critic_value = model.forward(state)[1]
            val_history.append((state, critic_value[0]))

            # Choose action
            action = np.random.choice(6, p=action_probs.detach().numpy())
            p_history.append(torch.log(action_probs[action]))

            # Apply chosen action
            state, reward, done, _ = worker_env.step(action)
            r_history.append(reward)
            episode_reward += reward
            time_solve += 1

            if reward == -10:
                penalties += 1
            elif reward == 20:
                drop += 1
            if done:
                break

        # Update running reward to check condition for solving
        running_reward = (running_reward * (episode_count) + episode_reward) / (episode_count + 1)

        # Calculate discounted returns
        returns = deque(maxlen=3500)
        discounted_sum = 0
        for r in r_history[::-1]:
            discounted_sum = r + gamma * discounted_sum
            returns.appendleft(discounted_sum)

        # Calculate actor losses and critic losses
        loss_actor_value = 0
        loss_critic_value = 0
        history = zip(p_history, val_history, returns)
        for log_prob, value, ret in history:
            diff = ret - value[1]
            loss_actor_value += -log_prob * diff
            ret_tensor = torch.tensor(ret, dtype=torch.float32)
            loss_critic_value += criterion(value[1], ret_tensor)
        loss = loss_actor_value + 0.1 * loss_critic_value
        print(loss)

        # Update params
        loss.backward()
        worker_opt.step()
        worker_opt.zero_grad()

        # Log details
        end = time.time()
        episode_count += 1
        if episode_count % 1 == 0:
            worker_env.render()
        if running_reward > -50:  # Condition to consider the task solved
            under += 1
        if under > 5:
            print("Solved at episode {} !".format(episode_count))
            break
I believe it may have something to do with the architecture of my AC model, so I include it here for reference.
import torch.nn as nn
import torch.nn.functional as F

class ActorCriticNetwork(nn.Module):
    def __init__(self, num_inputs, num_hidden, num_actions):
        super(ActorCriticNetwork, self).__init__()
        self.embed = nn.Embedding(500, 10)
        self.fc1 = nn.Linear(10, num_hidden * 2)
        self.fc2 = nn.Linear(num_hidden * 2, num_hidden)
        self.c = nn.Linear(num_hidden, 1)
        self.fc3 = nn.Linear(num_hidden, num_hidden)
        self.a = nn.Linear(num_hidden, num_actions)

    def forward(self, x):
        out = F.relu(self.embed(x))
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        critic = self.c(out)
        out = F.relu(self.fc3(out.detach()))
        actor = F.softmax(self.a(out), dim=-1)
        return actor, critic
Would you please tell me what the mistake here is? Thank you in advance.
SOLVED: I forgot to clear the history of probabilities, action values and rewards between iterations. It is clear why that would cause the issue: the stale elements force backpropagation through the previous iterations' computation graphs.
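For anyone hitting the same error, the fix amounts to clearing the buffers at the end of each update step, something like:

# After each optimizer step, clear the episode buffers so the next backward()
# does not reach back into the previous episode's computation graph.
loss.backward()
worker_opt.step()
worker_opt.zero_grad()
p_history.clear()    # stored log-probs reference the old graph
val_history.clear()  # stored critic values reference the old graph
r_history.clear()    # rewards (plain floats, but keep the buffers in sync)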

Optimal dispatch of power generators

I want to write MATLAB code that optimally dispatches power generators to satisfy electric demand while also considering profit. I used the code below, which is from the MATLAB website: https://ww2.mathworks.cn/help/optim/examples/optimal-dispatch-of-power-generators.html?requestedDomain=en
But this code does not consider the demand.
My goal is: there are 20 power plants and I want to schedule these plants to satisfy demand, where the running power level can be any value between the minimum and maximum power of each plant. High-cost power plants should be dispatched too.
name = 'generationcost.xlsx';
poolPrice = xlsread(name,'AD3:AD50'); % Revenue in dollars per MWh in interval
demand = xlsread(name,'AC3:AC52');
month = numel(demand);
e = xlsread(name,'W2:W4')'; %Cost for a unit of fuel
fuelPrice = repmat(e,1,1,2);
nPeriods = length(poolPrice);
nGens = 3;
genhigh = xlsread(name,'F2:F4');
genlow = xlsread(name,'Q2:Q4');
gen = [genlow(:),genhigh(:)]; %MW
fuel = [50,330;45,325;55,750]; % Fuel consumption for generator
startCost = [10000;10000;10000]; %Cost in dollars to start a generator after it has been off
y = optimvar('y',nPeriods,nGens,{'Low','High'},'Type','integer','LowerBound',0,'UpperBound',1);
z = optimvar('z',nPeriods,nGens,'Type','integer','LowerBound',0,'UpperBound',1);
powercons = y(:,:,'Low') + y(:,:,'High') <= 1;
yFuel = zeros(nPeriods,nGens,2);
yFuel(:,1,1) = fuel(1,1);
yFuel(:,1,2) = fuel(1,2);
yFuel(:,2,1) = fuel(2,1);
yFuel(:,2,2) = fuel(2,2);
yFuel(:,3,1) = fuel(3,1);
yFuel(:,3,2) = fuel(3,2);
fuelUsed = sum(reshape((sum(y.*yFuel)),[3 2])');
%fuelcons = fuelUsed <= totalFuel;
w = optimexpr(nPeriods,nGens);
idx = 1:(nPeriods-1);
w(idx,:) = y(idx+1,:,'Low') - y(idx,:,'Low') + y(idx+1,:,'High') - y(idx,:,'High');
w(nPeriods,:) = y(1,:,'Low') - y(nPeriods,:,'Low') + y(1,:,'High') - y(nPeriods,:,'High');
switchcons = w - z <= 0;
generatorlevel = zeros(size(yFuel));
generatorlevel(:,1,1) = gen(1,1);
generatorlevel(:,1,2) = gen(1,2);
generatorlevel(:,2,1) = gen(2,1);
generatorlevel(:,2,2) = gen(2,2);
generatorlevel(:,3,1) = gen(3,1);
generatorlevel(:,3,2) = gen(3,2);
revenue = optimexpr(size(y));
for ii = 1:nPeriods
revenue(ii,:,:) = poolPrice(ii)*y(ii,:,:).*generatorlevel(ii,:,:);
end
val = optimexpr(size(y));
demands = optimexpr(nPeriods,1);
id = 1:nPeriods;
val(id,:,:)= y(id,:,:).*generatorlevel(id,:,:);
demands(id,:)= sum(sum(val,3),2);
demandcons = demands-demand ;
fuelCost = sum(sum(fuelPrice.*sum(y.*yFuel)));
startingCost = z*startCost;
profit = sum(sum(sum(revenue))) - fuelCost - sum(startingCost);
dispatch = optimproblem('ObjectiveSense','maximize');
dispatch.Objective = profit;
dispatch.Constraints.powercons = powercons;
dispatch.Constraints.demandcons = demands - demand <= 0 ;
options = optimoptions('intlinprog','Display','final');
[dispatchsol,fval,exitflag,output] = solve(dispatch,'options',options);
subplot(5,1,1)
bar(dispatchsol.y(:,1,1)*gen(1,1)+dispatchsol.y(:,1,2)*gen(1,2),.5,'g')
xlim([.5,48.5])
ylabel('MW')
title('Generator 1 Optimal Schedule','FontWeight','bold')
subplot(5,1,2)
bar(dispatchsol.y(:,2,1)*gen(2,1)+dispatchsol.y(:,2,2)*gen(2,2),.5,'c')
title('Generator 2 Optimal Schedule','FontWeight','bold')
xlim([.5,48.5])
ylabel('MW')
subplot(5,1,3)
bar(dispatchsol.y(:,3,1)*gen(3,1)+dispatchsol.y(:,3,2)*gen(3,2),.5,'c')
title('Generator 3 Optimal Schedule','FontWeight','bold')
xlim([.5,48.5])
ylabel('MW')
subplot(5,1,4)
bar(demand,.5)
xlim([.5,48.5])
title('Daily Demand','FontWeight','bold')
xlabel('Period')
ylabel('MW')
starttimes = find(round(dispatchsol.z) == 1);
[theperiod,thegenerator] = ind2sub(size(dispatchsol.z),starttimes)
This code does not satisfy demand: the dispatched output is much less than the actual demand. Also, it only uses the 'High' or 'Low' power levels; power values in between are never used.
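For clarity, the requirement I am trying to enforce is that, in every period t, the total dispatched power covers demand (written informally):

    sum over generators g and levels L in {Low, High} of gen(g,L) * y(t,g,L) >= demand(t)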

In TensorFlow, is splitting labels and features not supported?

I want to split the data into labels and features, because tf.nn.softmax_cross_entropy_with_logits requires it.
queue = tf.RandomShuffleQueue(
    capacity=capacity,
    min_after_dequeue=min_after_dequeue,
    dtypes=[tf.float32],
    shapes=[[n_input+1]]  # one label plus n_input features
)
I make the queue and enqueue the label and features together. After that, I need to split the labels and features apart for the cost function. How do I do that?
Thank you
import tensorflow as tf
import numpy as np

# Parameters
learning_rate = 0.003
training_epochs = 30
batch_size = 2
display_step = 1
min_after_dequeue = 5
capacity = 16246832

# Network Parameters
# feature size
n_input = 199
# 1st layer num features
n_hidden_1 = 150
# 2nd layer num features
n_hidden_2 = 100
# 3rd layer num features
n_hidden_3 = 50
# 4th layer num features
n_hidden_4 = 30
# class
n_classes = 3

# read csv, index 0 is the label
filename_queue = tf.train.string_input_producer(["data.csv"])
record_default = [[0.0] for x in xrange(200)]  # a label and 199 features

# testfile
reader = tf.TextLineReader()
# file read
key, value = reader.read(filename_queue)
# decode
features = tf.decode_csv(value, record_defaults=record_default)
featurespack = tf.pack(features)
#xy = tf.map_fn(fn = lambda f: [f[1:],f[0]], elems=featurespack)

# for the batch
queue = tf.RandomShuffleQueue(
    capacity=capacity,
    min_after_dequeue=min_after_dequeue,
    dtypes=[tf.float32],
    shapes=[[n_input+1]]
)
# enqueue
enqueue_op = queue.enqueue(featurespack)
# dequeue
inputs = queue.dequeue_many(batch_size)
# threading
qr = tf.train.QueueRunner(queue, [enqueue_op] * 4)

# features n=199
x = tf.placeholder("float", [None, n_input])
# class 0,1,2
y = tf.placeholder("float", [None, n_classes])
# dropout
dropout_keep_prob = tf.placeholder("float")

# Create model
def multilayer_perceptron(_X, _weights, _biases, _keep_prob):
    layer_1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])), _keep_prob)
    layer_2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_1, _weights['h2']), _biases['b2'])), _keep_prob)
    layer_3 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_2, _weights['h3']), _biases['b3'])), _keep_prob)
    layer_4 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_3, _weights['h4']), _biases['b4'])), _keep_prob)
    return tf.sigmoid(tf.matmul(layer_4, _weights['out']) + _biases['out'])

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=0.1)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=0.1)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], stddev=0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model
pred = multilayer_perceptron(x, weights, biases, dropout_keep_prob)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))  # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # Adam Optimizer
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.8).minimize(cost)

# Initializing the variables
print "1"
with tf.Session() as sess:
    # init (note: the parentheses are needed to actually run the op)
    tf.initialize_all_variables().run()
    # not sure what this does
    coord = tf.train.Coordinator()
    # start the queue runners
    tf.train.start_queue_runners(coord=coord)
    # not sure about this either
    enqueue_threads = qr.create_threads(sess, coord=coord, start=True)
    print sess.run(features)
    print sess.run(features)
    print sess.run(features)
    print sess.run(features)
    print sess.run(features)
    #print sess.run(feature)

    # Training cycle
    for epoch in range(training_epochs):
        print epoch
        avg_cost = 0.
        # Loop over all batches
        for i in range(10):
            print i
            if coord.should_stop():
                break
            # get inputs
            inputs_value = sess.run(inputs)
            # THIS DOES NOT WORK
            batch_xs = np.ndarray([x[1:] for x in inputs_value])
            batch_ys = np.ndarray([x[0] for x in inputs_value])
            print 'batch', len(batch_ys), len(batch_xs)
            #batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fit training using batch data
            # optimizer takes x and y
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob: 0.5})
            # Compute average loss
            avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob: 0.5}) / batch_size
        # Display logs per epoch step
        if epoch % display_step == 0:
            print ("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost))

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    #print ("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels, dropout_keep_prob: 1.}))

    coord.request_stop()
    coord.join(enqueue_threads)
    print ("Optimization Finished!")

Genetic Algorithm and neural network failing to learn

I am trying to make a Flappy Bird AI where the agent learns to pass through the pipes via a genetic algorithm and a neural network.
My implementation uses a neural network with two inputs (horizontal distance from the pipe and vertical distance from the pipe opening), one hidden layer of 5 neurons, and one output layer.
The genetic algorithm evolves the agent by changing one of the weights of the neural network per generation (based on this GA implementation).
However, I noticed that the Flappy Bird agent fails to learn, to the point where it never even attempts to flap (it keeps falling at the beginning of every generation), all the way until the 485th generation, where a "maximum recursion depth exceeded" error occurs.
Genetic Algorithm + Neural Network functions:
def flap(playery, playerFlapAcc):
    playerVelY = playerFlapAcc
    playerFlapped = True
    if sound:
        SOUNDS['wing'].play()
    return playerVelY, playerFlapped

def mutate(n):
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out
    layer_select = random.uniform(0,2)
    print 'Changed:'
    if layer_select == 0:
        selector = int(random.uniform(0,5))
        print in_to_hidden.params[selector]
        in_to_hidden.params[selector] = in_to_hidden.params[selector] + random.uniform(-5,5)
        print in_to_hidden.params[selector]
    elif layer_select == 1:
        selector = int(random.uniform(0,5))
        print hidden_to_hidden2.params[selector]  # was hidden_to_hidden, which is undefined
        hidden_to_hidden2.params[selector] = hidden_to_hidden2.params[selector] + random.uniform(-5,5)
        print hidden_to_hidden2.params[selector]
    else:
        selector = int(random.uniform(0,3))
        print hidden_to_out.params[selector]
        hidden_to_out.params[selector] = hidden_to_out.params[selector] + random.uniform(-5,5)
        print hidden_to_out.params[selector]
    return n

def predict_action(rangex, error, playery, playerFlapAcc, playerVelY, playerFlapped, i):
    global jumped
    if i % 10 == 0:
        pred = n.activate([rangex, error]).argmax()
        if pred == 1:
            jumped = True
            playerVelY, playerFlapped = flap(playery, playerFlapAcc)
    return playerVelY, playerFlapped

def initalize_nn():
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out
    # Old code (regression)
    n = FeedForwardNetwork()
    # n = buildNetwork( 2, 3, data.outdim, outclass=SoftmaxLayer )
    inLayer = LinearLayer(2)
    hiddenLayer = SigmoidLayer(5)
    hiddenLayer2 = SigmoidLayer(5)
    outLayer = LinearLayer(1)
    n.addInputModule(inLayer)
    n.addModule(hiddenLayer)
    n.addModule(hiddenLayer2)
    n.addOutputModule(outLayer)
    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_hidden2 = FullConnection(hiddenLayer, hiddenLayer2)
    hidden_to_out = FullConnection(hiddenLayer2, outLayer)
    n.addConnection(in_to_hidden)
    n.addConnection(hidden_to_hidden2)
    n.addConnection(hidden_to_out)
    n.sortModules()
    return n

def fitness_fun(score, x_distance, error):
    # Fitness function was designed so that the largest distance is the most fit.
    # Before going through the first pipe, total distance traveled is the fitness.
    # Once the agent has passed through the first pipe and earned a point,
    # the number of points gained is the main determinant of the fitness score.
    if error != 0:
        fitval = abs((100*score) + (x_distance/(2*abs(error))))
    else:
        fitval = abs(100*score) + x_distance*2
    return fitval
Sample implementation in the game:
def mainGame(movementInfo):
    global fitness
    global old_fitness
    global num_nn_parameters
    global score
    global disx
    global first_time
    global n
    global old_n
    global in_to_hidden
    global hidden_to_hidden2
    global hidden_to_out
    global generation
    global jumped

    print 'generation: ', generation
    generation = generation + 1

    if first_time:
        ### Initalizing the neural network
        n = initalize_nn()
        ds = ClassificationDataSet(2, nb_classes=2)
        z = 0
        for val in in_to_hidden.params:
            in_to_hidden.params[z] = random.uniform(-2,2)
            z = z + 1
        num_nn_parameters = z
        old_nn = n
    else:
        # create new nn (but with old_nn saved)
        n = mutate(old_n)

    disx = 0
    score = 0
    first_time = False

    # Print weights
    print_all_weights()

    ####
    '''
    NOTES:
    playerx = player's x position (57)
    playery = player's height
    upper_gap
    lower_gap
    center_cord
    '''
    pipeHeight = IMAGES['pipe'][0].get_height()
    upper_gap = newPipe1[0]['y'] + pipeHeight
    lower_gap = upper_gap + PIPEGAPSIZE
    center_cord = upper_gap + ((lower_gap - upper_gap)/2)

    ########### The main loop ###########
    #playerx = 140
    while True:
        i = i + 1
        disx = disx + 1
        # Error is determined by comparing the agent's y distance from the pipe opening
        error = playery - center_cord
        for event in pygame.event.get():
            if event.type == QUIT or (event.type == KEYDOWN and event.key == K_ESCAPE):
                pygame.quit()
                sys.exit()
            if event.type == KEYDOWN and (event.key == K_SPACE or event.key == K_UP):
                if playery > -2 * IMAGES['player'][0].get_height():
                    playerVelY = playerFlapAcc
                    playerFlapped = True
                    if sound:
                        SOUNDS['wing'].play()

        # check for crash here
        crashTest = checkCrash({'x': playerx, 'y': playery, 'index': playerIndex},
                               upperPipes, lowerPipes)
        if crashTest[0]:
            fitness = fitness_fun(score, disx, error)
            print '------------------- Game Over ---------------------'
            print 'fitness: [', fitness, ']'
            print 'old fit: [', old_fitness, ']'
            print ''
            print 'error: ', error
            #print 'score: ', score
            print 'range_x', rangex
            print 'player_x: ', disx
            print '----------------------------------------------------'
            print ''
            # If it turns out the old nn was better
            if old_fitness > fitness:
                # prevents the old but good nn from being overwritten
                n = old_n
                fitness = old_fitness
            else:
                print 'Better fitness discovered'
                # store the good nn as the old_nn
                old_n = n
                old_fitness = fitness
            return {
                'y': playery,
                'groundCrash': crashTest[1],
                'basex': basex,
                'upperPipes': upperPipes,
                'lowerPipes': lowerPipes,
                'score': score,
                'playerVelY': playerVelY,
            }

        rangex = upperPipes[0]['x'] - 92
        # Make prediction
        playerVelY, playerFlapped = predict_action(rangex, error, playery, playerFlapAcc, playerVelY, playerFlapped, i)
Does anyone know the cause of this and how I can fix this?