Using zero_grad() after loss.backward(), but still getting RuntimeError: "Trying to backward through the graph a second time..." - neural-network

Below is my implementation of A2C using PyTorch. While learning about backpropagation in PyTorch, I learned that I should call zero_grad() on the optimizer after each update iteration. However, I still get a RuntimeError on the second backward pass.
# Imports assumed by this snippet (not shown in the original post):
import time
from collections import deque

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

gamma = 0.99  # discount factor; value assumed, defined elsewhere in the original script

def torchworker(number, model):
    worker_env = gym.make("Taxi-v3").env
    max_steps_per_episode = 2000
    worker_opt = optim.Adam(lr=5e-4, params=model.parameters())
    p_history = []
    val_history = []
    r_history = []
    running_reward = 0
    episode_count = 0
    under = 0
    start = time.time()
    for i in range(2):
        state = worker_env.reset()
        episode_reward = 0
        penalties = 0
        drop = 0
        print("Episode {} begins ({})".format(episode_count, number))
        worker_env.render()
        criterion = nn.SmoothL1Loss()
        time_solve = 0
        for _ in range(1, max_steps_per_episode):
            #worker_env.render()
            state = torch.tensor(state, dtype=torch.long)
            action_probs = model.forward(state)[0]
            critic_value = model.forward(state)[1]
            val_history.append((state, critic_value[0]))
            # Choose action
            action = np.random.choice(6, p=action_probs.detach().numpy())
            p_history.append(torch.log(action_probs[action]))
            # Apply chosen action
            state, reward, done, _ = worker_env.step(action)
            r_history.append(reward)
            episode_reward += reward
            time_solve += 1
            if reward == -10:
                penalties += 1
            elif reward == 20:
                drop += 1
            if done:
                break
        # Update running reward to check condition for solving
        running_reward = (running_reward * (episode_count) + episode_reward) / (episode_count + 1)
        # Calculate discounted returns
        returns = deque(maxlen=3500)
        discounted_sum = 0
        for r in r_history[::-1]:
            discounted_sum = r + gamma * discounted_sum
            returns.appendleft(discounted_sum)
        # Calculate actor losses and critic losses
        loss_actor_value = 0
        loss_critic_value = 0
        history = zip(p_history, val_history, returns)
        for log_prob, value, ret in history:
            diff = ret - value[1]
            loss_actor_value += -log_prob * diff
            ret_tensor = torch.tensor(ret, dtype=torch.float32)
            loss_critic_value += criterion(value[1], ret_tensor)
        loss = loss_actor_value + 0.1 * loss_critic_value
        print(loss)
        # Update params
        loss.backward()
        worker_opt.step()
        worker_opt.zero_grad()
        # Log details
        end = time.time()
        episode_count += 1
        if episode_count % 1 == 0:
            worker_env.render()
        if running_reward > -50:  # Condition to consider the task solved
            under += 1
            if under > 5:
                print("Solved at episode {} !".format(episode_count))
                break
I suspect it may have something to do with the architecture of my AC model, so I also include it here for reference.
class ActorCriticNetwork(nn.Module):
    def __init__(self, num_inputs, num_hidden, num_actions):
        super(ActorCriticNetwork, self).__init__()
        self.embed = nn.Embedding(500, 10)
        self.fc1 = nn.Linear(10, num_hidden * 2)
        self.fc2 = nn.Linear(num_hidden * 2, num_hidden)
        self.c = nn.Linear(num_hidden, 1)
        self.fc3 = nn.Linear(num_hidden, num_hidden)
        self.a = nn.Linear(num_hidden, num_actions)

    def forward(self, x):
        out = F.relu(self.embed(x))
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        critic = self.c(out)
        out = F.relu(self.fc3(out.detach()))
        actor = F.softmax(self.a(out), dim=-1)
        return actor, critic
Would you please tell me what the mistake here is? Thank you in advance.

SOLVED: I forgot to clear the history of probabilities, action-values, and rewards after each iteration. It is clear why that causes the issue: the stale elements force backpropagation through the old dynamic computation graphs.
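For anyone hitting the same error, a minimal sketch of the fix described above (illustrative, not the exact code from my worker): drop the per-episode buffers right after the optimizer update, so the next backward() never reaches tensors from an earlier graph.

# at the end of each episode, after worker_opt.zero_grad():
p_history.clear()    # log-probabilities tied to the old graph
val_history.clear()  # critic values tied to the old graph
r_history.clear()    # plain rewards, cleared so the histories stay aligned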

Related

Issue while solving Gym Environment; ValueError: Weights for model sequential have not yet been created

I am new to Stack Overflow, so I apologize for any errors while asking a question. I am trying to solve the CartPole-v1 gym environment using a DQN agent, and I am facing the following error: ValueError: Weights for model sequential have not yet been created. Weights are created when the Model is first called on inputs or build() is called with an input_shape. I've searched for how to fix this, but with no success. My TensorFlow version is 2.8.0. My code for my agent is as follows; I believe the problem is most probably in my build_model and in the model.fit line.
class DQNAgent0:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount factor
        self.epsilon = 1.0  # 100% exploration at the start
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        '''model = tf.keras.Sequential([
            tf.keras.layers.Dense(1),
            #tf.keras.Input((self.state_size,)),
            tf.keras.layers.Dense(24, activation="relu"),
            tf.keras.layers.Dense(24, activation="relu"),
            tf.keras.layers.Dense(self.action_size, activation="linear"),
        ])
        model.compile(loss=tf.keras.losses.mse,
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))'''
        #model = tf.keras.Sequential()
        model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
        model.add(tf.keras.Input(shape = self.state_size))
        model.add(tf.keras.layers.Dense(24, activation = 'relu'))
        model.add(tf.keras.layers.Dense(24, activation = 'relu'))
        model.add(tf.keras.layers.Dense(self.action_size, activation = 'linear'))
        #opt = tf.keras.optimizers.Adam(learning_rate = self.learning_rate)
        #model.compile(loss = 'mse', optimizer = opt)
        model.compile(loss = tf.keras.losses.mse, optimizer = tf.keras.optimizers.Adam(learning_rate = self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)  # exploratory action
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        # creating a random sample from our memory
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma * np.amax(self.model.predict(next_state[0])))  # reward at current timestep + discounted future reward
            target_f = self.model.predict(state)
            target_f[0][action] = target  # mapping future reward to the current reward
            self.model.fit(tf.expand_dims(state, axis=-1), target_f, epochs = 1, verbose = 0)  # fitting a model to train with state as input x and target_f as y (predicted future reward)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)
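One thing worth checking (a guess, not a verified fix): the error message says weights are created only once the model is built with a known input shape, yet _build_model adds a Dense(1) layer before the Input layer. A minimal sketch of a build that declares the input shape first, so the weights exist before predict/fit is called (assuming state_size is an int, e.g. 4 for CartPole-v1):

def _build_model(self):
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(self.state_size,)),  # declare the input shape up front
        tf.keras.layers.Dense(24, activation='relu'),
        tf.keras.layers.Dense(24, activation='relu'),
        tf.keras.layers.Dense(self.action_size, activation='linear'),
    ])
    model.compile(loss='mse',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
    return model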

OR-tools VRP solver returning one job per vehicle

I am using Google OR-Tools in Python to solve a capacitated VRP with pickup/delivery. In many cases the solver works well and returns reasonable solutions, but we have found that for some data sets the solver will always return one job per truck, regardless of the time involved for the route.
I have the model set up as follows:
My initial vehicle count is equal to the number of jobs in the data set, and we allow OR-Tools to automatically minimize the truck count.
Each job's pickup location has a demand of 1 and each job's dropoff location has a demand of -1, to enforce delivery immediately after pickup.
We set the maximum drive time per vehicle to 8 hours.
Then, each job has an associated quantity attached for pickup, and we separate this job into multiple deliveries based on a truck's capacity. For instance, if a job requires 60 tons delivered, we represent that as three jobs at 20 tons each (the maximum a vehicle is allowed to carry on an interstate in the U.S.).
Now, we have a simple data set with a pickup location at 698 Longtown Rd, Columbia, SC and a dropoff location at 121 Chappell Creek Rd, Hopkins, SC. This is a drive time of 32 minutes, or a total trip time of 64 minutes. This job has an associated quantity of 60 tons, which will require 3 truck loads.
The results we receive from OR-Tools show one load per truck, and this result does not change regardless of how long we allow the solver to run. The optimal solution would allow one truck to complete all loads, as this is still drastically under the 8-hour drive time limit.
Here is my code:
import json
import math
import traceback
import urllib
import redis
import requests
import boto3
from signal import signal, SIGINT, SIGTERM
from ortools.constraint_solver import pywrapcp, routing_enums_pb2

url = 'https://test-api.truckit.com/api/2/signin'
api_data = {"password": "", "username": ""}
response = requests.post(url, json=api_data)
api_data = response.json()

def build_auth_header(token):
    header = {'Authorization': f'Token {token}'}
    return header

class SignalHandler:
    def __init__(self):
        self.received_signal = False
        signal(SIGINT, self._signal_handler)
        signal(SIGTERM, self._signal_handler)

    def _signal_handler(self, signal, frame):
        print(f"handling signal {signal}, exiting gracefully")
        self.received_signal = True

sqs = boto3.resource("sqs")
queue = sqs.get_queue_by_name(QueueName="")
redisClient = redis.Redis(host='', port=6379,
                          password='')
def create_distance_matrix(data):
    addresses = data["addresses"]
    API_key = data["API_key"]
    origin_addresses = []
    dest_addresses = addresses
    distance_matrix = []
    responses = {}
    responses['destination_addresses'] = []
    responses['origin_addresses'] = []
    responses['rows'] = []
    # Send q requests, returning max_rows rows per request.
    for i in range(0, len(addresses)):
        origin_addresses.clear()
        origin_addresses.append(addresses[i])
        for j in range(0, len(addresses), 25):
            dest_addresses_request = addresses[j:j + 25]
            response = send_request(origin_addresses, dest_addresses_request, API_key)
            responses['origin_addresses'] = response['origin_addresses']
            for destination_address in response['destination_addresses']:
                responses['destination_addresses'].append(destination_address)
            for row in response['rows']:
                if len(responses['rows']) == 0:
                    responses['rows'].append(row)
                else:
                    for element in row['elements']:
                        responses['rows'][0]['elements'].append(element)
        distance_matrix += build_distance_matrix(responses)
        responses['origin_addresses'].clear()
        responses['destination_addresses'].clear()
        responses['rows'].clear()
    return distance_matrix
def send_request(origin_addresses, dest_addresses, API_key):
    """Build and send request for the given origin and destination addresses."""
    def build_address_str(addresses):
        # Build a pipe-separated string of addresses
        address_str = ''
        for i in range(len(addresses) - 1):
            address_str += addresses[i] + '|'
        address_str += addresses[-1]
        return address_str

    request = 'https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial'
    origin_address_str = build_address_str(origin_addresses)
    dest_address_str = build_address_str(dest_addresses)
    request = request + '&origins=' + origin_address_str + '&destinations=' + \
              dest_address_str + '&key=' + API_key
    jsonResult = urllib.request.urlopen(request).read()
    response = json.loads(jsonResult)
    return response
def build_distance_matrix(response):
    distance_matrix = []
    for row in response['rows']:
        row_list = [row['elements'][j]['duration']['value'] for j in range(len(row['elements']))]
        distance_matrix.append(row_list)
    return distance_matrix
def process_message(message_body):
    print(f"processing message: {message_body}")
    data = json.loads(message_body)
    data_matrix = {}
    data_matrix['problem_id'] = data['problemId']
    data_matrix["addresses"] = []
    data_matrix["pickups_deliveries"] = []
    data_matrix["demands"] = []
    data_matrix["jobOrderIDs"] = []
    depot_address = str(data["depot"]["latitude"]) + "," + str(data["depot"]["longitude"])
    data_matrix["jobOrderIDs"].append(0)
    data_matrix["addresses"].append(depot_address)
    hash_key = data["hashKey"]
    for location in data["locationList"]:
        pick_lat = location["PickupLatitude"]
        pick_long = location["PickupLongitude"]
        drop_lat = location["DropoffLatitude"]
        drop_long = location["DropoffLongitude"]
        jobOrderId = location["jobOrderID"]
        demand = math.ceil(float(int(location["totalQuantity"]) / 20))
        for i in range(0, demand):
            data_matrix["addresses"].append(str(pick_lat) + ',' + str(pick_long))
            data_matrix["addresses"].append(str(drop_lat) + ',' + str(drop_long))
            data_matrix["jobOrderIDs"].append(str(jobOrderId))
            data_matrix["jobOrderIDs"].append(str(jobOrderId))
    data_matrix["demands"].append(0)
    for i in range(1, len(data_matrix["addresses"]) - 1, 2):
        data_matrix["pickups_deliveries"].append([i, i + 1])
        data_matrix["demands"].append(1)
        data_matrix["demands"].append(-1)
    data_matrix["num_vehicles"] = int(len(data_matrix["addresses"]) / 2)
    data_matrix["vehicle_capacities"] = []
    for i in range(0, data_matrix["num_vehicles"]):
        data_matrix["vehicle_capacities"].append(1)
    data_matrix["depot"] = 0
    data_matrix["API_key"] = ''
    data_matrix["distance_matrix"] = create_distance_matrix(data_matrix)

    # Create the routing index manager.
    manager = pywrapcp.RoutingIndexManager(len(data_matrix['distance_matrix']),
                                           data_matrix['num_vehicles'], data_matrix['depot'])
    # Create Routing Model.
    routing = pywrapcp.RoutingModel(manager)

    # Define cost of each arc.
    def distance_callback(from_index, to_index):
        """Returns the travel time between the two nodes."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data_matrix['distance_matrix'][from_node][to_node] * 1000

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # Add Duration constraint.
    dimension_name = 'Duration'
    routing.AddDimension(
        transit_callback_index,
        0,  # no slack
        28800 * 1000,  # vehicle maximum travel time (8 hours in seconds, scaled by 1000 like the arc costs)
        True,  # start cumul to zero
        dimension_name)
    distance_dimension = routing.GetDimensionOrDie(dimension_name)
    distance_dimension.SetGlobalSpanCostCoefficient(100)

    def demand_callback(from_index):
        """Returns the demand of the node."""
        # Convert from routing variable Index to demands NodeIndex.
        from_node = manager.IndexToNode(from_index)
        return data_matrix['demands'][from_node]

    demand_callback_index = routing.RegisterUnaryTransitCallback(
        demand_callback)
    routing.AddDimensionWithVehicleCapacity(
        demand_callback_index,
        0,  # null capacity slack
        data_matrix['vehicle_capacities'],  # vehicle maximum capacities
        True,  # start cumul to zero
        'Capacity')

    # Define Transportation Requests.
    for request in data_matrix['pickups_deliveries']:
        pickup_index = manager.NodeToIndex(request[0])
        delivery_index = manager.NodeToIndex(request[1])
        routing.AddPickupAndDelivery(pickup_index, delivery_index)
        routing.solver().Add(
            routing.VehicleVar(pickup_index) == routing.VehicleVar(
                delivery_index))
        routing.solver().Add(
            distance_dimension.CumulVar(pickup_index) <=
            distance_dimension.CumulVar(delivery_index))

    # Setting first solution heuristic.
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.local_search_metaheuristic = (
        routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
    search_parameters.time_limit.seconds = 1200
    search_parameters.log_search = True
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.AUTOMATIC)
    search_parameters.use_full_propagation = True

    # Solve the problem.
    solution = routing.SolveWithParameters(search_parameters)
    if solution:
        solution_dict = {}
        for vehicle_id in range(data_matrix['num_vehicles']):
            index = routing.Start(vehicle_id)
            plan_output = ''
            route_distance = 0
            route_load = 0
            while not routing.IsEnd(index):
                node_index = manager.IndexToNode(index)
                plan_output += '{0},'.format(data_matrix['jobOrderIDs'][node_index])
                previous_index = index
                index = solution.Value(routing.NextVar(index))
            plan_output += '{0},'.format(data_matrix['jobOrderIDs'][manager.IndexToNode(index)])
            plan_output = plan_output[:-1]
            plan_words = plan_output.split(",")
            plan_output = ''
            for i in range(len(plan_words)):
                if (i % 2 == 0):
                    plan_output += plan_words[i] + ","
            plan_output = plan_output[:-1]
            plan_output += ",0"
            if plan_output != 0 and plan_output != str(0) and plan_output != str('0,0'):
                print(plan_output)
                solution_dict[vehicle_id] = plan_output

        # trucks_url = 'https://test-api.truckit.com/api/2/trucks'
        trucks_url = 'https://test-api.truckit.com/api/2/job-orders/smart-dispatch/' + str(data_matrix['problem_id'])
        head = build_auth_header(api_data["authToken"])
        status = {}
        ride_list = []
        dummy_location_dict = {}
        dummy_id_dict = {}
        dummy_id_dict["id"] = 0
        dummy_id_dict["name"] = ""
        dummy_location_dict["location"] = dummy_id_dict
        dummy_location_dict["timestamp"] = 0
        ride_list.append(dummy_location_dict)
        redisClient.hset(hash_key, "solution", json.dumps(solution_dict))
        redisClient.hset(hash_key, "ride_list", json.dumps(ride_list))
        json_data = {"status": "completed"}
        api_response = requests.post(trucks_url, headers=head, json=json_data)
        print_solution(data_matrix, manager, routing, solution)
def print_solution(data, manager, routing, solution):
    """Prints solution on console."""
    print(f'Objective: {solution.ObjectiveValue()}')
    total_distance = 0
    total_load = 0
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
        route_distance = 0
        route_load = 0
        while not routing.IsEnd(index):
            node_index = manager.IndexToNode(index)
            plan_output += ' {0} -> '.format(node_index)
            previous_index = index
            index = solution.Value(routing.NextVar(index))
            try:
                distance = data['distance_matrix'][previous_index][index]
                route_distance += distance
            except:
                distance = distance
        plan_output += ' {0}\n'.format(manager.IndexToNode(index))
        plan_output += 'Time of the route: {} hours\n'.format(str(float(route_distance / (60 * 60))))
        print(plan_output)
        total_distance += route_distance
    print('Total distance of all routes: {}m'.format(total_distance))
if __name__ == "__main__":
    signal_handler = SignalHandler()
    while not signal_handler.received_signal:
        messages = queue.receive_messages(
            MaxNumberOfMessages=1,
            WaitTimeSeconds=1
        )
        for message in messages:
            try:
                process_message(message.body)
                message.delete()
            except Exception as e:
                print(f"exception while processing message: {repr(e)}")
                traceback.print_exc()
                continue
            message.delete()
If anyone has any suggestions as to what the problem may be, your help is greatly appreciated.

Optimal dispatch of power generators

I want to write MATLAB code that can optimally dispatch power generators to satisfy electric demand while also considering profit. I used the code below, which is from the MATLAB website: https://ww2.mathworks.cn/help/optim/examples/optimal-dispatch-of-power-generators.html?requestedDomain=en
But this code does not consider the demand.
My target is: there are 20 power plants, and I want to schedule these plants to satisfy demand, where the running power level can be any value between the maximum and minimum power of each plant. High-cost power plants should be dispatched too.
name = 'generationcost.xlsx';
poolPrice = xlsread(name,'AD3:AD50'); % Revenue in dollars per MWh in interval
demand = xlsread(name,'AC3:AC52');
month = numel(demand);
e = xlsread(name,'W2:W4')'; %Cost for a unit of fuel
fuelPrice = repmat(e,1,1,2);
nPeriods = length(poolPrice);
nGens = 3;
genhigh = xlsread(name,'F2:F4');
genlow = xlsread(name,'Q2:Q4');
gen = [genlow(:),genhigh(:)]; %MW
fuel = [50,330;45,325;55,750]; % Fuel consumption for generator
startCost = [10000;10000;10000]; %Cost in dollars to start a generator after it has been off
y = optimvar('y',nPeriods,nGens,{'Low','High'},'Type','integer','LowerBound',0,'UpperBound',1);
z = optimvar('z',nPeriods,nGens,'Type','integer','LowerBound',0,'UpperBound',1);
powercons = y(:,:,'Low') + y(:,:,'High') <= 1;
yFuel = zeros(nPeriods,nGens,2);
yFuel(:,1,1) = fuel(1,1);
yFuel(:,1,2) = fuel(1,2);
yFuel(:,2,1) = fuel(2,1);
yFuel(:,2,2) = fuel(2,2);
yFuel(:,3,1) = fuel(3,1);
yFuel(:,3,2) = fuel(3,2);
fuelUsed = sum(reshape((sum(y.*yFuel)),[3 2])');
%fuelcons = fuelUsed <= totalFuel;
w = optimexpr(nPeriods,nGens);
idx = 1:(nPeriods-1);
w(idx,:) = y(idx+1,:,'Low') - y(idx,:,'Low') + y(idx+1,:,'High') - y(idx,:,'High');
w(nPeriods,:) = y(1,:,'Low') - y(nPeriods,:,'Low') + y(1,:,'High') - y(nPeriods,:,'High');
switchcons = w - z <= 0;
generatorlevel = zeros(size(yFuel));
generatorlevel(:,1,1) = gen(1,1);
generatorlevel(:,1,2) = gen(1,2);
generatorlevel(:,2,1) = gen(2,1);
generatorlevel(:,2,2) = gen(2,2);
generatorlevel(:,3,1) = gen(3,1);
generatorlevel(:,3,2) = gen(3,2);
revenue = optimexpr(size(y));
for ii = 1:nPeriods
    revenue(ii,:,:) = poolPrice(ii)*y(ii,:,:).*generatorlevel(ii,:,:);
end
val = optimexpr(size(y));
demands = optimexpr(nPeriods,1);
id = 1:nPeriods;
val(id,:,:)= y(id,:,:).*generatorlevel(id,:,:);
demands(id,:)= sum(sum(val,3),2);
demandcons = demands-demand ;
fuelCost = sum(sum(fuelPrice.*sum(y.*yFuel)));
startingCost = z*startCost;
profit = sum(sum(sum(revenue))) - fuelCost - sum(startingCost);
dispatch = optimproblem('ObjectiveSense','maximize');
dispatch.Objective = profit;
dispatch.Constraints.powercons = powercons;
dispatch.Constraints.demandcons = demands - demand <= 0 ;
options = optimoptions('intlinprog','Display','final');
[dispatchsol,fval,exitflag,output] = solve(dispatch,'options',options);
subplot(5,1,1)
bar(dispatchsol.y(:,1,1)*gen(1,1)+dispatchsol.y(:,1,2)*gen(1,2),.5,'g')
xlim([.5,48.5])
ylabel('MW')
title('Generator 1 Optimal Schedule','FontWeight','bold')
subplot(5,1,2)
bar(dispatchsol.y(:,2,1)*gen(2,1)+dispatchsol.y(:,2,2)*gen(2,2),.5,'c')
title('Generator 2 Optimal Schedule','FontWeight','bold')
xlim([.5,48.5])
ylabel('MW')
subplot(5,1,3)
bar(dispatchsol.y(:,3,1)*gen(3,1)+dispatchsol.y(:,3,2)*gen(3,2),.5,'c')
title('Generator 3 Optimal Schedule','FontWeight','bold')
xlim([.5,48.5])
ylabel('MW')
subplot(5,1,4)
bar(demand,.5)
xlim([.5,48.5])
title('Daily Demand','FontWeight','bold')
xlabel('Period')
ylabel('MW')
starttimes = find(round(dispatchsol.z) == 1);
[theperiod,thegenerator] = ind2sub(size(dispatchsol.z),starttimes)
This code does not satisfy the demand: the dispatched output is much lower than the actual demand value. It also only ever uses the 'High' or 'Low' power levels; since y is a binary selector over those two fixed levels, power values in between are never used.

ValueError: List argument 'values' to 'ConcatV2' Op with length 0 shorter than minimum length 2 3Dball

Executing "3Dball" creates some errors in Unity ml-agent
When I execute PPO.ipynb, there is no error till "Load the environment".
Executing "Train the Agents" there are some errors
ValueError: List argument 'values' to 'ConcatV2' Op with length 0
shorter than minimum length 2.
This is the code I executed
https://github.com/Unity-Technologies/ml-agents/blob/master/python/PPO.ipynb
tf.reset_default_graph()

if curriculum_file == "None":
    curriculum_file = None

def get_progress():
    if curriculum_file is not None:
        if env._curriculum.measure_type == "progress":
            return steps / max_steps
        elif env._curriculum.measure_type == "reward":
            return last_reward
        else:
            return None
    else:
        return None

# Create the Tensorflow model graph
ppo_model = create_agent_model(env, lr=learning_rate,
                               h_size=hidden_units, epsilon=epsilon,
                               beta=beta, max_step=max_steps,
                               normalize=normalize, num_layers=num_layers)

is_continuous = (env.brains[brain_name].action_space_type == "continuous")
use_observations = (env.brains[brain_name].number_observations > 0)
use_states = (env.brains[brain_name].state_space_size > 0)

model_path = './models/{}'.format(run_path)
summary_path = './summaries/{}'.format(run_path)

if not os.path.exists(model_path):
    os.makedirs(model_path)

if not os.path.exists(summary_path):
    os.makedirs(summary_path)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    # Instantiate model parameters
    if load_model:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(init)
    steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward])
    summary_writer = tf.summary.FileWriter(summary_path)
    info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
    trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states, train_model)
    if train_model:
        trainer.write_text(summary_writer, 'Hyperparameters', hyperparameter_dict, steps)
    while steps <= max_steps:
        if env.global_done:
            info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
        # Decide and take an action
        new_info = trainer.take_action(info, env, brain_name, steps, normalize)
        info = new_info
        trainer.process_experiences(info, time_horizon, gamma, lambd)
        if len(trainer.training_buffer['actions']) > buffer_size and train_model:
            # Perform gradient descent with experience buffer
            trainer.update_model(batch_size, num_epoch)
        if steps % summary_freq == 0 and steps != 0 and train_model:
            # Write training statistics to tensorboard.
            trainer.write_summary(summary_writer, steps, env._curriculum.lesson_number)
        if steps % save_freq == 0 and steps != 0 and train_model:
            # Save Tensorflow model
            save_model(sess, model_path=model_path, steps=steps, saver=saver)
        steps += 1
        sess.run(ppo_model.increment_step)
        if len(trainer.stats['cumulative_reward']) > 0:
            mean_reward = np.mean(trainer.stats['cumulative_reward'])
            sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})
            last_reward = sess.run(ppo_model.last_reward)
    # Final save Tensorflow model
    if steps != 0 and train_model:
        save_model(sess, model_path=model_path, steps=steps, saver=saver)
env.close()
export_graph(model_path, env_name)
I had the same error; the way I fixed it is by replacing line 222 in the file "ml-agents/python/ppo/models.py".
Replace line 222:
hidden_visual = tf.concat(encoders, axis=2)
with:
if encoders:
    hidden_visual = tf.concat(encoders, axis=2)
This guards the concat against an empty encoders list, which is what raises the ValueError. I hope that helps.
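For context, a minimal reproduction of the underlying error (assuming the TF1-style API that this version of ml-agents uses):

import tensorflow as tf

# Passing an empty list reproduces:
# ValueError: List argument 'values' to 'ConcatV2' Op with length 0
# shorter than minimum length 2.
tf.concat([], axis=2)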

How to compare function value with a constant

I'm working with Python and SciPy, and I have the following code:
...
t = np.linspace(0, simtime, points)

def Vbooster90(t):
    return np.sin(t * 2 * np.pi * F_booster + 0.5 * np.pi)

def beam(t):
    return np.sign(Vrfq(t) - Vrfq(bunchwidth)) * 0.5 + 0.5

def criteria(t):
    return np.sign(Vbooster90(t))

def kicker(t):
    if criteria(t) > 0:
        k(t)=beam(t)
    else:
        k(t)=0
    return k(t)
I have a problem with the last function, kicker(t). I want to compare criteria(t) with zero at each value of t, and if criteria(t) is greater than zero, I want kicker(t) to take the value of beam(t) at the same t; otherwise it should be zero. I'm new to Python and don't know the syntax well.
Modify the kicker function as follows:

def kicker(t):
    k = 0
    if criteria(t) > 0:
        k = beam(t)
    return k
Thanks for the answers; instead of defining a function, I solved it the following way:

kicker = np.empty(points)
i = np.arange(points)
time = np.empty(points)
time[i] = i * simtime / points
for i in range(points):
    if criteria(time[i]) > 0:
        kicker[i] = beam(time[i])
    else:
        kicker[i] = 0
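Since criteria and beam are vectorized NumPy functions, the same result can also be computed without the explicit loop; a minimal sketch using np.where (assuming points and simtime are defined as above):

import numpy as np

time = np.arange(points) * simtime / points
# elementwise: take beam(time) where criteria(time) > 0, else 0
kicker = np.where(criteria(time) > 0, beam(time), 0.0)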