ValueError: List argument 'values' to 'ConcatV2' Op with length 0 shorter than minimum length 2 3Dball - unity3d

Executing "3Dball" creates some errors in Unity ml-agent
When I execute PPO.ipynb, there is no error till "Load the environment".
When executing "Train the Agents", the following error occurs:
ValueError: List argument 'values' to 'ConcatV2' Op with length 0
shorter than minimum length 2.
This is the code I executed
https://github.com/Unity-Technologies/ml-agents/blob/master/python/PPO.ipynb
tf.reset_default_graph()
if curriculum_file == "None":
curriculum_file = None
def get_progress():
    """Return the curriculum progress value passed to ``env.reset``.

    Reads the surrounding script's ``curriculum_file``, ``env``, ``steps``,
    ``max_steps`` and ``last_reward``.

    Returns:
        ``steps / max_steps`` when the curriculum's measure type is
        ``"progress"``, ``last_reward`` when it is ``"reward"``, and ``None``
        when no curriculum file is configured or the measure type is
        anything else.
    """
    if curriculum_file is None:
        return None
    measure_type = env._curriculum.measure_type
    if measure_type == "progress":
        return steps / max_steps
    if measure_type == "reward":
        return last_reward
    return None
# Create the Tensorflow model graph
ppo_model = create_agent_model(env, lr=learning_rate,
h_size=hidden_units, epsilon=epsilon,
beta=beta, max_step=max_steps,
normalize=normalize, num_layers=num_layers)
is_continuous = (env.brains[brain_name].action_space_type == "continuous")
use_observations = (env.brains[brain_name].number_observations > 0)
use_states = (env.brains[brain_name].state_space_size > 0)
model_path = './models/{}'.format(run_path)
summary_path = './summaries/{}'.format(run_path)
if not os.path.exists(model_path):
os.makedirs(model_path)
if not os.path.exists(summary_path):
os.makedirs(summary_path)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
# Instantiate model parameters
if load_model:
print('Loading Model...')
ckpt = tf.train.get_checkpoint_state(model_path)
saver.restore(sess, ckpt.model_checkpoint_path)
else:
sess.run(init)
steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward])
summary_writer = tf.summary.FileWriter(summary_path)
info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states, train_model)
if train_model:
trainer.write_text(summary_writer, 'Hyperparameters', hyperparameter_dict, steps)
while steps <= max_steps:
if env.global_done:
info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
# Decide and take an action
new_info = trainer.take_action(info, env, brain_name, steps, normalize)
info = new_info
trainer.process_experiences(info, time_horizon, gamma, lambd)
if len(trainer.training_buffer['actions']) > buffer_size and train_model:
# Perform gradient descent with experience buffer
trainer.update_model(batch_size, num_epoch)
if steps % summary_freq == 0 and steps != 0 and train_model:
# Write training statistics to tensorboard.
trainer.write_summary(summary_writer, steps, env._curriculum.lesson_number)
if steps % save_freq == 0 and steps != 0 and train_model:
# Save Tensorflow model
save_model(sess, model_path=model_path, steps=steps, saver=saver)
steps += 1
sess.run(ppo_model.increment_step)
if len(trainer.stats['cumulative_reward']) > 0:
mean_reward = np.mean(trainer.stats['cumulative_reward'])
sess.run(ppo_model.update_reward, feed_dict={ppo_model.new_reward: mean_reward})
last_reward = sess.run(ppo_model.last_reward)
# Final save Tensorflow model
if steps != 0 and train_model:
save_model(sess, model_path=model_path, steps=steps, saver=saver)
env.close()
export_graph(model_path, env_name)

I had the same error, the way I fixed it is by replacing line 222 under the file: "ml-agents/python/ppo/models.py":
REPLACE Line 222:
hidden_visual = tf.concat(encoders, axis=2)
BY:
if encoders:
hidden_visual = tf.concat(encoders, axis=2)
I hope that helped you.

Related

Using zero_grad() after loss.backward(), but still receives RuntimeError: "Trying to backward through the graph a second time..."

Below is my implementation of A2C using PyTorch. After learning about backpropagation in PyTorch, I know to call zero_grad() on the optimizer after each update iteration. However, I still get a RuntimeError on the second backward pass.
def torchworker(number, model):
# Runs two Taxi-v3 episodes with `model`, collecting log-probabilities,
# critic values and rewards, then builds an actor + critic loss and takes
# one Adam step per episode. `number` is only used in the progress message.
worker_env = gym.make("Taxi-v3").env
max_steps_per_episode = 2000
worker_opt = optim.Adam(lr=5e-4, params=model.parameters())
# NOTE(review): these three histories are created once, before the episode
# loop, and are only ever appended to in this function; nothing below
# clears them, so tensors recorded in one episode are still present when
# the next episode's loss is built.
p_history = []
val_history = []
r_history = []
running_reward = 0
episode_count = 0
under = 0
start = time.time()
for i in range(2):
state = worker_env.reset()
episode_reward = 0
penalties = 0
drop = 0
print("Episode {} begins ({})".format(episode_count, number))
worker_env.render()
criterion = nn.SmoothL1Loss()
time_solve = 0
for _ in range(1, max_steps_per_episode):
#worker_env.render()
state = torch.tensor(state, dtype=torch.long)
# The model is evaluated twice on the same state: once for the action
# probabilities and once for the critic value.
action_probs = model.forward(state)[0]
critic_value = model.forward(state)[1]
# Each entry is a (state, critic value) tuple; the loss loop below reads
# the critic value back as value[1].
val_history.append((state, critic_value[0]))
# Choose action
action = np.random.choice(6, p=action_probs.detach().numpy())
p_history.append(torch.log(action_probs[action]))
# Apply chosen action
state, reward, done, _ = worker_env.step(action)
r_history.append(reward)
episode_reward += reward
time_solve += 1
if reward == -10:
penalties += 1
elif reward == 20:
drop += 1
if done:
break
# Update running reward to check condition for solving
running_reward = (running_reward * (episode_count) + episode_reward) / (episode_count + 1)
# Calculate discounted returns
returns = deque(maxlen=3500)
discounted_sum = 0
# `gamma` is not defined in this function; presumably a module-level
# discount factor -- TODO confirm.
# Walk the rewards backwards so each return is r + gamma * (later return).
for r in r_history[::-1]:
discounted_sum = r + gamma * discounted_sum
returns.appendleft(discounted_sum)
# Calculate actor losses and critic losses
loss_actor_value = 0
loss_critic_value = 0
history = zip(p_history, val_history, returns)
for log_prob, value, ret in history:
diff = ret - value[1]
loss_actor_value += -log_prob * diff
ret_tensor = torch.tensor(ret, dtype=torch.float32)
loss_critic_value += criterion(value[1], ret_tensor)
loss = loss_actor_value + 0.1 * loss_critic_value
print(loss)
# Update params
loss.backward()
worker_opt.step()
worker_opt.zero_grad()
# Log details
end = time.time()
episode_count += 1
# `episode_count % 1` is always 0, so this condition is always true.
if episode_count % 1 == 0:
worker_env.render()
if running_reward > -50: # Condition to consider the task solved
under += 1
if under > 5:
print("Solved at episode {} !".format(episode_count))
break
I believe there may be something to do with the architecture of my AC model, so I also include it here for reference.
class ActorCriticNetwork(nn.Module):
    """Actor-critic network over integer state indices.

    The input index is embedded, passed through two shared fully connected
    layers, and then split into a critic head (one value) and an actor head
    (a softmax over ``num_actions``).

    Args:
        num_inputs: Accepted for interface compatibility; not used by the
            layers below (the embedding table size is fixed at 500).
        num_hidden: Width of the shared trunk output and of the actor's
            hidden layer.
        num_actions: Number of entries in the actor's probability vector.
    """

    def __init__(self, num_inputs, num_hidden, num_actions):
        super().__init__()
        self.embed = nn.Embedding(500, 10)
        self.fc1 = nn.Linear(10, num_hidden * 2)
        self.fc2 = nn.Linear(num_hidden * 2, num_hidden)
        self.c = nn.Linear(num_hidden, 1)
        self.fc3 = nn.Linear(num_hidden, num_hidden)
        self.a = nn.Linear(num_hidden, num_actions)

    def forward(self, x):
        """Return ``(actor, critic)`` for the state index tensor ``x``.

        ``actor`` is a softmax over the last dimension; ``critic`` is the
        output of the single-unit value head.
        """
        shared = F.relu(self.embed(x))
        shared = F.relu(self.fc1(shared))
        shared = F.relu(self.fc2(shared))
        critic = self.c(shared)
        # The actor branch starts from a detached copy of the shared
        # features, so gradients of the actor output do not reach
        # embed/fc1/fc2; only the critic head trains those layers.
        actor_hidden = F.relu(self.fc3(shared.detach()))
        actor = F.softmax(self.a(actor_hidden), dim=-1)
        return actor, critic
Would you please tell me what the mistake here is? Thank you in advance.
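SOLVED: I forgot to clear the histories of probabilities, action-values and rewards after each iteration. It is clear why that would cause the issue: the older elements still belong to the computation graphs of earlier iterations, so the next backward() call tries to propagate through those old dynamic computation graphs (DCGs) a second time.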

Where the weights get updated in this code?

I want to train a model in a distributed system. I found some code on GitHub for distributed training in which each worker node sends its gradients to the parameter server, and the parameter server sends the averaged gradients back to the workers. But in the client/worker-side code, I couldn't understand where the received gradients update the weights and biases.
Here is the client/worker-side code. It receives initial gradients from the parameter server, then calculates the loss and gradients and sends the gradient values to the server again.
from __future__ import division
from __future__ import print_function
import numpy as np
import sys
import pickle as pickle
import socket
from datetime import datetime
import time
import tensorflow as tf
import cifar10
TCP_IP = 'some IP'
TCP_PORT = 5014
port = 0
port_main = 0
s = 0
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/home/ubuntu/cifar10_train',
"""Directory where to write event logs """
"""and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 5000,
"""Number of batches to run.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
tf.app.flags.DEFINE_integer('log_frequency', 10,
"""How often to log results to the console.""")
#gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.30)
def safe_recv(size, server_socket):
    """Receive exactly ``size`` bytes from ``server_socket``.

    ``recv`` may return fewer bytes than requested, so it is called
    repeatedly, each time asking only for the bytes still missing.

    Args:
        size: Number of bytes to read.
        server_socket: Object with a ``recv(n)`` method returning bytes.

    Returns:
        A ``bytes`` object of length ``size``.

    Raises:
        ConnectionError: If ``recv`` returns no data before ``size`` bytes
            have arrived (the peer closed the connection). Retrying in that
            state would loop forever.
        OSError: Re-raised after printing "Error" when ``recv`` itself
            fails, instead of retrying indefinitely on a broken socket.
    """
    data = bytearray()
    while len(data) < size:
        try:
            chunk = server_socket.recv(size - len(data))
        except OSError:
            print("Error")
            raise
        if not chunk:
            raise ConnectionError(
                "connection closed after %d of %d bytes" % (len(data), size))
        data.extend(chunk)
    return bytes(data)
def train():
"""Train CIFAR-10 for a number of steps."""
# Worker loop: compute gradients locally, send them to the server over a
# socket, receive variable values back and feed them into the next run.
# No op that applies gradients is run anywhere in this function.
g1 = tf.Graph()
with g1.as_default():
global_step = tf.Variable(-1, name='global_step',
trainable=False, dtype=tf.int32)
increment_global_step_op = tf.assign(global_step, global_step+1)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
# Build a Graph that computes the logits predictions from the
# inference model.
logits = cifar10.inference(images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
# train_part1 returns pairs; only the first element of each pair (the
# gradient) is kept for fetching.
grads = cifar10.train_part1(loss, global_step)
only_gradients = [g for g, _ in grads]
class _LoggerHook(tf.train.SessionRunHook):
"""Logs loss and runtime."""
def begin(self):
self._step = -1
self._start_time = time.time()
def before_run(self, run_context):
self._step += 1
return tf.train.SessionRunArgs(loss) # Asks for loss value.
def after_run(self, run_context, run_values):
if self._step % FLAGS.log_frequency == 0:
current_time = time.time()
duration = current_time - self._start_time
self._start_time = current_time
loss_value = run_values.results
# NOTE(review): FLAGS.batch_size is not defined in the visible part of
# this file; presumably it is defined by the cifar10 module -- confirm.
examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
sec_per_batch = float(duration / FLAGS.log_frequency)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print(format_str % (datetime.now(), self._step, loss_value,
examples_per_sec, sec_per_batch))
with tf.train.MonitoredTrainingSession(
checkpoint_dir=FLAGS.train_dir,
hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
tf.train.NanTensorHook(loss),
_LoggerHook()],
config=tf.ConfigProto(
# log_device_placement=FLAGS.log_device_placement, gpu_options=gpu_options)) as mon_sess:
log_device_placement=FLAGS.log_device_placement)) as mon_sess:
global port
# First connection (port_main): fetch the initial variable values.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((TCP_IP, port_main))
# The message is framed as a pickled size followed by the pickled payload.
# assumes the pickled size header is always exactly 17 bytes -- TODO confirm
recv_size = safe_recv(17, s)
# NOTE(review): pickle.loads on bytes read from a socket can execute
# arbitrary code; only safe if the server is trusted.
recv_size = pickle.loads(recv_size)
recv_data = safe_recv(recv_size, s)
var_vals = pickle.loads(recv_data)
s.close()
# Map each trainable variable to the value received from the server, by
# position in the TRAINABLE_VARIABLES collection.
feed_dict = {}
i = 0
for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
feed_dict[v] = var_vals[i]
i = i+1
print("Received variable values from ps")
# Opening the socket and connecting to server
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((TCP_IP, port))
while not mon_sess.should_stop():
# The received values are supplied through feed_dict for this run; the
# fetches are only the gradients and the step increment.
gradients, step_val = mon_sess.run(
[only_gradients, increment_global_step_op], feed_dict=feed_dict)
# sending the gradients
send_data = pickle.dumps(gradients, pickle.HIGHEST_PROTOCOL)
to_send_size = len(send_data)
send_size = pickle.dumps(to_send_size, pickle.HIGHEST_PROTOCOL)
s.sendall(send_size)
s.sendall(send_data)
# receiving the variable values
recv_size = safe_recv(17, s)
recv_size = pickle.loads(recv_size)
recv_data = safe_recv(recv_size, s)
var_vals = pickle.loads(recv_data)
# Rebuild feed_dict so the next run uses the newly received values.
feed_dict = {}
i = 0
for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
feed_dict[v] = var_vals[i]
i = i+1
s.close()
def main(argv=None):  # pylint: disable=unused-argument
    """Parse ``<port> <worker-id>`` from ``sys.argv`` and run training.

    Sets the module-level ``port_main`` to the base port and ``port`` to
    base port plus worker id, downloads the dataset if needed, recreates
    ``FLAGS.train_dir`` from scratch, and then calls ``train()``.

    Exits with status 1 when the two required arguments are missing, so a
    usage error is not reported to the shell as success.
    """
    global port
    global port_main
    if len(sys.argv) != 3:
        print("<port> <worker-id> required")
        sys.exit(1)
    port_main = int(sys.argv[1])
    port = port_main + int(sys.argv[2])
    print("Connecting to port ", port)
    cifar10.maybe_download_and_extract()
    # Start from an empty training directory on every launch.
    if tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.DeleteRecursively(FLAGS.train_dir)
    tf.gfile.MakeDirs(FLAGS.train_dir)
    total_start_time = time.time()
    train()
    print("--- %s seconds ---" % (time.time() - total_start_time))
if __name__ == '__main__':
tf.app.run()
EDIT:
Here is the train_part1() code:
def train_part1(total_loss, global_step):
    """Build the gradient-computation half of CIFAR-10 training.

    Creates an exponentially decayed learning rate, the loss moving-average
    summaries, and a gradient-descent optimizer, then computes the gradients
    of ``total_loss``. No op that applies the gradients is created here;
    the caller receives the gradients only.

    Args:
      total_loss: Total loss from loss().
      global_step: Integer Variable counting the number of training steps
        processed.
    Returns:
      grads: the result of ``opt.compute_gradients(total_loss)``, a list of
        (gradient, variable) pairs.
    """
    # Variables that affect learning rate.
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.summary.scalar('learning_rate', lr)
    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)
    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
    return grads
To me it seems that line
gradients, step_val = mon_sess.run(
[only_gradients, increment_global_step_op], feed_dict=feed_dict)
receives new values for the variables in feed_dict, assigns these values to the variables, and performs a training step, during which it only calculates and returns the gradients, which are later sent to the parameter server. I would expect cifar10.train_part1 (the one that returns only_gradients) to depend on the variable values and define the update.
Update: I looked into the code and changed my mind. Had to google and found next answer that shed some light on what is happening.
Gradients are actually not applied anywhere in this code, even implicitly. Instead, the gradients are sent to the parameter server; the parameter server averages the gradients and applies them to the weights, then returns the weights to the local worker. *The received weights are used instead of the local weights during the session run, through feed_dict*, i.e. the local weights are never actually updated and do not matter at all. The key is that feed_dict allows you to override the value of any tensor in a session run, and this code overrides the variables.

stress centrality in social network

I get the following error from this code:
path[index][4] += 1
IndexError: list index out of range
Why does this happen, and how can I fix this error?
Code:
def stress_centrality(g):
# For every ordered pair of distinct nodes (a, b), walks the predecessor
# lists obtained for b starting from a and, each time the walk reaches b,
# adds 1 to `stress` for every interior vertex of the current path.
stress = defaultdict(int)
for a in nx.nodes_iter(g):
for b in nx.nodes_iter(g):
if a==b:
continue
# NOTE(review): this reads `G`, not the parameter `g` -- confirm a
# module-level `G` is intended.
pred = nx.predecessor(G,b) # for unweighted graphs
#pred, distance = nx.dijkstra_predecessor_and_distance(g,b) # for weighted graphs
# NOTE(review): this returns an empty list from the whole function (not
# the `stress` dict) as soon as one `a` is missing from `pred`.
if a not in pred:
return []
# `path` is used as an explicit stack of [node, i] pairs, where i is the
# index of the next entry of pred[node] to try.
path = [[a,0]]
path_length = 1
index = 0
while index >= 0:
n,i = path[index]
if n == b:
# [1:-1] drops the two endpoints, leaving only interior vertices.
for vertex in list(map(lambda x:x[0], path[:index+1]))[1:-1]:
stress[vertex] += 1
if len(pred[n]) >i:
index += 1
if index == path_length:
path.append([pred[n][i],0])
path_length += 1
else:
path[index] = [pred[n][i],0]
else:
index -= 1
if index >= 0:
# NOTE(review): every entry stored in `path` above is a two-element
# list, so position 4 does not exist in it.
path[index][4] += 1
return stress
Without the data it's hard to give you anything more than an indicative answer.
This line
path[index][4] += 1
assumes there are 5 elements in path[index] but there are fewer than that. It seems to me that your code only assigns or appends to path lists of length 2. As in
path = [[a,0]]
path.append([pred[n][i],0])
path[index] = [pred[n][i],0]
So it's hard to see how accessing the 5th element of one of those lists could ever be correct.
This is a complete guess, but I think you might have meant
path[index][1] += 4

How to predict in pycaffe?

I have a model that has been trained on CIFAR-10, but I don't understand how to make a prediction with pycaffe.
I got an image from lmdb but I don't know how to load it in a net and get a predicted class.
My code:
net = caffe.Net('acc81/model.prototxt',
'acc81/cifar10_full_iter_70000.caffemodel.h5',
caffe.TEST)
lmdb_env = lmdb.open('cifar10_test_lmdb/')
lmdb_txn = lmdb_env.begin()
lmdb_cursor = lmdb_txn.cursor()
for key, value in lmdb_cursor:
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
image = caffe.io.datum_to_array(datum)
image = image.astype(np.uint8)
# What's next with the image variable?
# If i try:
# out = net.forward_all(data=np.asarray([image]))
# I get Exception: Input blob arguments do not match net inputs.
print("Image class is " + label)
Use this python script
# Run the script with anaconda-python
# $ /home/<path to anaconda directory>/anaconda/bin/python LmdbClassification.py
import sys
import numpy as np
import lmdb
import caffe
from collections import defaultdict
caffe.set_mode_gpu()

# Modify the paths given below
deploy_prototxt_file_path = '/home/<username>/caffe/examples/cifar10/cifar10_deploy.prototxt'  # Network definition file
caffe_model_file_path = '/home/<username>/caffe/examples/cifar10/cifar10_iter_5000.caffemodel'  # Trained Caffe model file
test_lmdb_path = '/home/<username>/caffe/examples/cifar10/cifar10_test_lmdb/'  # Test LMDB database path
mean_file_binaryproto = '/home/<username>/caffe/examples/cifar10/mean.binaryproto'  # Mean image file

# Extract mean from the mean image file
mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
with open(mean_file_binaryproto, 'rb') as f:
    mean_blobproto_new.ParseFromString(f.read())
mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)

# CNN reconstruction and loading the trained weights
net = caffe.Net(deploy_prototxt_file_path, caffe_model_file_path, caffe.TEST)

count = 0
correct = 0
matrix = defaultdict(int)  # (real,pred) -> int
labels_set = set()

lmdb_env = lmdb.open(test_lmdb_path)
lmdb_txn = lmdb_env.begin()
lmdb_cursor = lmdb_txn.cursor()

# Classify every record in the LMDB and keep running accuracy statistics.
for key, value in lmdb_cursor:
    datum = caffe.proto.caffe_pb2.Datum()
    datum.ParseFromString(value)
    label = int(datum.label)
    image = caffe.io.datum_to_array(datum)
    image = image.astype(np.uint8)

    # The mean image is subtracted from the single-image batch before the
    # forward pass; the predicted label is the argmax of the 'prob' output.
    out = net.forward_all(data=np.asarray([image]) - mean_image)
    plabel = int(out['prob'][0].argmax(axis=0))

    count += 1
    iscorrect = label == plabel
    correct += (1 if iscorrect else 0)
    matrix[(label, plabel)] += 1
    labels_set.update([label, plabel])

    if not iscorrect:
        print("\rError: key = %s, expected %i but predicted %i" % (key, label, plabel))
    # "\r" rewrites the same console line with the running accuracy.
    sys.stdout.write("\rAccuracy: %.1f%%" % (100.*correct/count))
    sys.stdout.flush()

# Single-argument print(...) calls behave the same on Python 2 and 3,
# unlike the bare `print "..."` statement form.
print("\n" + str(correct) + " out of " + str(count) + " were classified correctly")
print("")
print("Confusion matrix:")
print("(r , p) | count")
# Sorted so the confusion-matrix rows come out in a stable label order.
for l in sorted(labels_set):
    for pl in sorted(labels_set):
        print("(%i , %i) | %i" % (l, pl, matrix[(l, pl)]))

Dynamic piping with FRP

Consider a problem:
split file by lines
write lines to a result file
if a result file exceeds some size create a new result file
For example, if I have a file that is 4 GB in size and the split size is 1 GB, the result is four files of 1 GB each.
I'm looking for a solution with something like Rx*/Bacon or any other similar library in any language.
My solution in Coffee with Highland.js:
_ = require('underscore')
H = require('highland')
fs = require('fs')
debug = require('debug')
log = debug('main')
assert = require('assert')
readS = H(fs.createReadStream('walmart.dump')).map((buffer) ->
{ buffer: buffer }
)
MAX_SIZE = 10 ** 7
counter = 0
nextStream = ()->
stream = fs.createWriteStream("result/data#{counter}.txt")
wrapper = H.wrapCallback(stream.write.bind(stream))
counter += 1
return wrapper
debug('profile')('start')
s = readS.scan({
size: 0
stream: nextStream()
}, (acc, {buffer}) ->
debug('scan')(acc, buffer)
acc.size += buffer.length
acc.buffer = buffer
if acc.size > MAX_SIZE
debug('notify')(counter - 1, acc.size)
acc.size = 0
acc.stream = nextStream()
log(acc)
return acc
).filter((x)->x.buffer?)
s.parallel 4
s.flatMap((x) ->
debug('flatMap')(x)
x.stream(x.buffer)
)
.done -> debug('profile')('finish')
walmart.dump is a text file which contains 6 GB of text. Splitting it into 649 files takes:
profile start +0ms
profile finish +53s