Openmdao V1.7 Sellar MDF - mdf

I found something strange with the MDA of the Sellar problem on the OpenMDAO documentation page (http://openmdao.readthedocs.io/en/1.7.3/usr-guide/tutorials/sellar.html)
If I extract the code and run only the MDA (adding counters in the disciplines), I observe that the number of calls differs between the disciplines (d1 is called twice as many times as d2), which is not expected. Does anyone have an explanation?
Here is the results
Coupling vars: 25.588303, 12.058488
Number of discipline 1 and 2 calls (10,5)
And here is the code
# For printing, use this import if you are running Python 2.x from __future__ import print_function
import numpy as np
from openmdao.api import Component from openmdao.api import ExecComp, IndepVarComp, Group, NLGaussSeidel, \
ScipyGMRES
class SellarDis1(Component):
    """First Sellar discipline: maps (z, x, y2) to the coupling output y1.

    The instance also counts how many times it has been executed in
    ``execution_count``.
    """

    def __init__(self):
        super(SellarDis1, self).__init__()

        # Inputs: global design vector, local design variable, coupling input.
        self.add_param('z', val=np.zeros(2))
        self.add_param('x', val=0.)
        self.add_param('y2', val=1.0)

        # Coupling output produced by this discipline.
        self.add_output('y1', val=1.0)

        # Incremented once per solve_nonlinear call.
        self.execution_count = 0

    def solve_nonlinear(self, params, unknowns, resids):
        """Evaluate y1 = z1**2 + z2 + x1 - 0.2*y2 and bump the call counter."""
        design = params['z']
        unknowns['y1'] = (design[0]**2 + design[1]
                          + params['x'] - 0.2*params['y2'])
        self.execution_count += 1

    def linearize(self, params, unknowns, resids):
        """Return the partial derivatives of y1, keyed by (output, input)."""
        return {
            ('y1', 'y2'): -0.2,
            ('y1', 'z'): np.array([[2*params['z'][0], 1.0]]),
            ('y1', 'x'): 1.0,
        }
class SellarDis2(Component):
    """Second Sellar discipline: maps (z, y1) to the coupling output y2.

    The instance also counts how many times it has been executed in
    ``execution_count``.
    """

    def __init__(self):
        super(SellarDis2, self).__init__()

        # Inputs: global design vector and coupling input.
        self.add_param('z', val=np.zeros(2))
        self.add_param('y1', val=1.0)

        # Coupling output produced by this discipline.
        self.add_output('y2', val=1.0)

        # Incremented once per solve_nonlinear call.
        self.execution_count = 0

    def solve_nonlinear(self, params, unknowns, resids):
        """Evaluate y2 = |y1|**0.5 + z1 + z2 and bump the call counter."""
        design = params['z']

        # The magnitude of y1 is used so the square root stays real. The
        # original author notes that y1 is constrained to be above 3.16, so
        # the solver is simply allowed to converge and the optimizer is
        # expected to discard such points.
        magnitude = abs(params['y1'])

        unknowns['y2'] = magnitude**.5 + design[0] + design[1]
        self.execution_count += 1

    def linearize(self, params, unknowns, resids):
        """Return the partial derivatives of y2, keyed by (output, input).

        NOTE(review): the y1 derivative uses params['y1'] directly, whereas
        solve_nonlinear takes abs(y1) first -- confirm this is acceptable
        for negative y1.
        """
        return {
            ('y2', 'y1'): .5*params['y1']**-.5,
            # The nested brackets make this a 2D array (num outputs x num
            # inputs) rather than a 1D one.
            ('y2', 'z'): np.array([[1.0, 1.0]]),
        }
class SellarDerivatives(Group):
    """Sellar MDA group assembled from the disciplines that provide
    derivatives, plus objective and constraint expression components."""

    def __init__(self):
        super(SellarDerivatives, self).__init__()

        # Independent design variables.
        self.add('px', IndepVarComp('x', 1.0), promotes=['x'])
        self.add('pz', IndepVarComp('z', np.array([5.0, 2.0])), promotes=['z'])

        # The two coupled disciplines (d1 is added before d2).
        self.add('d1', SellarDis1(), promotes=['z', 'x', 'y1', 'y2'])
        self.add('d2', SellarDis2(), promotes=['z', 'y1', 'y2'])

        # Objective and constraints expressed as ExecComps.
        objective = ExecComp('obj = x**2 + z[1] + y1 + exp(-y2)',
                             z=np.array([0.0, 0.0]), x=0.0, y1=0.0, y2=0.0)
        self.add('obj_cmp', objective, promotes=['obj', 'z', 'x', 'y1', 'y2'])

        constraint1 = ExecComp('con1 = 3.16 - y1')
        self.add('con_cmp1', constraint1, promotes=['y1', 'con1'])

        constraint2 = ExecComp('con2 = y2 - 24.0')
        self.add('con_cmp2', constraint2, promotes=['con2', 'y2'])

        # Nonlinear Gauss-Seidel with a tight absolute tolerance for the
        # coupling, and GMRES as the linear solver.
        gauss_seidel = NLGaussSeidel()
        gauss_seidel.options['atol'] = 1.0e-12
        self.nl_solver = gauss_seidel
        self.ln_solver = ScipyGMRES()
from openmdao.api import Problem, ScipyOptimizer

# Build the problem around the Sellar group.  (These were two statements
# fused onto one line, which is a syntax error.)
top = Problem()
top.root = SellarDerivatives()

# The optimizer setup is intentionally left commented out so that only the
# MDA (a single run of the coupled model) is executed.
#top.driver = ScipyOptimizer()
#top.driver.options['optimizer'] = 'SLSQP'
#top.driver.options['tol'] = 1.0e-8
#
#top.driver.add_desvar('z', lower=np.array([-10.0, 0.0]),
# upper=np.array([10.0, 10.0]))
#top.driver.add_desvar('x', lower=0.0, upper=10.0)
#
#top.driver.add_objective('obj')
#top.driver.add_constraint('con1', upper=0.0)
#top.driver.add_constraint('con2', upper=0.0)

top.setup()

# Setting initial values for design variables.  (These assignments had been
# swallowed by the comment line; they equal the IndepVarComp defaults.)
top['x'] = 1.0
top['z'] = np.array([5.0, 2.0])

top.run()

# Report the converged coupling variables and how often each discipline ran.
print("\n")
print("Coupling vars: %f, %f" % (top['y1'], top['y2']))
count1 = top.root.d1.execution_count
count2 = top.root.d2.execution_count
print("Number of discipline 1 and 2 calls (%i,%i)"% (count1,count2))

This is a good observation. Whenever you have a cycle, the "head" component runs a second time. The reason is as follows:
If you have a model with components that contain implicit states, a single execution looks like this:
Call solve_nonlinear to execute components
Call apply_nonlinear to calculate the residuals.
We don't have any components with implicit states in this model, but we indirectly created the need for one by having a cycle. Our execution looks like this:
Call solve_nonlinear to execute all components.
Call apply_nonlinear (which caches the unknowns, calls solve_nonlinear, and saves the difference in the unknowns) on just the "head" component to generate a residual that we can converge.
Here, the head component is simply the first component that is executed, according to whatever order the framework chooses for running the cycle. You can verify that only a single head component gets the extra runs by building a cycle with more than two components.

Related

scipy.integrate.nquad ignoring opts?

I need to compute a numerical (triple) integral, but do not need very high precision on the value, and would therefore like to sacrifice some precision for speed when using nquad. I thought that I might be able to do this by increasing the epsrel and/or epsabs options, but they seem to have no effect. For example (note, this is just an example integrand - I don't actually need to compute this particular integral...):
import numpy as np
from scipy.integrate import nquad
def integrand(l, b, d, sigma=250):
    """Return a zero-mean Gaussian density evaluated at z = d*sin(b).

    Args:
        l: First angle. It is accepted so the signature matches the
            integration variables, but the value does not depend on it.
        b: Second angle; only its sine enters the result.
        d: Distance multiplying sin(b).
        sigma: Standard deviation of the Gaussian.

    Returns:
        exp(-z**2 / (2*sigma**2)) / sqrt(2*pi*sigma**2).
    """
    # The x and y components were computed on every call but never used;
    # only the z component contributes to the returned value.
    z = d * np.sin(b)
    return np.exp(-0.5 * z**2 / sigma**2) / np.sqrt(2*np.pi * sigma**2)
# Integration limits, one (lower, upper) pair per integration variable.
ranges = [(0, 2*np.pi), (0.5, np.pi/2), (0, 1000.)]

# Reference run: no `opts`, so the default epsrel and epsabs are used.
result1 = nquad(integrand, ranges=ranges, full_output=True)

# Same integral with explicit `quad` options supplied as a single dict.
quad_opts = {'epsabs': 1e-1, 'epsrel': 0, 'limit': 3}
result2 = nquad(integrand, ranges=ranges, full_output=True, opts=quad_opts)
Both outputs are identical:
>>> print(result1)
(4.252394424844468, 1.525272379143154e-12, {'neval': 9261})
>>> print(result2)
(4.252394424844468, 1.525272379143154e-12, {'neval': 9261})
A full example is included here: https://gist.github.com/adrn/b9aa92c236df011dbcdc131aa94ed9f9
Is this not the right approach, or is scipy.integrate ignoring my inputted opts?
The scipy.integrate.nquad documentation states that opts are options passed on to quad, as can be seen here:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.nquad.html
Example of application:
import numpy as np
from scipy.integrate import quad
def integrand(a, sigma=250):
    """Return 2*sin(a)*cos(a); ``sigma`` is accepted but not used."""
    return 2 * np.sin(a) * np.cos(a)
# Baseline: default epsrel and epsabs, integrating over [0, 2*pi].
result1 = quad(integrand, a=0, b=2*np.pi)
# Explicit `quad` options; note that this call integrates over [0, 4*pi].
result2 = quad(integrand, a=0, b=4*np.pi, epsabs=1e-6, epsrel=1e-6, limit=40)
returns:
result1: (-1.3690011097614755e-16, 4.4205541621600365e-14)
result2: (-1.7062635631484713e-15, 9.096805257467047e-14)
The reason nquad doesn't complain about the presence of options is that nquad includes quad, dblquad and tplquad.

ORTOOLS - CPSAT - Objective to minimize a value by intervals

In my OR-Tools CP-SAT model, I am computing a variable called salary_var (among others). I need to minimize an objective; let's call it « taxes ».
to compute the taxes, the formula is not linear but organised this way:
if salary_var below 10084, taxes corresponds to 0%
between 10085 and 25710, taxes corresponds to 11%
between 25711 and 73516, taxes corresponds to 30%
and 41% for above
For example, if salary_var is 30000 then, taxes are:
(25710-10085) * 0.11 + (30000-25711) * 0.3 = 1718.75 + 1286.70 = 3005.45 ≈ 3005
My question: how can I efficiently code my « taxes » objective?
Thanks for your help
Seb
This task looks rather strange, there is not much context and some parts of the task might touch some not-so-nice areas of finite-domain based solvers (large domains or scaling / divisions during solving).
Therefore: consider this as an idea / template!
Code
from ortools.sat.python import cp_model
# Data
# Salary that the demo pins salary_var to.
INPUT = 30000
# Bound used for the domains of the integer variables in the model.
INPUT_UB = 1000000
# Bracket rates as whole percentages (11%, 30%, 41%), so everything stays integral.
TAX_A = 11
TAX_B = 30
TAX_C = 41
# Helpers
def aux_var_offset(model, var, offset):
    """Create and return a new variable constrained to equal ``var - offset``.

    The new variable's domain is [-INPUT_UB, INPUT_UB], so it can take
    negative values.
    """
    shifted = model.NewIntVar(-INPUT_UB, INPUT_UB, "")
    model.Add(shifted == var - offset)
    return shifted
def aux_var_nonnegative(model, var):
    """Create and return a new variable equal to ``max(var, 0)``.

    The result equals ``var`` when ``var`` is >= 0 and is 0 otherwise.
    """
    clipped = model.NewIntVar(0, INPUT_UB, "")
    zero = model.NewConstant(0)
    model.AddMaxEquality(clipped, [var, zero])
    return clipped
# Model
model = cp_model.CpModel()

# The salary, plus the part of it that falls into each tax bracket.
salary_var = model.NewIntVar(0, INPUT_UB, "salary")
tax_component_a = model.NewIntVar(0, INPUT_UB, "tax_11")
tax_component_b = model.NewIntVar(0, INPUT_UB, "tax_30")
tax_component_c = model.NewIntVar(0, INPUT_UB, "tax_41")

# 11% bracket: salary above 10085, capped at the width of the bracket.
excess_over_a = aux_var_nonnegative(model, aux_var_offset(model, salary_var, 10085))
width_a = model.NewConstant(25710 - 10085)
model.AddMinEquality(tax_component_a, [excess_over_a, width_a])

# 30% bracket: salary above 25711, capped at the width of the bracket.
excess_over_b = aux_var_nonnegative(model, aux_var_offset(model, salary_var, 25711))
width_b = model.NewConstant(73516 - 25711)
model.AddMinEquality(tax_component_b, [excess_over_b, width_b])

# 41% bracket: everything above 73516, uncapped.
excess_over_c = aux_var_nonnegative(model, aux_var_offset(model, salary_var, 73516))
model.Add(tax_component_c == excess_over_c)

# Rates are whole percentages, so this expression is 100x the real tax.
tax_full_scaled = (tax_component_a * TAX_A
                   + tax_component_b * TAX_B
                   + tax_component_c * TAX_C)

# Demo: fix the salary and print each bracket amount and the scaled tax.
model.Add(salary_var == INPUT)
solver = cp_model.CpSolver()
status = solver.Solve(model)
reported = [tax_component_a, tax_component_b, tax_component_c, tax_full_scaled]
print([solver.Value(expr) for expr in reported])
Output
[15625, 4289, 0, 300545]
Remarks
As implemented:
uses scaled solving
produces scaled solution (300545)
no fiddling with non-integral / ratio / rounding stuff BUT large domains
Alternative:
Maybe something around AddDivisionEquality
Edit in regards to Laurents comments
In some scenarios, solving the scaled problem but being able to reason about the real unscaled values easier might make sense.
If i interpret the comment correctly, the following would be a demo (which i was not aware of and it's cool!):
Updated Demo Code (partial)
# Demo -> objective-scaling suggestion: bound the salary, minimize it, and
# let the solver report the objective multiplied by scaling_factor.
model.Add(salary_var >= 30000)
model.Add(salary_var <= 40000)
model.Minimize(salary_var)
model.Proto().objective.scaling_factor = 0.001  # inverse scaling for reporting

solver = cp_model.CpSolver()
solver.parameters.log_search_progress = True  # log shows the scaled-back objective
status = solver.Solve(model)

reported = [tax_component_a, tax_component_b, tax_component_c, tax_full_scaled]
print([solver.Value(expr) for expr in reported])
print(solver.ObjectiveValue())  # scaled-back objective
Output (excerpt)
...
...
#1 0.00s best:30 next:[30,29.999] fixed_bools:0/1
#Done 0.00s
CpSolverResponse summary:
status: OPTIMAL
objective: 30
best_bound: 30
booleans: 1
conflicts: 0
branches: 1
propagations: 0
integer_propagations: 2
restarts: 1
lp_iterations: 0
walltime: 0.0039022
usertime: 0.0039023
deterministic_time: 8e-08
primal_integral: 1.91832e-07
[15625, 4289, 0, 300545]
30.0

Where the weights get updated in this code?

I want to train a model in a distributed system. I found code on GitHub for distributed training in which each worker node sends its gradients to the parameter server and the parameter server sends the averaged gradients back to the workers. But in the client/worker-side code, I can't understand where the received gradients update the weights and biases.
Here is the client/worker-side code: it receives initial values from the parameter server, then calculates the loss and gradients and sends the gradient values to the server again.
from __future__ import division
from __future__ import print_function
import numpy as np
import sys
import pickle as pickle
import socket
from datetime import datetime
import time
import tensorflow as tf
import cifar10
# Address of the parameter server the worker connects to.
TCP_IP = 'some IP'
# NOTE(review): TCP_PORT is not read anywhere in the visible code; the ports
# actually used are `port` and `port_main`, which main() fills in from argv.
TCP_PORT = 5014
# Placeholders overwritten by main(): `port` is this worker's own port,
# `port_main` is the base port used for the initial variable download.
port = 0
port_main = 0
# Placeholder declared global in main(); train() binds its own local `s`.
s = 0
# Command-line flags for the training run.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/home/ubuntu/cifar10_train',
"""Directory where to write event logs """
"""and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 5000,
"""Number of batches to run.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
tf.app.flags.DEFINE_integer('log_frequency', 10,
"""How often to log results to the console.""")
#gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.30)
def safe_recv(size, server_socket):
    """Receive exactly ``size`` bytes from ``server_socket``.

    ``recv`` may return fewer bytes than requested, so it is called
    repeatedly until the full message has arrived.

    Args:
        size: Number of bytes to read.
        server_socket: Connected socket-like object exposing ``recv``.

    Returns:
        A ``bytes`` object of length ``size`` (empty when ``size`` <= 0).

    Raises:
        socket.error: If the peer closes the connection before ``size``
            bytes have been received. Previously this case looped forever,
            because ``recv`` keeps returning an empty result on a closed
            connection.
    """
    data = bytearray()
    while len(data) < size:
        try:
            chunk = server_socket.recv(size - len(data))
        except socket.timeout:
            # A timeout is transient: report it and retry, as before.
            # Other errors now propagate instead of being retried forever
            # by a bare ``except`` (which also swallowed KeyboardInterrupt).
            print("Error")
            continue
        if not chunk:
            raise socket.error(
                "connection closed after %d of %d bytes" % (len(data), size))
        data.extend(chunk)
    return bytes(data)
def _recv_pickled(sock):
    """Read one length-prefixed pickle message from ``sock`` and unpickle it."""
    # The peer first sends a 17-byte pickled integer holding the payload size.
    payload_size = pickle.loads(safe_recv(17, sock))
    # NOTE(security): pickle.loads can execute arbitrary code supplied by the
    # peer; only connect to a trusted parameter server.
    return pickle.loads(safe_recv(payload_size, sock))


def _feed_dict_from(var_vals):
    """Map each trainable variable of the default graph to its received value."""
    trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    return {var: var_vals[i] for i, var in enumerate(trainable)}


def train():
    """Train CIFAR-10 for a number of steps as a parameter-server worker.

    The worker never applies gradients itself. It downloads the variable
    values from the parameter server, feeds them into the graph through
    ``feed_dict``, computes gradients, sends them to the server and receives
    the updated variable values for the next step.

    Both sockets are now closed even when an error interrupts the exchange,
    and the duplicated receive/feed_dict code is shared through helpers.
    """
    global port
    g1 = tf.Graph()
    with g1.as_default():
        global_step = tf.Variable(-1, name='global_step',
                                  trainable=False, dtype=tf.int32)
        increment_global_step_op = tf.assign(global_step, global_step+1)

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Only the gradient tensors are needed; the variables come from the
        # parameter server.
        grads = cifar10.train_part1(loss, global_step)
        only_gradients = [g for g, _ in grads]

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                       tf.train.NanTensorHook(loss),
                       _LoggerHook()],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement)) as mon_sess:
            # Fetch the initial variable values over a short-lived connection
            # to the base port.
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.connect((TCP_IP, port_main))
                var_vals = _recv_pickled(s)
            finally:
                s.close()
            feed_dict = _feed_dict_from(var_vals)
            print("Received variable values from ps")

            # Opening the socket and connecting to server
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.connect((TCP_IP, port))
                while not mon_sess.should_stop():
                    gradients, step_val = mon_sess.run(
                        [only_gradients, increment_global_step_op],
                        feed_dict=feed_dict)

                    # sending the gradients, prefixed by their pickled size
                    send_data = pickle.dumps(gradients, pickle.HIGHEST_PROTOCOL)
                    send_size = pickle.dumps(len(send_data), pickle.HIGHEST_PROTOCOL)
                    s.sendall(send_size)
                    s.sendall(send_data)

                    # receiving the variable values for the next step
                    feed_dict = _feed_dict_from(_recv_pickled(s))
            finally:
                s.close()
def main(argv=None):  # pylint: disable=unused-argument
    """Parse ``<port> <worker-id>`` from sys.argv, reset the training
    directory, run train() and print the elapsed time."""
    global port
    global port_main
    global s

    if len(sys.argv) != 3:
        print("<port> <worker-id> required")
        sys.exit()

    # This worker's port is the base port offset by its worker id.
    base_port, worker_id = int(sys.argv[1]), int(sys.argv[2])
    port = base_port + worker_id
    port_main = base_port
    print("Connecting to port ", port)

    cifar10.maybe_download_and_extract()

    # Start from an empty training directory.
    if tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.DeleteRecursively(FLAGS.train_dir)
    tf.gfile.MakeDirs(FLAGS.train_dir)

    total_start_time = time.time()
    train()
    print("--- %s seconds ---" % (time.time() - total_start_time))


if __name__ == '__main__':
    tf.app.run()
EDIT:
Here is the train_part1() code:
def train_part1(total_loss, global_step):
    """Build the gradient computation for the CIFAR-10 model.

    Creates an exponentially decaying learning rate and a gradient-descent
    optimizer, and computes (but does not apply) the gradients of the loss.

    Args:
      total_loss: Total loss from loss().
      global_step: Integer Variable counting the number of training steps
        processed.
    Returns:
      The result of ``compute_gradients(total_loss)``; no update op is
      created here.
    """
    # Quantities that drive the learning-rate schedule.
    batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Staircase exponential decay of the learning rate over the steps.
    learning_rate = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE, global_step, decay_steps,
        LEARNING_RATE_DECAY_FACTOR, staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    # Moving averages of all losses and their summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # The gradients are computed only after the loss averages are updated.
    with tf.control_dependencies([loss_averages_op]):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        return optimizer.compute_gradients(total_loss)
To me it seems that line
gradients, step_val = mon_sess.run(
[only_gradients, increment_global_step_op], feed_dict=feed_dict)
receives new values for the variables in feed_dict, assigns these values to the variables, and makes a training step, during which it only calculates and returns the gradients that are later sent to the parameter server. I would expect cifar10.train_part1 (the one that returns only_gradients) to depend on the variable values and define the update.
Update: I looked into the code and changed my mind. Had to google and found next answer that shed some light on what is happening.
Gradients are actually not applied anywhere in this code, not even implicitly. Instead, the gradients are sent to the parameter server; the parameter server averages the gradients and applies them to the weights, then returns the weights to the local worker. The received weights are used instead of the local weights during the session run, through feed_dict, i.e. the local weights are never actually updated and do not matter at all. The key is that feed_dict allows overriding the value of any tensor during a session run, and this code overrides the variables.

How I read mouse data In a non blocking way

I'm implementing a fail-safe handover procedure in ROS, and I'm using Python scripts to do so.
I'm using the optical sensor of a mouse to monitor the acceleration of the object so I can detect when it is falling. Everything seems to work fine, but now I want to put a limit on the monitoring procedure (let's say 1000 iterations) before declaring the handover succeeded. The problem is that the read function I use for the mouse gets stuck: if no movement is detected, the next iteration is not performed. How can I read from the device without running into this issue?
Here is the code I'm using to read from the mouse:
def getMouseEvent():
    """Read one 3-byte packet from the open mouse device and return [x, y].

    The first byte is skipped; the remaining two bytes are decoded as
    signed X and Y deltas.
    """
    packet = file.read(3)
    return list(struct.unpack("bb", packet[1:]))
Here the loop I want to implement
release_grasp()
rospy.loginfo( "Force detected -- Release mode active")
# If the object is falling regrasp it; give up after 1000 checks.
detected = False
trials = 0
while not detected and trials < 1000:
    trials = trials + 1
    # The closing parenthesis was missing here (a syntax error), and the
    # message lacked a space before the trial number.
    rospy.loginfo("Acc monitored for the " + str(trials) + "th time")
    if fall_test():
        cilindrical_grasp()
        rospy.loginfo("Fall detected -- Object regrasped")
        detected = True
    rate.sleep()
The output stalls at a given iteration until the mouse detects some kind of movement.
UPDATE: Here is the full code
#!/usr/bin/env python2
import rospy
import numpy
import struct
from reflex_sf_msgs.msg import SFPose
from matteo.msg import force
from matteo.msg import acc
# Start the ROS node, then define the force threshold in each direction
# ( to be completed and tuned ).
rospy.init_node('DetectionFail')
xt = 0.5
yt = xt
zt = 0.3
# Latest force readings; they stay None until callback_force stores a message.
# For the future try to handle the initialization.
fx = None
fy = None
fz = None
# NOTE(review): ax and ay are not read anywhere in the visible code.
ax = None
ay = None
rate = rospy.Rate(100) # <--- Rate Hz
#-----------------------------MOUSE-----------------------------------#
# Open the mouse device. To be sure if it is "mouse2" type in the terminal: cat /proc/bus/input/devices, look for the device whose name is "Logitech optical USB mouse" and get the name of the handler. If you need root permissions type: sudo chmod 777 /dev/input/(handler)
# NOTE(review): the comment above mentions "mouse2" but the code opens mouse3.
# The device is opened in blocking binary mode, so reads wait for mouse data.
file = open ("/dev/input/mouse3" , "rb")
#Defining the function to read mouse deltas.
def getMouseEvent():
    """Read one 3-byte packet from the open mouse device and return [x, y].

    The first byte is skipped; the remaining two bytes are decoded as
    signed X and Y deltas.
    """
    packet = file.read(3)
    return list(struct.unpack("bb", packet[1:]))
def acc_comp():
    """Estimate the acceleration from two consecutive mouse readings.

    Returns:
        [x_acc, y_acc], each computed as (previous delta - new delta) * 100.
        On the first call the previous deltas are taken as 0.

    Two defects of the earlier version are fixed here:
    * the previous deltas were local variables reset to 0 on every call, so
      the stores at the end were dead and nothing was ever differenced;
    * the mouse was read twice, so x and y came from different packets
      (and each call blocked on two reads instead of one).
    """
    # One packet provides both deltas.
    vx_new, vy_new = getMouseEvent()

    # The previous reading is kept on the function object between calls.
    vx_old, vy_old = getattr(acc_comp, "_previous", (0, 0))
    acc_comp._previous = (vx_new, vy_new)

    x_acc = (vx_old - vx_new)*100
    y_acc = (vy_old - vy_new)*100
    return [x_acc , y_acc]
#---------------------------------------------------------------------#
def fall_test():
    """Return True when the Y acceleration magnitude reaches 3000.

    The acceleration is sampled once per test. Previously acc_comp() was
    called twice, so the two comparisons looked at different samples and
    every test consumed (and blocked on) twice as many mouse packets.
    """
    y_acc = acc_comp()[1]
    return y_acc >= 3000 or y_acc <= -3000
#---------------------------------------------------------------------#
# Initialize hand publisher.
# It publishes SFPose messages on '/reflex_sf/command' with a queue size of 1.
hand_pub = rospy.Publisher('/reflex_sf/command', SFPose, queue_size=1)
# Pause for 0.5 s (presumably to let the publisher connect -- TODO confirm).
rospy.sleep(0.5)
#---------------------------------------------------------------------#
def cilindrical_grasp():
    """Publish the grasp command (2.5, 2.5, 2.5, 0) through hand_pub."""
    grasp_command = (2.5, 2.5, 2.5, 0)
    hand_pub.publish(*grasp_command)
#---------------------------------------------------------------------#
def release_grasp():
    """Publish the release command (2, 2, 2, 0) through hand_pub."""
    release_command = (2, 2, 2, 0)
    hand_pub.publish(*release_command)
#---------------------------------------------------------------------#
def force_treshold(fx, fy, fz):
    """Return True when the measured forces exceed the thresholds.

    The test passes when both fx > xt and fy > yt hold, or when fz > zt.
    """
    lateral_exceeded = fx > xt and fy > yt
    return bool(lateral_exceeded or fz > zt)
#---------------------------------------------------------------------#
def callback_force(msg):
    """Store the force components of ``msg`` in the module-level fx, fy, fz."""
    global fx, fy, fz
    fx, fy, fz = msg.fx, msg.fy, msg.fz
def main():
    """Grasp the object, release it once the force threshold is exceeded,
    then watch for a fall and regrasp if one is detected.

    After a release, the fall test is repeated at most 1000 times.
    """
    # Apply the grasp.
    rospy.loginfo("Applying grasp")
    cilindrical_grasp()

    # Subscribe exactly once. This call used to sit inside the loop, which
    # registered a new subscriber on every iteration.
    rospy.Subscriber("/Forces", force, callback_force )

    while not rospy.is_shutdown():
        # Skip the test until the first force message has been stored.
        forces_ready = fx is not None and fy is not None and fz is not None
        if forces_ready and force_treshold ( fx , fy , fz ):
            release_grasp()
            rospy.loginfo( "Force detected -- Release mode active")

            # If the object is falling regrasp it.
            detected = False
            trials = 0
            while not detected and trials < 1000:
                trials = trials + 1
                if fall_test():
                    cilindrical_grasp()
                    rospy.loginfo("Fall detected -- Object regrasped")
                    detected = True
                rate.sleep()
                if rospy.is_shutdown():
                    break

        # Pace the outer loop as well, so that waiting for a force does not
        # spin at full CPU speed.
        rate.sleep()
Yesterday I came up with this code:
#!/usr/bin/env python
import errno
import os
import struct
import time

import rospy
from matteo.msg import acc

# Non-blocking mouse read: poll the device up to 1000 times and print the
# X/Y deltas whenever a packet is available.
MOUSE_DEVICE = "/dev/input/mouse0"
PACKET_SIZE = 3

# Open the device once. It used to be reopened on every iteration and was
# never closed when no data was available, leaking one descriptor per poll.
fd = os.open(MOUSE_DEVICE, os.O_RDONLY | os.O_NONBLOCK)
try:
    for i in range(1000):
        time.sleep(0.1)
        try:
            buf = os.read(fd, PACKET_SIZE)
        except OSError as err:
            # "No data yet" is the only expected error; anything else is a
            # real failure and must not be silently ignored.
            if err.errno not in (errno.EAGAIN, errno.EWOULDBLOCK):
                raise
            print ( "No motion detected")
            continue
        if len(buf) < PACKET_SIZE:
            # Incomplete packet: there is nothing that can be decoded.
            continue
        x,y = struct.unpack( "bb", buf[1:] ) # <--- X and Y deltas.
        print ( "X:" +str ( x ) + "---" +"Y:" +str ( y ) )
finally:
    os.close(fd)
It works fine: if there is no motion, the message is printed. However, when there is motion, the output I get is quite different from that of the "vanilla" (blocking) version.

Implementing Adam in Pytorch

I’m trying to implement Adam myself for learning purposes.
Here is my Adam implementation:
class ADAMOptimizer(Optimizer):
    """Implements the Adam algorithm.

    Args:
        params: Iterable of parameters to optimize, or dicts defining
            parameter groups.
        lr: Learning rate.
        betas: Decay coefficients for the running averages of the gradient
            and of its square.
        eps: Term added to the denominator for numerical stability.
        weight_decay: L2 penalty coefficient.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.99), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super(ADAMOptimizer, self).__init__(params, defaults)

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and
                returns the loss.

        Returns:
            The value returned by ``closure``, or None when no closure is
            given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            b1, b2 = group['betas']
            for p in group['params']:
                # Parameters that received no gradient are left untouched.
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Momentum (exponential moving average of gradients)
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # RMSProp component (exponential moving average of
                    # squared gradients); used in the denominator.
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                state['step'] += 1

                # L2 penalty: added to the gradient.
                if group['weight_decay'] != 0:
                    grad = grad + group['weight_decay'] * p.data

                # Update the running averages IN PLACE so the tensors held
                # in ``state`` carry over to the next step. Out-of-place
                # arithmetic only rebound the local names, so the stored
                # averages stayed at zero and no history was accumulated.
                exp_avg.mul_(b1).add_((1 - b1) * grad)
                exp_avg_sq.mul_(b2).add_((1 - b2) * grad * grad)

                denom = exp_avg_sq.sqrt() + group['eps']

                bias_correction1 = 1 / (1 - b1 ** state['step'])
                bias_correction2 = 1 / (1 - b2 ** state['step'])

                adapted_learning_rate = group['lr'] * bias_correction1 / math.sqrt(bias_correction2)

                p.data.sub_(adapted_learning_rate * exp_avg / denom)

                # Periodic debug output.
                if state['step'] % 10000 == 0:
                    print ("group:", group)
                    print("p: ",p)
                    print("p.data: ", p.data) # W = p.data

        return loss
I think I implemented everything correctly; however, the loss graph of my implementation is very spiky compared to that of torch.optim.Adam.
My ADAM implementation loss graph (below)
torch.optim.Adam loss graph (below)
If someone could tell me what I am doing wrong, I’ll be very grateful.
For the full code including data, graph (super easy to run): https://github.com/byorxyz/AMS_pytorch/blob/master/AdamFails_1dConvex.ipynb