Celery apply_async with eta executing faster than it should

>>> def elp(min):
...     when = datetime.now() + timedelta(minutes=min)
...     print when
...     r = add.apply_async(args=[500,500], eta=when)
...     start = time.time()
...     r.get()
...     end = time.time()
...     elapsed = end - start
...     print elapsed
...
>>> elp(10)
2014-11-08 04:38:01.745000
1.00200009346
Whereas when using countdown:
>>> def elp_countdown(min):
...     r = add.apply_async(args=[500,500], countdown=(min*60))
...     start = time.time()
...     r.get()
...     end = time.time()
...     elapsed = end - start
...     print elapsed
...
>>> elp_countdown(0.5)
30.1380000114
Why does the task get executed sooner than it is supposed to when using eta?
My add task is as follows:
@task()
def add(x, y):
    return x + y
Celery version: 3.1.16 (Cipater)

After some research and lots of reading, it turns out that I needed to pass a UTC datetime to Celery, since its time zone is configured to be UTC by default.
In case anyone is wondering, changing to this:
when = datetime.utcnow() + timedelta(minutes=min)
instead of
when = datetime.now() + timedelta(minutes=min)
will make it work as it should.
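Putting it together, the timing helper from above becomes the following (a minimal sketch: add is the task defined earlier, and Celery is assumed to keep its default UTC timezone configuration):
from datetime import datetime, timedelta
import time

def elp(minutes):
    # Celery interprets a naive eta as UTC by default, so build it from utcnow()
    when = datetime.utcnow() + timedelta(minutes=minutes)
    print when
    r = add.apply_async(args=[500, 500], eta=when)
    start = time.time()
    r.get()
    print time.time() - start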

DateTime: Difference between Hour and Integer

I have mistakes in my code on the two lines that have a comment above them:
import java.time.temporal.ChronoUnit
import java.time.LocalTime
import scala.concurrent.duration._

val t = LocalTime.now()

def toStart(t: LocalTime) = {
  val start = LocalTime.of(9, 0)
  val midEnd = LocalTime.of(13, 0)
  val midStart = LocalTime.of(14, 0)
  val end = LocalTime.of(18, 0)
  if (t.isBefore(start)) 0.hours
  // if (9 > myHour < 13 ==> myHour + 9 Hours, I wrote: - 9.hours instead of + 4.hours
  else if (t.isBefore(midEnd)) t.until(midEnd, ChronoUnit.MILLIS).millis - 9.hours
  else if (t.isBefore(midStart)) 4.hours
  // if (14 > myHour < 18 Then (myhour - 14) + 4
  else if (t.isBefore(end)) t.until(end, ChronoUnit.MILLIS).millis
  else 8.hours
}

implicit class formatter(d: FiniteDuration) {
  def withMinutes = {
    val l = d.toMinutes
    s"${l / 60}:${l % 60}"
  }
  def withSeconds = s"${d.toHours}:${d.toMinutes % 60}:${d.toSeconds % 60}"
}
The output of the toStart function is wrong in the first two of these cases:
scala> toStart(LocalTime.of(9, 30, 24)).withSeconds
res89: String = -5:-30:-24
scala> toStart(LocalTime.of(12, 30, 32)).withSeconds
res90: String = -8:-30:-32
scala> toStart(LocalTime.of(14, 30, 45)).withSeconds
res92: String = 3:29:15
scala> toStart(LocalTime.of(16, 22, 44)).withSeconds
res93: String = 1:37:16
How can I change my code to get the correct result?
The code should be similar to my answer to you here, but you need to understand what I did. You should definitely check the API calls I used; I added some additional comments:
import java.time.temporal.ChronoUnit
import java.time.LocalTime
import scala.concurrent.duration._

val t = LocalTime.now()

// start of the day
val start = LocalTime.of(9, 0)
// end of the first half
val midEnd = LocalTime.of(13, 0)
// start of the second half
val midStart = LocalTime.of(14, 0)
// end of the day
val end = LocalTime.of(18, 0)

// duration of the first half of the day: diff between the start of the day and midEnd (end of the first half)
val firstHalf = start.until(midEnd, ChronoUnit.MILLIS).millis
// duration of the second half of the day: diff between the start of the second half and the end of the day
val secondHalf = midStart.until(end, ChronoUnit.MILLIS).millis

def toStart(t: LocalTime) = {
  // when the checked time is before the start of the day
  if (t.isBefore(start)) 0.hours
  // otherwise, when the checked time is before the end of the first half (diff between the start time and the checked time)
  else if (t.isBefore(midEnd)) start.until(t, ChronoUnit.MILLIS).millis
  // otherwise, when the checked time is before the start of the second half (duration of the first half)
  else if (t.isBefore(midStart)) firstHalf
  // otherwise, when the checked time is before the end of the day (duration of the first half + diff between the start of the second half and the checked time)
  else if (t.isBefore(end)) firstHalf + midStart.until(t, ChronoUnit.MILLIS).millis
  // otherwise, the sum of both durations
  else firstHalf + secondHalf
}

// here you can add any specific format for the evaluated duration
implicit class formatter(d: FiniteDuration) {
  def withMinutes = {
    // convert to minutes
    val l = d.toMinutes
    // format
    s"${l / 60}:${l % 60}"
  }
}

toStart(t).withMinutes
toStart(LocalTime.of(9, 30)).withMinutes
toStart(LocalTime.of(12, 30)).withMinutes
toStart(LocalTime.of(13, 30)).withMinutes
toStart(LocalTime.of(14, 30)).withMinutes
Spend some time and check the java.time API (specifically LocalTime.until), and the FiniteDuration API to understand the .millis suffix I used.

Loss is not decreasing at all for RNN

I have already tried changing the weight initialization parameters, the learning rate, the batch size, and the activation function to ReLU.
Still no decrease in the loss.
This is the code:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import numpy as np

no_time_steps = 28
input_size = 28
hidden_size = 30
output_size = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.01
dtype = torch.DoubleTensor

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
class RNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, batch_size):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.wxh = Variable(torch.randn(input_size, hidden_size).type(dtype)*0.1, requires_grad=True)
        self.whh = Variable(torch.randn(hidden_size, hidden_size).type(dtype)*0.1, requires_grad=True)
        self.why = Variable(torch.randn(hidden_size, output_size).type(dtype)*0.1, requires_grad=True)
        self.by = Variable(torch.Tensor(batch_size, output_size).type(dtype).zero_(), requires_grad=True)
        self.bh = Variable(torch.Tensor(batch_size, hidden_size).type(dtype).zero_(), requires_grad=True)
        self.mWxh = torch.zeros_like(self.wxh)
        self.mWhh = torch.zeros_like(self.whh)
        self.mWhy = torch.zeros_like(self.why)
        self.mbh = torch.zeros_like(self.bh)
        self.mby = torch.zeros_like(self.by)
        self.dwxh, self.dwhh, self.dwhy = torch.zeros_like(self.wxh), torch.zeros_like(self.whh), torch.zeros_like(self.why)
        self.dbh, self.dby = torch.zeros_like(self.bh), torch.zeros_like(self.by)

    def hidden_init(self, batch_size):
        self.hidden = {}
        self.hidden[0] = Variable(torch.Tensor(batch_size, hidden_size).type(dtype).zero_())

    def tanh(self, value):
        return (torch.exp(value)-torch.exp(-value))/(torch.exp(value)+torch.exp(-value))

    def parameter(self):
        self.params = torch.nn.ParameterList([torch.nn.Parameter(self.wxh.data), torch.nn.Parameter(self.whh.data), torch.nn.Parameter(self.why.data), torch.nn.Parameter(self.bh.data), torch.nn.Parameter(self.by.data)])
        return self.params

    def grad_data(self):
        print(self.dwxh, self.dwhy)

    def softmax(self, value):
        return torch.exp(value) / torch.sum(torch.exp(value))

    def updatess(self, lr):
        for param, dparam, mem in zip([self.wxh, self.whh, self.why, self.bh, self.by],
                                      [self.dwxh, self.dwhh, self.dwhy, self.dbh, self.dby],
                                      [self.mWxh, self.mWhh, self.mWhy, self.mbh, self.mby]):
            mem.data += dparam.data * dparam.data
            param.data += -learning_rate * dparam.data / torch.sqrt(mem.data + 1e-8)

    def forward(self, inputs, batch_size, no_time_steps, labels):
        self.hidden_init(batch_size)
        inputs = Variable(inputs.type(dtype))
        self.output = Variable(torch.Tensor(no_time_steps, batch_size, self.output_size).type(dtype))
        for t in xrange(no_time_steps):
            if t == 0:
                self.hidden[t] = torch.matmul(self.hidden[0], self.whh)
                self.hidden[t] += torch.matmul(inputs[:, t, :], self.wxh)
                self.hidden[t] = self.tanh(self.hidden[t] + self.bh)
            else:
                self.hidden[t] = torch.matmul(self.hidden[t-1], self.whh)
                self.hidden[t] += torch.matmul(inputs[:, t, :], self.wxh)
                self.hidden[t] = self.tanh(self.hidden[t] + self.bh)
            self.output[t] = self.softmax(torch.matmul(self.hidden[t], self.why) + self.by)
        return self.output

    def backward(self, loss, label, inputs):
        inputs = Variable(inputs.type(dtype))
        self.dhnext = torch.zeros_like(self.hidden[0])
        self.dy = self.output[27].clone()
        self.dy[:, int(label[0])] = self.dy[:, int(label[0])] - 1
        self.dwhy += torch.matmul(self.hidden[27].t(), self.dy)
        self.dby += self.dy
        for t in reversed(xrange(no_time_steps)):
            self.dh = torch.matmul(self.dy, self.why.t()) + self.dhnext  # backprop into h
            self.dhraw = (1 - self.hidden[t] * self.hidden[t]) * self.dh  # backprop through tanh nonlinearity
            self.dbh += self.dhraw  # derivative of hidden bias
            self.dwxh += torch.matmul(inputs[:, t, :].t(), self.dhraw)  # derivative of input-to-hidden weight
            self.dwhh += torch.matmul(self.hidden[t-1].t(), self.dhraw)  # derivative of hidden-to-hidden weight
            self.dhnext = torch.matmul(self.dhraw, self.whh.t())
rnn = RNN(input_size, hidden_size, output_size, batch_size)

def onehot(values, shape):
    temp = torch.Tensor(shape).zero_()
    for k, j in enumerate(labels):
        temp[k][int(j)] = 1
    return Variable(temp)

for epoch in range(5):
    for i, (images, labels) in enumerate(train_loader):
        images = images.view(-1, no_time_steps, input_size)
        outputs = rnn(images, batch_size, no_time_steps, labels)
        labels = Variable(labels.double())
        output = outputs[27, :, :]
        labelss = onehot(labels, output.shape)
        loss = -torch.mul(torch.log(output), labelss.double())
        loss = torch.sum(loss)
        rnn.backward(loss, labels, images)
        rnn.updatess(0.01)
        if i == 1110:
            break
        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                   % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))
OUTPUT:
Epoch [1/2], Step [100/600], Loss: 714.8081
Epoch [1/2], Step [200/600], Loss: 692.7232
Epoch [1/2], Step [300/600], Loss: 700.1103
Epoch [1/2], Step [400/600], Loss: 698.5468
Epoch [1/2], Step [500/600], Loss: 702.1227
Epoch [1/2], Step [600/600], Loss: 705.9571
It is difficult to find a bug in code like this, so I would suggest simplifying things a little:
1) PyTorch keeps track of parameters automatically if you write self.wxh = Parameter(...) instead of self.wxh = Variable(...), so change all of your Variables to Parameters and delete your parameter function.
2) PyTorch builds the backward pass automatically as long as your forward function is composed of operations that have a defined backward. So delete your backward function, in case there is a bug in it.
3) Use loss = torch.mean(loss) instead of loss = torch.sum(loss), so that your learning rate is independent of the batch size.
4) Using backward correctly is kind of tricky in PyTorch, so use an optimizer instead:
optimizer = torch.optim.SGD(rnn.parameters(), lr=0.03)

for epoch in range(5):
    ...
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
If, after all this, it still doesn't learn, there might be a problem in your RNN, so try a predefined PyTorch RNN to see whether your dataset is even learnable with an RNN at all (see the sketch below).
If doing this solves the problem, you can then undo the changes above one by one to discover what the problem was.
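To make points 1)-4) concrete, here is a minimal baseline along those lines, using PyTorch's predefined torch.nn.RNN, a cross-entropy loss (which averages over the batch), and an SGD optimizer. It is only a sketch, not the poster's model; it reuses input_size, hidden_size, output_size, num_epochs, no_time_steps and train_loader from the question, and it is written against a recent PyTorch API (on the 0.3-era API used in the question you would still wrap tensors in Variable and read loss.data[0] instead of loss.item()):
import torch
import torch.nn as nn

class BaselineRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BaselineRNN, self).__init__()
        # nn.RNN and nn.Linear register their weights as Parameters automatically
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x has shape (batch, time_steps, input_size)
        out, _ = self.rnn(x)
        # classify from the hidden state of the last time step
        return self.fc(out[:, -1, :])

model = BaselineRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()  # mean over the batch, so the lr is independent of batch size
optimizer = torch.optim.SGD(model.parameters(), lr=0.03)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.view(-1, no_time_steps, input_size)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()   # autograd provides all gradients; no hand-written backward needed
        optimizer.step()
        if (i + 1) % 100 == 0:
            print('Epoch %d, step %d, loss %.4f' % (epoch + 1, i + 1, loss.item()))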

Load on GPU gradually decreases when using TensorFlow's queue

I have a large amount of pickled data stored on my hard disk. I have a generator function that reads these pickled files in batches (batch_size = 512), and I'm using TensorFlow's queue to speed up the process. Currently my queue_size is 4096, and I'm using 6 threads since I have 6 physical cores. When I run the code and monitor my GPU load (a Titan X), it looks OK at the beginning, but over time I see less and less load on the GPU. I also see an increase in execution time per epoch:
Epoch 1 | Exec. time: 1646.523872
Epoch 2 | Exec. time: 1760.770192
Epoch 3 | Exec. time: 1861.450039
Epoch 4 | Exec. time: 1952.52812
Epoch 5 | Exec. time: 2167.598431
Epoch 6 | Exec. time: 2278.203603
Epoch 7 | Exec. time: 2320.280606
Epoch 8 | Exec. time: 2467.036160
Epoch 9 | Exec. time: 2584.932837
Epoch 10 | Exec. time: 2736.121618
...
Epoch 20 | Exec. time: 3841.635191
which the GPU load I observe more or less explains.
Now, the question is: why is this happening? Is this a bug in TensorFlow's queue? Have I done something wrong? I'm using TensorFlow 1.4, and in case it helps, this is how I defined my queue, enqueue and dequeue:
def get_train_queue(batch_size, data_generator, queue_size, num_threads):
    # get train queue to parallelize loading data
    q = tf.FIFOQueue(capacity = queue_size, dtypes = [tf.float32, tf.float32, tf.float32, tf.float32],
                     shapes = [[batch_size, x_height, x_width, num_channels],
                               [batch_size, num_classes],
                               [batch_size, latent_size],
                               [batch_size]])
    batch = next(data_generator)
    batch_z = np.random.uniform(-1.0, 1.0, size = (batch_size, latent_size))
    mask = get_labled_mask(labeled_rate, batch_size)
    enqueue_op = q.enqueue((batch[0], batch[1], batch_z, mask))
    qr = tf.train.QueueRunner(q, [enqueue_op] * num_threads)
    tf.train.add_queue_runner(qr)
    return q
and
def train_per_batch(sess, q, train_samples_count, batch_size, parameters, epoch):
    # train_per_batch and get train loss and accuracy
    t_total = 0
    for iteration in range(int(train_samples_count / batch_size)):
        t_start = time.time()
        data = q.dequeue()
        feed_dictionary = {parameters['x']: sess.run(data[0]),
                           parameters['z']: sess.run(data[2]),
                           parameters['label']: sess.run(data[1]),
                           parameters['labeled_mask']: sess.run(data[3]),
                           parameters['dropout_rate']: dropout,
                           parameters['d_init_learning_rate']: D_init_learning_rate,
                           parameters['g_init_learning_rate']: G_init_learning_rate,
                           parameters['is_training']: True}
        sess.run(parameters['D_optimizer'], feed_dict = feed_dictionary)
        sess.run(parameters['G_optimizer'], feed_dict = feed_dictionary)
        train_D_loss = sess.run(parameters['D_L'], feed_dict = feed_dictionary)
        train_G_loss = sess.run(parameters['G_L'], feed_dict = feed_dictionary)
        t_total += (time.time() - t_start)
I also tried tf.data.Dataset.from_generator(), as recommended for TensorFlow 1.4:
train_dataset = tf.data.Dataset.from_generator(data_generator_training_from_pickles,
                                               (tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32),
                                               ([batch_size, x_height, x_width, num_channels],
                                                [batch_size, num_classes],
                                                [batch_size, latent_size],
                                                [batch_size]))
and then using:
def train_per_batch(sess, train_dataset, train_samples_count, batch_size, parameters, epoch):
    # train_per_batch and get train loss and accuracy
    t_total = 0
    for iteration in range(int(train_samples_count / batch_size)):
        t_start = time.time()
        data = train_dataset.make_one_shot_iterator().get_next()
        feed_dictionary = {parameters['x']: sess.run(data[0]),
                           parameters['z']: sess.run(data[2]),
                           parameters['label']: sess.run(data[1]),
                           parameters['labeled_mask']: sess.run(data[3]),
                           parameters['dropout_rate']: dropout,
                           parameters['d_init_learning_rate']: D_init_learning_rate,
                           parameters['g_init_learning_rate']: G_init_learning_rate,
                           parameters['is_training']: True}
        sess.run(parameters['D_optimizer'], feed_dict = feed_dictionary)
        sess.run(parameters['G_optimizer'], feed_dict = feed_dictionary)
        train_D_loss = sess.run(parameters['D_L'], feed_dict = feed_dictionary)
        train_G_loss = sess.run(parameters['G_L'], feed_dict = feed_dictionary)
        t_total += (time.time() - t_start)
This version is the worst of all: there is obviously no queueing happening.
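For reference, the usual tf.data pattern is to build the iterator and its get_next op once, outside the training loop, and to fetch the whole batch with a single sess.run, instead of rebuilding the iterator and running each tensor separately on every iteration. This is only a sketch against the train_dataset defined above (the extra feed keys such as dropout and the learning rates are omitted for brevity), and whether it removes the slowdown in this particular setup is an assumption:
# build the input pipeline once; no new graph nodes are created inside the loop
iterator = train_dataset.make_one_shot_iterator()
data = iterator.get_next()

for iteration in range(int(train_samples_count / batch_size)):
    # one run fetches all the tensors belonging to the same batch
    x_batch, label_batch, z_batch, mask_batch = sess.run([data[0], data[1], data[2], data[3]])
    feed_dictionary = {parameters['x']: x_batch,
                       parameters['label']: label_batch,
                       parameters['z']: z_batch,
                       parameters['labeled_mask']: mask_batch,
                       parameters['is_training']: True}
    sess.run([parameters['D_optimizer'], parameters['G_optimizer']], feed_dict=feed_dictionary)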

Difference between UTC time values in Scala

I would like to compute the difference between the current UTC time and a UTC value in Scala.
How about the following?
import java.time._
val time1 = Instant.parse("2017-01-01T12:00:00Z")
val time2 = Instant.now()
val diff = Duration.between(time1, time2)
println("Difference is " + diff)
println("or in seconds: " + diff.getSeconds())
Further reading:
https://docs.oracle.com/javase/8/docs/api/java/time/package-summary.html
https://docs.oracle.com/javase/8/docs/api/java/time/Instant.html
https://docs.oracle.com/javase/8/docs/api/java/time/Duration.html

How to get the same initial results if a seed is provided, without restarting the IPython kernel, in TensorFlow

I am not sure whether this question makes sense given the design of TensorFlow. Here is the code:
import numpy as np
import tensorflow as tf

np.random.seed(0)
tf.set_random_seed(0)

class Sample():
    def __init__(self, hidden_dim=50, input_dim=784):
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        self.x = tf.placeholder(tf.float32, [None, self.input_dim])
        self._create_network()
        self.__minimize()
        self.sess = tf.InteractiveSession()
        init = tf.initialize_all_variables()
        self.sess.run(init)

    def _create_network(self):
        self.W1 = tf.Variable(tf.random_normal([self.input_dim, self.hidden_dim]))
        self.W2 = tf.Variable(tf.random_normal([self.hidden_dim, self.input_dim]))

    def __minimize(self):
        h1 = tf.matmul(self.x, self.W1)
        h2 = tf.matmul(h1, self.W2)
        reconstruction = tf.nn.sigmoid(h2)
        self.loss = tf.reduce_mean(tf.squared_difference(self.x, reconstruction))
        self.optimizer = \
            tf.train.AdamOptimizer(learning_rate=0.01).minimize(self.loss)

    def partial_fit(self, X):
        cost, _ = self.sess.run([self.loss, self.optimizer], feed_dict={self.x: X})
        return cost

import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
n_samples = mnist.train.num_examples
ex_1 = mnist.train.next_batch(1)[0]

model = Sample()
for i in xrange(11):
    c = model.partial_fit(ex_1)
    print c
The result is as follows:
0.498799
0.469001
0.449659
0.436665
0.424995
0.414473
0.404129
0.394458
0.39165
0.38483
0.380042
This result is achieved with seed 0, and it is the same when I restart the kernel. But suppose I ran 10 iterations and then have to start from scratch: how would I do that in IPython? Because if I run it again after 10 or so iterations, the model just continues from the values it already has.
I used tf.reset_default_graph(), but that has not made any change to the behavior.
Don't use an InteractiveSession; use a normal Session.
Create a new Session each time with the same seed and you will get the same results.
graph = tf.Graph()
with graph.as_default():
    # seed the fresh graph before the model (and its random initializers) is built
    np.random.seed(0)
    tf.set_random_seed(0)
    model = Sample()
with tf.Session(graph=graph) as sess:
    for i in xrange(11):
        c = model.partial_fit(ex_1)
        print c