Storage values using os.stat(filename) - python-3.7

I'm trying to create an EDUCATIONAL PURPOSES ONLY virus. I do not plan on spreading it. It's purpose is to grow a file to the point your storage is full and slow your computer down. It prints the size of the file every 0.001 seconds. With that, I also want to know how fast it is growing the file. The following code doesn't seem to let it run:
class Vstatus():
def _init_(Status):
Status.countspeed == True
Status.active == True
Status.growingspeed == 0
import time
import os
#Your storage is at risk of over-expansion. Please do not let this file run forever, as your storage will fill continuously.
#This is for educational purposes only.
while Vstatus.Status.countspeed == True:
f = open('file.txt', 'a')
f.write('W')
fsize = os.stat('file.txt')
Key1 = fsize
time.sleep(1)
Key2 = fsize
Vstatus.Status.growingspeed = (Key2 - Key1)
Vstatus.Status.countspeed = False
while Vstatus.Status.active == True:
time.sleep(0.001)
f = open('file.txt', 'a')
f.write('W')
fsize = os.stat('file.txt')
print('size:' + fsize.st_size.__str__() + ' at a speed of ' + Vstatus.Status.growingspeed + 'bytes per second.')
This is for Educational Purposes ONLY
The main error I keep getting when running the file is here:
TypeError: unsupported operand type(s) for -: 'os.stat_result' and 'os.stat_result'
What does this mean? I thought os.stat returned an integer Can I get a fix on this?

Vstatus.Status.growingspeed = (Key2 - Key1)
You can't subtract os.stat objects. Your code also has some other problems. Your loops will run sequentially, meaning that your first loop will try to estimate how quickly the file is being written to without writing anything to the file.
import time # Imports at the top
import os
class VStatus:
def __init__(self): # double underscores around __init__
self.countspeed = True # Assignment, not equality test
self.active = True
self.growingspeed = 0
status = VStatus() # Make a VStatus instance
# You need to do the speed estimation and file appending in the same loop
with open('file.txt', 'a+') as f: # Only open the file once
start = time.time() # Get the current time
starting_size = os.fstat(f.fileno()).st_size
while status.active: # Access the attribute of the VStatus instance
size = os.fstat(f.fileno()).st_size # Send file desciptor to stat
f.write('W') # Writing more than one character at a time will be your biggest speed up
f.flush() # make sure the byte is written
if status.countspeed:
diff = time.time() - start
if diff >= 1: # More than a second has gone by
status.countspeed = False
status.growingspeed = (os.fstat(f.fileno()).st_size - starting_size)/diff # get rate of growth
else:
print(f"size: {size} at a speed of {status.growingspeed}")

Related

Add a constraint of renewable ressource

I am new to these optimisation problems, I just found the or-tools library and saw that cp_model can fix problems that are close to mine.
I have printers and some tasks, that I want to schedule in order to finish the production the earliest. The tasks uses time on machine and raw material, that I must refill at the end of coil. For the moment, I don't consider changing a plastic coil before using all the material.
Here is some information about my situation:
1- The printers are all same, they can do every task, with the same efficiency.
2- A printer can only print one task at a time.
3- A printer cannot start without human around, so tasks can start only at certain hours (in the code below, from 0AM to 10 AM).
4- A task can finish at any time.
5- If a printer has no more material, it needs to be change, this can happen only on opening hours.
6- If a printer has no more material, the task is paused until new material is put.
7- I consider having unlimited quantity of material coil.
Thanks to examples and some search in the documentation, I have been able to fix all the issues that are not related to material. I have been able to set a maximum quantity per machine, but it is not my issue.
I don't understand how I can pause/resume my intervals (for the moment I set the duration to a fixed one).
from ortools.sat.python import cp_model
from ortools.util.python import sorted_interval_list
import random
Domain = sorted_interval_list.Domain
def main():
random.seed(0)
nb_jobs = 10
nb_machine = 2
horizon = 30000000
job_list = [] #Job : (id,time,quantity)
listOfEnds = []
quantityPerMachine = []
maxQuantity = 5
#create the jobs
for i in range(nb_jobs):
time = random.randrange(1,24)
quantity = random.randrange(1,4)
job_list.append([i,time,quantity])
print([job[1] for job in job_list])
print("total time to print = ",horizon)
print("quantity")
print([job[2] for job in job_list])
print("total quantity = ",sum([job[2] for job in job_list]))
model = cp_model.CpModel()
makespan = model.NewIntVar(0, horizon, 'makespan')
machineForJob = {}
#boolean variable for each combination of machine and job. if True then machine works on this job
for machine in range(nb_machine):
for job in job_list:
j = job[0]
machineForJob[(machine,j)]=(model.NewBoolVar(f'x[{machine},{j}]'))
#for each job, apply sum()==1 to ensure one machine works on each job only
for j in range(nb_jobs):
model.Add(sum([machineForJob[(machine,j)] for machine in range(nb_machine)])==1)
#set the affectation of the jobs
time_intervals={}
starts = {}
ends = {}
#Time domain represents working hours when someone can start the taks
timeDomain = []#[[0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[24],[25],[26]]
for i in range(20):
for j in range(10):
t= [j +i*24]
timeDomain.append(t)
for machine in range(nb_machine):
time_intervals[machine] = []
for job in job_list:
j = job[0]
duration = job[1]
starts[(machine,j)] = model.NewIntVarFromDomain(Domain.FromIntervals(timeDomain),f'start {machine},{j}')
ends[(machine,j)] = (model.NewIntVar(0, horizon, f'end {machine},{j}'))
time_intervals[machine].append(model.NewOptionalIntervalVar(starts[(machine,j)], duration, ends[(machine,j)],
machineForJob[(machine,j)],f'interval {machine},{j} '))
#time should not overlap, quantity of raw material is limited,
for machine in range(nb_machine):
#model.Add(quantityPerMachine[machine] <= maxQuantity) Not working as expected as the raw material cannot be refilled
model.AddNoOverlap(time_intervals[machine])
#calculate time per machine
time_per_machine = []
for machine in range(nb_machine):
q = 0
s = 0
for job in job_list:
s+= job[1]*machineForJob[(machine,job[0])]
listOfEnds.append(ends[(machine,job[0])])
q+= job[2]*machineForJob[(machine,job[0])]
time_per_machine.append(s)
quantityPerMachine.append(q)
#Goal is to finish all taks earliest
model.AddMaxEquality(makespan,listOfEnds)
model.Minimize(makespan)
solver = cp_model.CpSolver()
# Sets a time limit of 10 seconds.
solver.parameters.max_time_in_seconds = 600.0
#Solve and prints
status = solver.Solve(model)
if status == cp_model.OPTIMAL or status == cp_model.FEASIBLE:
print("optimal =",status == cp_model.OPTIMAL )
print(f'Total cost = {solver.ObjectiveValue()}')
for i in range(nb_machine):
for j in range(nb_jobs):
if solver.BooleanValue(machineForJob[(i,j)]):
print(
f'Machine {i} assigned to Job {j} Time = {job_list[j][1]},Quantity = {job_list[j][2]}')
print(f"[{solver.Value(starts[(i,j)])} ,{solver.Value(ends[(i,j)])}]")
else:
print('No solution found.')
# Statistics.
print('\nStatistics')
print(' - conflicts: %i' % solver.NumConflicts())
print(' - branches : %i' % solver.NumBranches())
print(' - wall time: %f s' % solver.WallTime())

Using pytorch cuda for RNNs on google colaboratory

I have a code (a code we saw in a class) of a recurrent neural network that reads a given text and tries to produce its own text similar to the example. The code is written in python and uses the pytorch library. I wanted to modify to see whether I could increase its speed by using GPU instead of CPU and I made some tests on google collaboratory. The GPU version of the code runs fine but is about three times slower than the CPU version. I do not know the details of GPU architecture so I can not really understand why it is slower. I know that GPUs can do more arithmetic operations per cycle but have more limited memory so I am curious if I am having a memory issue. I also tried using CUDA with a generative adversarial network and in this case it was almost ten times faster. Any tips on this would be welcome.
The code (CUDA version) is below. I am new at this stuff so sorry if some of the terminology is not correct.
The architecture is input->encoder->recursive network->decoder->output.
import torch
import time
import numpy as np
from torch.autograd import Variable
import matplotlib.pyplot as plt
from google.colab import files
#uploding text on google collab
uploaded = files.upload()
for fn in uploaded.keys():
print('User uploaded file "{name}" with length {length} bytes'.format(
name=fn, length=len(uploaded[fn])))
#data preprocessing
with open('text.txt','r') as file:
#with open closes the file after we are done with it
rawtxt=file.read()
rawtxt = rawtxt.lower()
#a function that assigns a number to each unique character in the text
def create_map(rawtxt):
letters = list(set(rawtxt))
lettermap = dict(enumerate(letters)) #gives each letter in the list a number
return lettermap
num_to_let = create_map(rawtxt)
#inverse to num_to_let
let_to_num =dict(zip(num_to_let.values(), num_to_let.keys()))
print(num_to_let)
#turns a text of characters into text of numbers using the mapping
#given by the input mapdict
def maparray(txt, mapdict):
txt = list(txt)
for k, letter in enumerate(txt):
txt[k]=mapdict[letter]
txt=np.array(txt)
return txt
X=maparray(rawtxt, let_to_num) #the data text in numeric format
Y= np.roll(X, -1, axis=0) #shifted data text in numeric format
X=torch.LongTensor(X)
Y=torch.LongTensor(Y)
#up to here we are done with data preprocessing
#return a random batch for training
#this reads a random piece inside data text
#with the size chunk_size
def random_chunk(chunk_size):
k=np.random.randint(0,len(X)-chunk_size)
return X[k:k+chunk_size], Y[k:k+chunk_size]
nchars = len(num_to_let)
#define the recursive neural network class
class rnn(torch.nn.Module):
def __init__(self,input_size,hidden_size,output_size, n_layers=1):
super().__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.n_layers= n_layers
self.encoder = torch.nn.Embedding (input_size, hidden_size)
self.rnn = torch.nn.RNN(hidden_size, hidden_size, n_layers, batch_first=True)
self.decoder = torch.nn.Linear (hidden_size, output_size)
def forward (self,x,hidden):
x=self.encoder(x.view(1,-1))
output, hidden = self.rnn(x.view(1,1,-1), hidden)
output = self.decoder(output.view(1,-1))
return output, hidden
def init_hidden(self):
return Variable(torch.zeros(self.n_layers , 1 , self.hidden_size)).cuda()
#hyper-params
lr = 0.009
no_epochs = 50
chunk_size = 150
myrnn = rnn(nchars, 150, nchars,1)
myrnn.cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(myrnn.parameters(), lr=lr)
t0 = time.time()
for epoch in range(no_epochs):
totcost=0
generated = ''
for _ in range(len(X)//chunk_size):
h=myrnn.init_hidden()
cost = 0
x, y=random_chunk(chunk_size)
x, y= Variable(x).cuda(), Variable(y).cuda()
for i in range(chunk_size):
out, h = myrnn.forward(x[i],h)
_, outl = out.data.max(1)
letter = num_to_let[outl[0]]
generated+=letter
cost += criterion(out, y[i])
optimizer.zero_grad()
cost.backward()
optimizer.step()
totcost+=cost
totcost/=len(X)//chunk_size
print('Epoch', epoch, 'Avg cost/chunk: ', totcost)
print(generated[0:750],'\n\n\n')
t1 = time.time()
total = t1-t0
print('total',total)
#we encode each word into a vector of fixed size

Callbackfunction modelcheckpoint causes error in keras

I seem to get this error when I am using the callback function modelcheckpoint..
I read from a github issue that the solution would be make use of model.get_weight, but I am implicitly only storing that since i am only storing the one with best weight.
Keras only seem to save weights using h5, which make me question is there any other way to do store them using the eras API, if so how? If not, how do i store it?
Made an example to recreate the problem:
#!/usr/bin/python
import glob, os
import sys
from os import listdir
from os.path import isfile, join
import numpy as np
import warnings
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from keras.utils import np_utils
from keras import metrics
import keras
from keras import backend as K
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.core import Dense, Activation, Lambda, Reshape,Flatten
from keras.layers import Conv1D,Conv2D,MaxPooling2D, MaxPooling1D, Reshape
#from keras.utils.visualize_util import plot
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers.merge import Concatenate, Add
import h5py
import random
import tensorflow as tf
import math
from keras.callbacks import CSVLogger
from keras.callbacks import ModelCheckpoint
if len(sys.argv) < 5:
print "Missing Arguments!"
print "python keras_convolutional_feature_extraction.py <workspace> <totale_frames> <fbank-dim> <window-height> <batch_size>"
print "Example:"
print "python keras_convolutional_feature_extraction.py deltas 15 40 5 100"
sys.exit()
total_frames = int(sys.argv[2])
total_frames_with_deltas = total_frames*3
dim = int(sys.argv[3])
window_height = int(sys.argv[4])
inserted_batch_size = int(sys.argv[5])
stride = 1
splits = ((dim - window_height)+1)/stride
#input_train_data = "/media/carl/E2302E68302E443F/"+str(sys.argv[1])+"/fbank/org_train_total_frames_"+str(total_frames)+"_dim_"+str(dim)+"_winheig_"+str(window_height)+"_batch_"+str(inserted_batch_size)+"_fws_input"
#output_train_data ="/media/carl/E2302E68302E443F/"+str(sys.argv[1])+"/fbank/org_train_total_frames_"+str(total_frames)+"_dim_"+str(dim)+"_winheig_"+str(window_height)+"_batch_"+str(inserted_batch_size)+"_fws_output"
#input_test_data = "/media/carl/E2302E68302E443F/"+str(sys.argv[1])+"/fbank/org_test_total_frames_"+str(total_frames)+"_dim_"+str(dim)+"_winheig_"+str(window_height)+"_batch_"+str(1)+"_fws_input"
#output_test_data = "/media/carl/E2302E68302E443F/"+str(sys.argv[1])+"/fbank/org_test_total_frames_"+str(total_frames)+"_dim_"+str(dim)+"_winheig_"+str(window_height)+"_batch_"+str(1)+"_fws_output"
#train_files =[f for f in listdir(input_train_data) if isfile(join(input_train_data, f))]
#test_files =[f for f in listdir(input_test_data) if isfile(join(input_test_data, f))]
#print len(train_files)
np.random.seed(100)
print "hallo"
def train_generator():
while True:
# input = random.choice(train_files)
# h5f = h5py.File(input_train_data+'/'+input, 'r')
# train_input = h5f['train_input'][:]
# train_output = h5f['train_output'][:]
# h5f.close()
train_input = np.random.randint(100,size=((inserted_batch_size,splits*total_frames_with_deltas,window_height,3)))
train_list_list = []
train_input = train_input.reshape((inserted_batch_size,splits*total_frames_with_deltas,window_height,3))
train_input_list = np.split(train_input,splits*total_frames_with_deltas,axis=1)
for i in range(len(train_input_list)):
train_input_list[i] = train_input_list[i].reshape(inserted_batch_size,window_height,3)
#for i in range(len(train_input_list)):
# train_input_list[i] = train_input_list[i].reshape(inserted_batch_size,33,window_height,1,3)
train_output = np.random.randint(5, size = (1,total_frames,5))
middle = int(math.ceil(total_frames/2))
train_output = train_output[:,middle:middle+1,:].reshape((inserted_batch_size,1,5))
#print train_output.shape
#print len(train_input_list)
#print train_input_list[0].shape
yield (train_input_list, train_output)
print "hallo"
def test_generator():
while True:
# input = random.choice(test_files)
# h5f = h5py.File(input_test_data+'/'+input, 'r')
# test_input = h5f['test_input'][:]
# test_output = h5f['test_output'][:]
# h5f.close()
test_input = np.random.randint(100,size=((inserted_batch_size,splits*total_frames_with_deltas,window_height,3)))
test_input = test_input.reshape((inserted_batch_size,splits*total_frames_with_deltas,window_height,3))
test_input_list = np.split(test_input,splits*total_frames_with_deltas,axis=1)
#test_input_list = np.split(test_input,45,axis=3)
for i in range(len(test_input_list)):
test_input_list[i] = test_input_list[i].reshape(inserted_batch_size,window_height,3)
#for i in range(len(test_input_list)):
# test_input_list[i] = test_input_list[i].reshape(inserted_batch_size,33,window_height,1,3)
test_output = np.random.randint(5, size = (1,total_frames,5))
middle = int(math.ceil(total_frames/2))
test_output = test_output[:,middle:middle+1,:].reshape((inserted_batch_size,1,5))
yield (test_input_list, test_output)
print "hallo"
def fws():
#print "Inside"
# Params:
# batch , lr, decay , momentum, epochs
#
#Input shape: (batch_size,40,45,3)
#output shape: (1,15,50)
# number of unit in conv_feature_map = splitd
next(train_generator())
model_output = []
list_of_input = [Input(shape=(8,3)) for i in range(splits*total_frames_with_deltas)]
output = []
#Conv
skip = total_frames_with_deltas
for steps in range(total_frames_with_deltas):
conv = Conv1D(filters = 100, kernel_size = 8)
column = 0
for _ in range(splits):
#print "column " + str(column) + "steps: " + str(steps)
output.append(conv(list_of_input[(column*skip)+steps]))
column = column + 1
#print len(output)
#print splits*total_frames_with_deltas
conv = []
for section in range(splits):
column = 0
skip = splits
temp = []
for _ in range(total_frames_with_deltas):
temp.append(output[((column*skip)+section)])
column = column + 1
conv.append(Add()(temp))
#print len(conv)
output_conc = Concatenate()(conv)
#print output_conc.get_shape
output_conv = Reshape((splits, -1))(output_conc)
#print output_conv.get_shape
#Pool
pooled = MaxPooling1D(pool_size = 6, strides = 2)(output_conv)
reshape = Reshape((1,-1))(pooled)
#Fc
dense1 = Dense(units = 1024, activation = 'relu', name = "dense_1")(reshape)
#dense2 = Dense(units = 1024, activation = 'relu', name = "dense_2")(dense1)
dense3 = Dense(units = 1024, activation = 'relu', name = "dense_3")(dense1)
final = Dense(units = 5, activation = 'relu', name = "final")(dense3)
model = Model(inputs = list_of_input , outputs = final)
sgd = SGD(lr=0.1, decay=1e-1, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd , metrics = ['accuracy'])
print "compiled"
model_yaml = model.to_yaml()
with open("model.yaml", "w") as yaml_file:
yaml_file.write(model_yaml)
print "Model saved!"
log= CSVLogger('/home/carl/kaldi-trunk/dnn/experimental/yesno_cnn_50_training_total_frames_'+str(total_frames)+"_dim_"+str(dim)+"_window_height_"+str(window_height)+".csv")
filepath='yesno_cnn_50_training_total_frames_'+str(total_frames)+"_dim_"+str(dim)+"_window_height_"+str(window_height)+"weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_weights_only=True, mode='max')
print "log"
#plot_model(model, to_file='model.png')
print "Fit"
hist_current = model.fit_generator(train_generator(),
steps_per_epoch=444,#len(train_files),
epochs = 10000,
verbose = 1,
validation_data = test_generator(),
validation_steps=44,#len(test_files),
pickle_safe = True,
workers = 4,
callbacks = [log,checkpoint])
fws()
Execute the script by: python name_of_script.py yens 50 40 8 1
which give me a full traceback:
full traceback
Error:
carl#ca-ThinkPad-T420s:~/Dropbox$ python mini.py yesno 50 40 8 1
Using TensorFlow backend.
Couldn't import dot_parser, loading of dot files will not be possible.
hallo
hallo
hallo
compiled
Model saved!
log
Fit
/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:2252: UserWarning: Expected no kwargs, you passed 1
kwargs passed to function are ignored with Tensorflow backend
warnings.warn('\n'.join(msg))
Epoch 1/10000
2017-05-26 13:01:45.851125: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-26 13:01:45.851345: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-26 13:01:45.851392: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
443/444 [============================>.] - ETA: 4s - loss: 100.1266 - acc: 0.3138Epoch 00000: saving model to yesno_cnn_50_training_total_frames_50_dim_40_window_height_8weights-improvement-00-0.48.hdf5
Traceback (most recent call last):
File "mini.py", line 205, in <module>
File "mini.py", line 203, in fws
File "/usr/local/lib/python2.7/dist-packages/keras/legacy/interfaces.py", line 88, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1933, in fit_generator
callbacks.on_epoch_end(epoch, epoch_logs)
File "/usr/local/lib/python2.7/dist-packages/keras/callbacks.py", line 77, in on_epoch_end
callback.on_epoch_end(epoch, logs)
File "/usr/local/lib/python2.7/dist-packages/keras/callbacks.py", line 411, in on_epoch_end
self.model.save_weights(filepath, overwrite=True)
File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 2503, in save_weights
save_weights_to_hdf5_group(f, self.layers)
File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 2746, in save_weights_to_hdf5_group
f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in layers]
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper (/tmp/pip-4rPeHA-build/h5py/_objects.c:2684)
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper (/tmp/pip-4rPeHA-build/h5py/_objects.c:2642)
File "/usr/local/lib/python2.7/dist-packages/h5py/_hl/attrs.py", line 93, in __setitem__
self.create(name, data=value, dtype=base.guess_dtype(value))
File "/usr/local/lib/python2.7/dist-packages/h5py/_hl/attrs.py", line 183, in create
attr = h5a.create(self._id, self._e(tempname), htype, space)
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper (/tmp/pip-4rPeHA-build/h5py/_objects.c:2684)
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper (/tmp/pip-4rPeHA-build/h5py/_objects.c:2642)
File "h5py/h5a.pyx", line 47, in h5py.h5a.create (/tmp/pip-4rPeHA-build/h5py/h5a.c:1904)
RuntimeError: Unable to create attribute (Object header message is too large)
If you look at the amount of data Keras is trying to save under layer_names attribute (inside the output HDF5 file being create), you will find that it takes more than 64kB.
np.asarray([layer.name.encode('utf8') for layer in model.layers]).nbytes
>> 77100
I quote from https://support.hdfgroup.org/HDF5/faq/limits.html:
Is there an object header limit and how does that affect HDF5 ?
There is a limit (in HDF5-1.8) of the object header, which is 64 KB.
The datatype for a dataset is stored in the object header, so there is
therefore a limit on the size of the datatype that you can have. (See
HDFFV-1089)
The code above was (almost entirely) copied from the traceback:
File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 2746, in save_weights_to_hdf5_group
f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in layers]
I am using numpy asarray method to get the figure fast but h5py gets similar figure (I guess), see https://github.com/h5py/h5py/blob/master/h5py/_hl/attrs.py#L102 if you want to find exact figure.
Anyway, either you will need to implement your own methods for saving/loading of the weights (or use existing workarounds), or you need to give a really short name to ALL the layers inside your model :), something like this:
list_of_input = [Input(shape=(8,3), name=('i%x' % i)) for i in range(splits*total_frames_with_deltas)]
conv = Conv1D(filters = 100, kernel_size = 8, name='cv%x' % steps)
conv.append(Add(name='add%x' % section)(temp))
output_conc = Concatenate(name='ct')(conv)
output_conv = Reshape((splits, -1), name='rs1')(output_conc)
pooled = MaxPooling1D(pool_size = 6, strides = 2, name='pl')(output_conv)
reshape = Reshape((1,-1), name='rs2')(pooled)
dense1 = Dense(units = 1024, activation = 'relu', name = "d1")(reshape)
dense2 = Dense(units
= 1024, activation = 'relu', name = "d2")(dense1)
dense3 = Dense(units = 1024, activation = 'relu', name = "d3")(dense1)
final = Dense(units = 5, activation = 'relu', name = "fl")(dense3)
You mustn't forget to name all the layers because the (numpy) string array into which the layer names are converted is using the size of the longest string for each individual string in it when it is saved!
After renaming the layers as proposed above (which takes almost 26kB) the model is saved successfully. Hope this elaborate answer helps someone.
Update: I have just made a PR to Keras which should fix the issue without implementing any custom loading/saving methods, see 7508
A simple solution, albeit possibly not the most elegant, could be to run a while loop with epochs = 1.
Get the weights at the end of every epoch together with the accuracy and the loss
Save the weights to file 1 with model.get_weight
if accuracy is greater than at the previous epoch (i.e. loop), store the weights to a different file (file 2)
Run the loop again loading the weights from file 1
Break the loops setting a manual early stopping so that it breaks if the loss does not improve for a certain number of loops
You can use get_weights() together with numpy.save.
It's not the best solution, because it will save several files, but it actually works.
The problem is that you won't have the "optimizer" saved with the current states. But you can perhaps work around that by using smaller learning rates after loading.
Custom callback using numpy.save:
def myCallback(epoch,logs):
global storedLoss
#do your comparisons here using the "logs" var.
print(logs)
if (logs['loss'] < storedLoss):
storedLoss = logs['loss']
for i in range(len(model.layers)):
WandB = model.layers[i].get_weights()
if len (WandB) > 0: #necessary because some layers have no weights
np.save("W" + "-" + str(i), WandB[0],False)
np.save("B" + "-" + str(i), WandB[1],False)
#remember that get and set weights use a list: [weights,biases]
#it may happen (not sure) that there is no bias, and thus you may have to check it (len(WandB)==1).
The logs var brings a dictionary with named metrics, such as "loss", and "accuracy", if you used it.
You can store the losses within the callback in a global var, and compare if each loss is better or worse than the last.
When fitting, use the lambda callback:
from keras.callbacks import LambdaCallback
model.fit(...,callbacks=[LambdaCallback(on_epoch_end=myCallback)])
In the example above, I used the LambdaCallback, which has more possibilities than just on_epoch_end.
For loading, do a similar loop:
#you have to create the model first and then set the layers
def loadModel(model):
for i in range(len(model.layers)):
WandBForCheck = model.layers[i].get_weights()
if len (WandBForCheck) > 0: #necessary because some layers have no weights
W = np.load(Wfile + str(i))
B = np.load(Bfile + str(i))
model.layers[i].set_weights([W,B])
See follow-up at https://github.com/fchollet/keras/issues/6766 and https://github.com/farizrahman4u/keras-contrib/pull/90.
I saw the YAML and the root cause is probably that you have so many Inputs. A few Inputs with many dimensions is preferred to many Inputs, especially if you can use scanning and batch operations to do everything efficiently.
Now, ignoring that entirely, here is how you can save and load your model if it has too much stuff to save as JSON efficiently:
You can pass save_weights_only=True. That won't save optimizer weights, so isn't a great solution.
Just put together a PR for saving model weights and optimizer weights but not configuration. When you want to load, first instantiate and compile the model as you did when you were going to train it, then use load_all_weights to load the model and optimizer weights into that model. I'll try to merge it soon so you can use it from the master branch.
You could use it something like this:
from keras.callbacks import LambdaCallback
from keras_contrib.utils.save_load_utils import save_all_weights, load_all_weights
# do some stuff to create and compile model
# use `save_all_weights` as a callback to checkpoint your model and optimizer weights
model.fit(..., callbacks=[LambdaCallback(on_epoch_end=lambda epoch, logs: save_all_weights(model, "checkpoint-{:05d}.h5".format(epoch))])
# use `load_all_weights` to load model and optimizer weights into an existing model
# if not compiled (no `model.optimizer`), this will just load model weights
load_all_weights(model, 'checkpoint-1337.h5')
So I don't endorse the model, but if you want to get it to save and load anyways this should probably work for you.
As a side note, if you want to save weights in a different format, something like this would work.
pickle.dump([K.get_value(w) for w in model.weights], open( "save.p", "wb" ) )
Cheers
Your model architecture must be too large to be saved.
USE get_weights AND set_weights TO SAVE AND LOAD MODEL, RESPECTIVELY.
Do not use callback model checkpoint. just once the training ends, save its weights with pickle.
Have a look at this link: Unable to save DataFrame to HDF5 ("object header message is too large")

Simpy: How can I represent failures in a train subway simulation?

New python user here and first post on this great website. I haven't been able to find an answer to my question so hopefully it is unique.
Using simpy I am trying to create a train subway/metro simulation with failures and repairs periodically built into the system. These failures happen to the train but also to signals on sections of track and on plaforms. I have read and applied the official Machine Shop example (which you can see resemblance of in the attached code) and have thus managed to model random failures and repairs to the train by interrupting its 'journey time'.
However I have not figured out how to model failures of signals on the routes which the trains follow. I am currently just specifying a time for a trip from A to B, which does get interrupted but only due to train failure.
Is it possible to define each trip as its own process i.e. a separate process for sections A_to_B and B_to_C, and separate platforms as pA, pB and pC. Each one with a single resource (to allow only one train on it at a time) and to incorporate random failures and repairs for these section and platform processes? I would also need to perhaps have several sections between two platforms, any of which could experience a failure.
Any help would be greatly appreciated.
Here's my code so far:
import random
import simpy
import numpy
RANDOM_SEED = 1234
T_MEAN_A = 240.0 # mean journey time
T_MEAN_EXPO_A = 1/T_MEAN_A # for exponential distribution
T_MEAN_B = 240.0 # mean journey time
T_MEAN_EXPO_B = 1/T_MEAN_B # for exponential distribution
DWELL_TIME = 30.0 # amount of time train sits at platform for passengers
DWELL_TIME_EXPO = 1/DWELL_TIME
MTTF = 3600.0 # mean time to failure (seconds)
TTF_MEAN = 1/MTTF # for exponential distribution
REPAIR_TIME = 240.0
REPAIR_TIME_EXPO = 1/REPAIR_TIME
NUM_TRAINS = 1
SIM_TIME_DAYS = 100
SIM_TIME = 3600 * 18 * SIM_TIME_DAYS
SIM_TIME_HOURS = SIM_TIME/3600
# Defining the times for processes
def A_B(): # returns processing time for journey A to B
return random.expovariate(T_MEAN_EXPO_A) + random.expovariate(DWELL_TIME_EXPO)
def B_C(): # returns processing time for journey B to C
return random.expovariate(T_MEAN_EXPO_B) + random.expovariate(DWELL_TIME_EXPO)
def time_to_failure(): # returns time until next failure
return random.expovariate(TTF_MEAN)
# Defining the train
class Train(object):
def __init__(self, env, name, repair):
self.env = env
self.name = name
self.trips_complete = 0
self.broken = False
# Start "travelling" and "break_train" processes for the train
self.process = env.process(self.running(repair))
env.process(self.break_train())
def running(self, repair):
while True:
# start trip A_B
done_in = A_B()
while done_in:
try:
# going on the trip
start = self.env.now
yield self.env.timeout(done_in)
done_in = 0 # Set to 0 to exit while loop
except simpy.Interrupt:
self.broken = True
done_in -= self.env.now - start # How much time left?
with repair.request(priority = 1) as req:
yield req
yield self.env.timeout(random.expovariate(REPAIR_TIME_EXPO))
self.broken = False
# Trip is finished
self.trips_complete += 1
# start trip B_C
done_in = B_C()
while done_in:
try:
# going on the trip
start = self.env.now
yield self.env.timeout(done_in)
done_in = 0 # Set to 0 to exit while loop
except simpy.Interrupt:
self.broken = True
done_in -= self.env.now - start # How much time left?
with repair.request(priority = 1) as req:
yield req
yield self.env.timeout(random.expovariate(REPAIR_TIME_EXPO))
self.broken = False
# Trip is finished
self.trips_complete += 1
# Defining the failure
def break_train(self):
while True:
yield self.env.timeout(time_to_failure())
if not self.broken:
# Only break the train if it is currently working
self.process.interrupt()
# Setup and start the simulation
print('Train trip simulator')
random.seed(RANDOM_SEED) # Helps with reproduction
# Create an environment and start setup process
env = simpy.Environment()
repair = simpy.PreemptiveResource(env, capacity = 1)
trains = [Train(env, 'Train %d' % i, repair)
for i in range(NUM_TRAINS)]
# Execute
env.run(until = SIM_TIME)
# Analysis
trips = []
print('Train trips after %s hours of simulation' % SIM_TIME_HOURS)
for train in trains:
print('%s completed %d trips.' % (train.name, train.trips_complete))
trips.append(train.trips_complete)
mean_trips = numpy.mean(trips)
std_trips = numpy.std(trips)
print "mean trips: %d" % mean_trips
print "standard deviation trips: %d" % std_trips
it looks like you are using Python 2, which is a bit unfortunate, because
Python 3.3 and above give you some more flexibility with Python generators. But
your problem should be solveable in Python 2 nonetheless.
you can use sub processes within in a process:
def sub(env):
print('I am a sub process')
yield env.timeout(1)
# return 23 # Only works in py3.3 and above
env.exit(23) # Workaround for older python versions
def main(env):
print('I am the main process')
retval = yield env.process(sub(env))
print('Sub returned', retval)
As you can see, you can use Process instances returned by Environment.process()
like normal events. You can even use return values in your sub proceses.
If you use Python 3.3 or newer, you don’t have to explicitly start a new
sub-process but can use sub() as a sub routine instead and just forward the
events it yields:
def sub(env):
print('I am a sub routine')
yield env.timeout(1)
return 23
def main(env):
print('I am the main process')
retval = yield from sub(env)
print('Sub returned', retval)
You may also be able to model signals as resources that may either be used
by failure process or by a train. If the failure process requests the signal
at first, the train has to wait in front of the signal until the failure
process releases the signal resource. If the train is aleady passing the
signal (and thus has the resource), the signal cannot break. I don’t think
that’s a problem be cause the train can’t stop anyway. If it should be
a problem, just use a PreemptiveResource.
I hope this helps. Please feel welcome to join our mailing list for more
discussions.

Simpy subway simulation: how to fix interrupt failure of class train while queueing for a resource?

I am working on a train simulation in simpy and have had success so far with a single train entity following the code below.
The trains processes are sections followed by platforms. Each section and platform has a resource of 1 to ensure that only one train utilises at a time.
However I can't find a way to get around the error below:
When I add in a second train to the simulation there is occasionally the situation where one train waits for an unavailable resource and then a failure occurs on that train while it is waiting.
I end up with an Interrupt: Interrupt() error.
Is there a way around these failing queues for resources?
Any help is much appreciated.
import random
import simpy
import numpy
# Configure parameters for the model
RANDOM_SEED = random.seed() # makes results repeatable
T_MEAN_SECTION = 200.0 # journey time (seconds)
DWELL_TIME = 30.0 # dwell time mean (seconds)
DWELL_TIME_EXPO = 1/DWELL_TIME # for exponential distribution
MTTF = 600.0 # mean time to failure (seconds)
TTF_MEAN = 1/MTTF # for exponential distribution
REPAIR_TIME = 120.0 # mean repair time for when failure occurs (seconds)
REPAIR_TIME_EXPO = 1/REPAIR_TIME # for exponential distribution
NUM_TRAINS = 2 # number of trains to simulate
SIM_TIME_HOURS = 1 # sim time in hours
SIM_TIME_DAYS = SIM_TIME_HOURS/18.0 # number of days to simulate
SIM_TIME = 3600 * 18 * SIM_TIME_DAYS # sim time in seconds (this is used in the code below)
# Defining the times for processes
def Section(): # returns processing time for platform 7 Waterloo to 26 Bank
return T_MEAN_SECTION
def Dwell(): # returns processing time for platform 25 Bank to platform 7 Waterloo
return random.expovariate(DWELL_TIME_EXPO)
def time_to_failure(): # returns time until next failure
return random.expovariate(TTF_MEAN)
# Defining the train
class Train(object):
def __init__(self, env, name, repair):
self.env = env
self.name = name
self.trips_complete = 0
self.num_saf = 0
self.sum_saf = 0
self.broken = False
# Start "running" and "downtime_train" processes for the train
self.process = env.process(self.running(repair))
env.process(self.downtime_train())
def running(self, repair):
while True:
# request section A
request_SA = sectionA.request()
########## SIM ERROR IF FAILURE OCCURS HERE ###########
yield request_SA
done_in_SA = Section()
while done_in_SA:
try:
# going on the trip
start = self.env.now
print('%s leaving platform at time %d') % (self.name, env.now)
# processing time
yield self.env.timeout(done_in_SA)
# releasing the section resource
sectionA.release(request_SA)
done_in_SA = 0 # Set to 0 to exit while loop
except simpy.Interrupt:
self.broken = True
delay = random.expovariate(REPAIR_TIME_EXPO)
print('Oh no! Something has caused a delay of %d seconds to %s at time %d') % (delay, self.name, env.now)
done_in_SA -= self.env.now - start # How much time left?
with repair.request(priority = 1) as request_D_SA:
yield request_D_SA
yield self.env.timeout(delay)
self.broken = False
print('Okay all good now, failure fixed on %s at time %d') % (self.name, env.now)
self.num_saf += 1
self.sum_saf += delay
# request platform A
request_PA = platformA.request()
########## SIM ERROR IF FAILURE OCCURS HERE ###########
yield request_PA
done_in_PA = Dwell()
while done_in_PA:
try:
# platform process
start = self.env.now
print('%s arriving to platform A and opening doors at time %d') % (self.name, env.now)
yield self.env.timeout(done_in_PA)
print('%s closing doors, ready to depart platform A at %d\n') % (self.name, env.now)
# releasing the platform resource
platformA.release(request_PA)
done_in_PA = 0 # Set to 0 to exit while loop
except simpy.Interrupt:
self.broken = True
delay = random.expovariate(REPAIR_TIME_EXPO)
print('Oh no! Something has caused a delay of %d seconds to %s at time %d') % (delay, self.name, env.now)
done_in_PA -= self.env.now - start # How much time left?
with repair.request(priority = 1) as request_D_PA:
yield request_D_PA
yield self.env.timeout(delay)
self.broken = False
print('Okay all good now, failure fixed on %s at time %d') % (self.name, env.now)
self.num_saf += 1
self.sum_saf += delay
# Round trip is finished
self.trips_complete += 1
# Defining the failure event
def downtime_train(self):
while True:
yield self.env.timeout(time_to_failure())
if not self.broken:
# Only break the train if it is currently working
self.process.interrupt()
# Setup and start the simulation
print('Train trip simulator')
random.seed(RANDOM_SEED) # Helps with reproduction
# Create an environment and start setup process
env = simpy.Environment()
# Defining resources
platformA = simpy.Resource(env, capacity = 1)
sectionA = simpy.Resource(env, capacity = 1)
repair = simpy.PreemptiveResource(env, capacity = 10)
trains = [Train(env, 'Train %d' % i, repair)
for i in range(NUM_TRAINS)]
# Execute
env.run(until = SIM_TIME)
Your processes request a resource and never release it. That’s why the second trains waits forever for its request to succeed. While it is waiting, the failure process seems to interrupt the process. That’s why you get an error. Please read the guide to resources to understand how SimPy’s resources work and, especially, how to release a resource when you are done.