Theano ANN "TypeError: randint() takes at least 1 positional argument (0 given)" - neural-network

I am somewhat new to coding, but I really want to get started with simple ANNs, so I decided to start this project. This is the error that I'm receiving:
File "mtrand.pyx", line 1192, in mtrand.RandomState.randint (numpy/random/mtrand/mtrand.c:14128)
TypeError: randint() takes at least 1 positional argument (0 given)
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 18 14:56:44 2016
@author: Jamoonie
"""
## theano practice
import numpy as np
import theano
import theano.tensor as T
from sklearn.datasets import load_digits

digits = load_digits()
print(digits.data.shape)
train_x = np.array(list(digits.data))
train_y = np.array(list(digits.target))

## checking how matrix dot products work, and how the row,col of W0 should be set up:
#q = np.zeros([5,10])
#p = np.zeros([10,5])
#print(np.dot(q,p))

nn_input_dim = train_x.shape[1]  ## shape[0] yields 1797, which is the number of rows
print(nn_input_dim)  ## shows 64; shape[1] yields the 64 columns, which are the features we want to feed in
nn_hdim0 = 10
nn_output_dim = len(train_y)
epsilon = 0.008  ## learning rate
batch_size = 100  ## how much data input per iteration

X = T.matrix('X')
y = T.lvector('y')

## set weight shapes with random values
W0 = theano.shared(np.random.randn(nn_input_dim, nn_hdim0), name='W0')  ## shape of W0: row = input_dim, col = # of hidden nodes
b0 = theano.shared(np.zeros(nn_hdim0), name='b0')
W1 = theano.shared(np.random.randn(nn_hdim0, nn_output_dim), name='W1')  ## shape of W1: row = # of hidden nodes, col = output dimension
b1 = theano.shared(np.zeros(nn_output_dim), name='b1')

z0 = X.dot(W0) + b0
a0 = T.nnet.softmax(z0)  ## first hidden layer result
z1 = a0.dot(W1) + b1
a1 = T.nnet.softmax(z1)  ## final result or prediction

loss = T.nnet.categorical_crossentropy(a1, y).mean()  ## how much the prediction differs from the real result
prediction = T.argmax(a1, axis=1)  ## index of the maximum value of a1 along axis 1
fwd_propagation = theano.function([X], a1)  ## forward propagation function depending on the array of X values
calc_loss = theano.function([X, y], loss)
predict = theano.function([X], prediction)
accuracy = theano.function([X], T.sum(T.eq(prediction, train_y)))  ## T.eq is elementwise, so this counts the elementwise matches between prediction and train_y

dW0 = T.grad(loss, W0)
dW1 = T.grad(loss, W1)
db0 = T.grad(loss, b0)
db1 = T.grad(loss, b1)

np.random.randint()  ## <-- the line the traceback points at

gradient_step = theano.function(
    [X, y],  ## for each set of X,y values
    updates=((W1, W1 - epsilon * dW1),  ## update each parameter by subtracting gradient * learning rate from the original value
             (W0, W0 - epsilon * dW0),
             (b1, b1 - epsilon * db1),
             (b0, b0 - epsilon * db0)))

def build(iterations=80000):
    W1.set_value(np.random.randn(nn_hdim0, nn_output_dim) / np.sqrt(nn_input_dim))  ## why divide by sqrt(nn_input_dim) I'm not sure, but they're meant to be random anyway
    W0.set_value(np.random.randn(nn_input_dim, nn_hdim0) / np.sqrt(nn_input_dim))
    b1.set_value(np.zeros(nn_output_dim))
    b0.set_value(np.zeros(nn_hdim0))
    for i in range(0, iterations):
        batch_indicies = np.random.randint(0, 17, size=100)
        batch_x, batch_y = train_x[batch_indicies], train_y[batch_indicies]
        gradient_step(batch_x, batch_y)  ## so we're providing the values now for the weights, biases and input/output values
        if i % 2000 == 0:
            print("loss after iteration %r: %r" % (i, calc_loss(train_x, train_y)))
            print(accuracy(train_x))
        if i == iterations - 1:  ## note: i never reaches 80000 inside range(0, 80000)
            print(W0, b0, W1, b1)

build()

As per the documentation, randint() requires at least one argument. If you want a random number less than 213 (to be exact, between 0 and 212 inclusive), you would do r = np.random.randint(213); if you want a random number in some range, say from 213 up to but not including 537, you would do r = np.random.randint(213, 537). Also, you are calling randint(...) without even storing the result in a variable (or passing it to any function), which does nothing useful. I would suggest going through basic Theano tutorials to get started; start from here.
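For example, a minimal sketch of valid calls (note the exclusive upper bound):
import numpy as np
r1 = np.random.randint(213)                   # one integer from [0, 213)
r2 = np.random.randint(213, 537)              # one integer from [213, 537)
batch = np.random.randint(0, 1797, size=100)  # 100 random row indices, e.g. for a mini-batch
print(r1, r2, batch.shape)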

Related

How can I fix a tensor dimension matching error (with a 1 unit difference)?

I'm trying to run my code for a Graph Convolutional Network (GCN) in PyTorch with several .csv input files, but I get the error below:
RuntimeError: The expanded size of the tensor (732) must match the existing size (731) at non-singleton dimension 0. Target sizes: [732]. Tensor sizes: [731]
Here is my code:
import csv
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from sklearn.metrics import r2_score
import numpy as np
import datetime
import dgl.function as fn
# Below are the graph convolution functions
# (where each node collects information about nearby nodes)
def gcn_message(edges):
    return {'msg': edges.src['h']}
def gcn_reduce(nodes):
    return {'h': torch.sum(nodes.mailbox['msg'], dim=1)}
# Below is the pytorch module that defines the operations at each graph convolution layer
class gcnLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(gcnLayer, self).__init__()
        self.linear = nn.Linear(in_feats*2, out_feats)
    def forward(self, g, inputs):
        with g.local_scope():
            g.ndata['h'] = inputs # inputs: POI features
            print(g.ndata['h'])
            g.update_all(message_func=fn.copy_u('h', 'm'), reduce_func=fn.mean('m', 'h_N'))
            h_N = g.ndata['h_N']
            h_total = torch.cat([inputs, h_N], dim=1) # the result (convolved POIs) of convolution at a layer is extracted
            return self.linear(h_total) # the result is linearly transformed
# Below is the pytorch class (machine learning architectures are initialized as classes)
# that defines the graph convolutional network (GCN) architecture (number of hidden layers, neurons, activation function, etc.)
class gcn(torch.nn.Module):
    def __init__(self, input, hidden, output):
        super(gcn, self).__init__()
        # Initially each row in the input has (input) number of elements.
        # In other words, each node in the network has (input) number of features, i.e. the number of POI types
        self.gcnInput = gcnLayer(input, hidden) # input size is converted into hidden size
        self.gcnHidden = gcnLayer(hidden, hidden) # hidden size is converted into hidden size
        self.gcnOutput = gcnLayer(hidden, output) # hidden size is converted into the desired output size
    # Forward function: this function is run when we call the class
    def forward(self, g, pois):
        y = F.relu(self.gcnInput(g, pois)) # result of the input layer is sent through the activation function
        y = F.relu(self.gcnHidden(g, y)) # result of the hidden layer is sent through the activation function
        y = F.relu(self.gcnHidden(g, y)) # (here, an arbitrary number of hidden layers can be added)
        y = self.gcnOutput(g, y) # result of the output layer (not activated)
        return y
# Below is the pytorch class that defines the multilayer perceptron (MLP) architecture
# (number of hidden layers, neurons, activation function, etc.)
class mlp(torch.nn.Module):
    def __init__(self, input, hidden):
        super(mlp, self).__init__() # initialize
        self.classifier = nn.Sequential( # Sequential is used when combining different layers
            nn.Linear(input, hidden), # the input feature matrix is linearly transformed into a matrix with shape (hidden)
            nn.ReLU(), # activation function is applied
            nn.Linear(hidden, hidden), # result of the previous layer is linearly transformed
            nn.ReLU(), # activation function is applied
            nn.Linear(hidden, 1)) # at the final layer, one output is given (trip amount)
    def forward(self, x):
        x = self.classifier(x) # the input is sent through the MLP architecture defined above
        return x
# Below is the pytorch class that defines the combined deep learning architecture
class od(nn.Module):
    def __init__(self, gcnInput, gcnHidden, gcnOutput, mlpHidden):
        super(od, self).__init__()
        self.gcn = gcn(gcnInput, gcnHidden, gcnOutput) # first: GCN
        self.mlp = mlp((2*gcnoutput+1), mlpHidden) # afterwards: MLP (gcnoutput here is the global defined further below)
    def forward(self, g, pois, costs, indices, q, zoneCount):
        y = self.gcn(g, pois) # first, send the input through the GCN
        p = torch.zeros(len(costs), 2*q).cuda() # prepare a matrix holding the POI output at the origin (size: q) and at the destination (size: q)
        count = 0
        for i in range(zoneCount):
            for j in range(zoneCount):
                p[count][:q] = y[i][:] # POI output at the origin (size: q)
                p[count][q:] = y[j][:] # POI output at the destination (size: q)
                count += 1
        p = p[indices][:] # order the input matrix in the order of shuffled zones (or OD pairs)
        costs = costs[indices][:] # order the cost matrix in the order of shuffled zones (or OD pairs)
        inputs = torch.cat((p, costs), 1).cuda() # combine POI and cost matrices
        y = self.mlp(inputs) # last, send through the MLP
        return y
def train(optimizer, model, criterion, pois, costs, labels, indices, zoneCount, gcnOutput):
    model.train() # model is in training mode (meaning gradients are calculated)
    optimizer.zero_grad() # gradients are zeroed
    print(optimizer)
    pred = model(g, pois, costs, indices, gcnOutput, zoneCount) # get the model output as the predicted output
    loss = criterion(pred, labels) # calculate the loss between prediction and label
    loss.backward() # backpropagate the gradients
    optimizer.step() # apply the parameter update (I don't fully know what happens inside this call)
    return loss.item() # return the loss
def test(model, pois, costs, labels, indices, zoneCount, gcnOutput):
    model.eval() # model is in evaluation mode: no gradients are calculated
    with torch.no_grad(): # tensors with requires_grad=True track gradients; this context disables gradient tracking for everything below
        pred = model(g, pois, costs, indices, gcnOutput, zoneCount) # get prediction
        predictions = pred.detach().cpu() # move the prediction tensor from GPU to CPU
        r2 = r2_score(labels.cpu(), predictions) # calculate R2
        return r2
def data_collection(key):
    # Below part gets the data from the files into the program (POIs, nodes, costs, labels).
    # If the file types are different from the ones used in this research, this part should be adjusted.
    if key == "mb": # mb: manhattan and brooklyn case
        no = 3
    else:
        no = 2
    with open("{}/nodes.csv".format(key)) as f:
        nodeCount = sum(1 for line in f)
    print(nodeCount)
    with open("{}/poisInfo.csv".format(key)) as f:
        poiCount = sum(1 for line in f)
    print(poiCount)
    with open("{}/zones.csv".format(key)) as f:
        zoneCount = sum(1 for line in f)
    print(zoneCount)
    pois = torch.zeros((nodeCount, poiCount)).cuda()
    print(pois)
    i = 0
    with open('{}/nodes.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            print(row)
            pois[i][:] = torch.FloatTensor([int(i) for i in row[no:]])
            i += 1
    costs = torch.zeros((zoneCount*zoneCount, 1)).cuda()
    labels = torch.zeros((zoneCount*zoneCount, 1)).cuda()
    count = 0
    with open('{}/costsTrips.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            costs[count][0] = int(row[2])
            labels[count][0] = int(row[3])
            count += 1
    g = dgl.DGLGraph().to(torch.device('cuda:0')) # dgl: deep graph library; we move POIs to the graph for graph convolution
    print(nodeCount)
    g.add_nodes(nodeCount) # add nodes to the graph
    print(nodeCount)
    print(g.number_of_nodes())
    with open('{}/edges.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            g.add_edge(int(row[0]), int(row[1])) # if an edge exists between 2 nodes, add it
    print('We have %d nodes.' % g.number_of_nodes())
    print('We have %d edges.' % g.number_of_edges())
    return [g, pois, labels, costs, zoneCount, poiCount]
gcnoutput = 10
keys = ["manhattan", "brooklyn", "mb"]
count = 0
with open("costFinal.csv", mode='w', newline="") as wx:
    w = csv.writer(wx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    w.writerow(["place", "iteration", "split", "r2"])
    for key in keys:
        [g, pois, labels, costs, zoneCount, poiCount] = data_collection(key)
        for iteration in range(1, 11): # test each split ratio 10 times to get the average
            a = np.random.permutation(zoneCount) # randomize the zones
            for i in range(1, 10):
                split = i / 10 # the lines below split the training and test subsets
                breaker = int(split * zoneCount)
                train_zones = a[:breaker]
                test_zones = a[breaker:]
                train_indices = []
                test_indices = []
                for z in train_zones:
                    train_indices += [j for j in range(z * zoneCount, z * zoneCount + zoneCount)]
                for z in test_zones:
                    test_indices += [j for j in range(z * zoneCount, z * zoneCount + zoneCount)]
                # model parameters: gcninput, gcnhidden, gcnoutput, mlphidden
                model = od(poiCount, 64, gcnoutput, 64).cuda() # construct the model
                print(model)
                optimizer = torch.optim.Adam(model.parameters(), lr=0.01) # optimizer: Adam
                print(optimizer)
                criterion = torch.nn.MSELoss() # loss: mean squared error
                print(criterion)
                for epoch in range(1, 11): # train the model (10 epochs here)
                    print(epoch)
                    loss = train(optimizer, model, criterion, pois, costs, labels[train_indices], train_indices, zoneCount, gcnoutput)
                    # print(count, datetime.datetime.now() - start, key, iteration, i, epoch, loss)
                    count += 1
                r2 = test(model, pois, costs, labels[test_indices], test_indices, zoneCount, gcnoutput) # at the end, test the model and get r2
                w.writerow([key, iteration, i*10, r2]) # write key [manhattan, brooklyn, mb], iteration [1...10], split ratio [10%...90%], r2 to the file

Precision issue with sigmoid activation function for Tensorflow/Keras 2.3.1

Assuming the following forward pass in a classic ANN
(based on https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/):
net_h1 = w1*i1 + w2*i2 + b1*1 = 0.15*0.05 + 0.20*0.10 + 0.35*1 = 0.3775
Now let's use a sigmoid activation on that, I get:
out_h1 = 1 / (1 + exp(-0.3775)) = 0.593269992
So far so good; now let's check the result of this calculation in python:
1 / (1+ math.exp(-0.3775)) # ... = 0.5932699921071872, OK
However this is double precision, and since Keras uses float32, let's calculate the same thing with float32; I get:
w1 = np.array(0.15, dtype=np.float32)
i1 = np.array(0.05, dtype=np.float32)
w2 = np.array(0.2, dtype=np.float32)
i2 = np.array(0.1, dtype=np.float32)
b1 = np.array(0.35, dtype=np.float32)
k1 = np.array(1, dtype=np.float32)
k2= np.array(1, dtype=np.float32)
n1 = w1 * i1 + w2 * i2 + b1 * k1
np.array(k2 / (1 + np.exp(-n1)), dtype=np.float32) # --->array(0.59327, dtype=float32)
OK, so normally I should be able to get 0.59327 for our out_{h1} using the Keras float32 framework.
Let's now try to get this result using Keras:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
import numpy as np
model = Sequential()
model.add(Dense(2,activation='sigmoid')) # Build only one layer (enough for example)
model.compile(optimizer = SGD(lr = 0.5),loss='mse') # Don't really care for this example, simply for compilation
input = np.array([[0.05, 0.10]]) # Set input, same as the ones provided in the example
output = np.array([[0.01, 0.99]]) # Don't really care for this example since we want to check activation only
weights = [np.array([[0.15, 0.20 ], # Required for set_weights
[0.25, 0.30]], dtype=np.float32),
np.array([0.35, 0.35], dtype=np.float32)]
model.build(input.shape) # Required for set_weights
model.set_weights(weights) # Set the weights to be the same as the one provided in the example
model.predict(np.array([[0.05, 0.10]], dtype=np.float32)) # This can be seen as out_{h1}
# array([[0.5944759 , 0.59628266]], dtype=float32)
# NOK: 0.5944759 != 0.59327
Can someone explain to me why I get 0.5944759 instead of 0.59327? The result seems far from the expected output. If possible, please provide an example of the calculation and/or a way to get the expected output of 0.59327.
Please note this example was done using:
tensorflow 2.3.1
numpy 1.18.5
python 3.8.12
Thanks for your help.
TensorFlow performs the Dense layer multiplication row by column: it multiplies each row of your input with a column of the weights matrix, so you need to transpose your weights matrix; if you do, you will get the correct result.
I validated this using your code, just modified from:
weights = [np.array([[0.15, 0.20], # Required for set_weights
                     [0.25, 0.30]], dtype=np.float32),
           np.array([0.35, 0.35], dtype=np.float32)]
to (pay attention to the transpose operator .T):
weights = [np.array([[0.15, 0.20], # Required for set_weights
                     [0.25, 0.30]], dtype=np.float32).T,
           np.array([0.35, 0.35], dtype=np.float32)]
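For completeness, a minimal end-to-end sketch of the fix (assuming TF 2.x; the expected numbers follow from the hand calculation in the question):
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(2, activation='sigmoid'))
model.build((None, 2))  # required before set_weights
# rows of the example's matrix are neurons, so transpose to the Keras (inputs x units) layout
model.set_weights([np.array([[0.15, 0.20],
                             [0.25, 0.30]], dtype=np.float32).T,
                   np.array([0.35, 0.35], dtype=np.float32)])
print(model.predict(np.array([[0.05, 0.10]], dtype=np.float32)))
# -> approximately [[0.59327, 0.59688]]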

I want to use Numpy to simulate the inference process of a quantized MobileNet V2 network, but the outcome is different from the PyTorch one

Python version: 3.8
Pytorch version: 1.9.0+cpu
Platform: Anaconda Spyder5.0
To reproduce this problem, just copy all of the code below into a single file.
The ILSVRC2012_val_00000293.jpg file used in this code is shown below; you also need to download it and then change its path in the code.
Some background on this problem:
I am now working on a project that aims to develop a hardware accelerator to complete the inference process of the MobileNet V2 network. I used a pretrained quantized PyTorch model to simulate the outcome, and the result came out very well.
In order to use hardware to complete this task, I wish to know every input and output as well as the intermediate variables during the run of this piece of PyTorch code. I used a package named torchextractor to fetch the outcome of the first layer, which in this case is a 3*3 convolution layer.
import numpy as np
import torchvision
import torch
from torchvision import transforms, datasets
from PIL import Image
from torchvision import transforms
import torchextractor as tx
import math
#########################################################################################
##### Processing of input image
#########################################################################################
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,])
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
#image file destination (a raw string so the backslashes are not treated as escapes)
filename = r"D:\Project_UM\MobileNet_VC709\MobileNet_pytorch\ILSVRC2012_val_00000293.jpg"
input_image = Image.open(filename)
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0)
#########################################################################################
#########################################################################################
#########################################################################################
#----First verify that the torchextractor class does not influence the inference outcome
# ofmp of layer1 before putting into torchextractor
a, b, c = quantize_tensor(input_batch)  # quantize the input tensor; returns an int8 tensor, scale and zero point (quantize_tensor is defined further below)
input_qa = torch.quantize_per_tensor(torch.tensor(input_batch.clone().detach()), b, c, torch.quint8)  # using torch's quantize_per_tensor method
# Load a quantized mobilenet_v2 model
model_quantized = torchvision.models.quantization.mobilenet_v2(pretrained=True, quantize=True)
model_quantized.eval()
with torch.no_grad():
    output = model_quantized.features[0][0](input_qa)  # ofmp of layer1, datatype: quantized tensor
    # print("FM of layer1 before tx_extractor:\n", output.int_repr())  # ofmp of layer1, datatype: int8 tensor
output1_clone = output.int_repr().detach().numpy()  # clone ofmp of layer1, datatype: ndarray
#########################################################################################
#########################################################################################
#########################################################################################
# ofmp of layer1 after adding torchextractor
model_quantized_ex = tx.Extractor(model_quantized, ["features.0.0"])  # capture the module inside the first layer
model_output, features = model_quantized_ex(input_batch)  # forward propagation
# feature_shapes = {name: f.shape for name, f in features.items()}
# print(features['features.0.0'])  # ofmp of layer1, datatype: quantized tensor
out1_clone = features['features.0.0'].int_repr().numpy()  # clone ofmp of layer1, datatype: ndarray
if np.array_equal(out1_clone, output1_clone):  # elementwise comparison (comparing .all() == .all() would only compare two booleans)
    print('Model with torchextractor attached outputs the same value as the original model')
else:
    print('Torchextractor method influences the outcome')
Here I define a numpy quantization scheme based on the scheme proposed in the paper "Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference":
# Convert a normal regular tensor to a quantized tensor with scale and zero_point
def quantize_tensor(x, num_bits=8):  # quantize the input tensor; return an int8 tensor, scale and zero point
    qmin = 0.
    qmax = 2.**num_bits - 1.
    min_val, max_val = x.min(), x.max()
    scale = (max_val - min_val) / (qmax - qmin)
    initial_zero_point = qmin - min_val / scale
    zero_point = 0
    if initial_zero_point < qmin:
        zero_point = qmin
    elif initial_zero_point > qmax:
        zero_point = qmax
    else:
        zero_point = initial_zero_point
    # print(zero_point)
    zero_point = int(zero_point)
    q_x = zero_point + x / scale
    q_x.clamp_(qmin, qmax).round_()
    q_x = q_x.round().byte()
    return q_x, scale, zero_point
#%%
# #############################################################################################
# --------- Simulate the inference process of layer0: conv33 using numpy
# #############################################################################################
# get the input_batch quantized buffer data
input_scale = b.item()
input_zero = c
input_quantized = a[0].detach().numpy()
# get the layer0 output scale and zero_point
output_scale = model_quantized.features[0][0].state_dict()['scale'].item()
output_zero = model_quantized.features[0][0].state_dict()['zero_point'].item()
# get the quantized weight with scale and zero_point
weight_scale = model_quantized.features[0][0].state_dict()["weight"].q_scale()
weight_zero = model_quantized.features[0][0].state_dict()["weight"].q_zero_point()
weight_quantized = model_quantized.features[0][0].state_dict()["weight"].int_repr().numpy()
# print(weight_quantized)
# print(weight_quantized.shape)
# bias_quantized,bias_scale,bias_zero= quantize_tensor(model_quantized.features[0][0].state_dict()["bias"])# to quantize the input tensor and return an int8 tensor, scale and zero point
# print(bias_quantized.shape)
bias = model_quantized.features[0][0].state_dict()["bias"].detach().numpy()
# print(input_quantized)
print(type(input_scale))
print(type(output_scale))
print(type(weight_scale))
Then I write a quantized 2D convolution using numpy, hoping to figure out every detail of the PyTorch data flow during inference.
#%% numpy simulated layer0 convolution function definition
def conv_cal(input_quantized, weight_quantized, kernel_size, stride, out_i, out_j, out_k):
    weight = weight_quantized[out_i]
    input = np.zeros((input_quantized.shape[0], kernel_size, kernel_size))
    for i in range(weight.shape[0]):
        for j in range(weight.shape[1]):
            for k in range(weight.shape[2]):
                input[i][j][k] = input_quantized[i][stride*out_j+j][stride*out_k+k]
    # print(np.dot(weight, input))
    # print(input, "\n")
    # print(weight)
    return np.multiply(weight, input).sum()

def QuantizedConv2D(input_scale, input_zero, input_quantized, output_scale, output_zero, weight_scale, weight_zero, weight_quantized, bias, kernel_size, stride, padding, ofm_size):
    output = np.zeros((weight_quantized.shape[0], ofm_size, ofm_size))
    input_quantized_padding = np.full((input_quantized.shape[0], input_quantized.shape[1]+2*padding, input_quantized.shape[2]+2*padding), 0)
    zero_temp = np.full(input_quantized.shape, input_zero)
    input_quantized = input_quantized - zero_temp
    for i in range(input_quantized.shape[0]):
        for j in range(padding, padding + input_quantized.shape[1]):
            for k in range(padding, padding + input_quantized.shape[2]):
                input_quantized_padding[i][j][k] = input_quantized[i][j-padding][k-padding]
    zero_temp = np.full(weight_quantized.shape, weight_zero)
    weight_quantized = weight_quantized - zero_temp
    for i in range(output.shape[0]):
        for j in range(output.shape[1]):
            for k in range(output.shape[2]):
                # output[i][j][k] = (weight_scale*input_scale)*conv_cal(input_quantized_padding, weight_quantized, kernel_size, stride, i, j, k) + bias[i]  # floating-point output
                output[i][j][k] = weight_scale*input_scale/output_scale*conv_cal(input_quantized_padding, weight_quantized, kernel_size, stride, i, j, k) + bias[i]/output_scale + output_zero
                output[i][j][k] = round(output[i][j][k])  # integer output
    return output
Here I input the same image, weights, and bias together with their zero_points and scales, then compare this "numpy simulated" result to the PyTorch-calculated one.
quantized_model_out1_int8 = np.squeeze(features['features.0.0'].int_repr().numpy())
print(quantized_model_out1_int8.shape)
print(quantized_model_out1_int8)
out1_np = QuantizedConv2D(input_scale, input_zero, input_quantized, output_scale, output_zero, weight_scale, weight_zero, weight_quantized, bias, 3, 2, 1, 112)
np.save("out1_np.npy", out1_np)
for i in range(quantized_model_out1_int8.shape[0]):
    for j in range(quantized_model_out1_int8.shape[1]):
        for k in range(quantized_model_out1_int8.shape[2]):
            if out1_np[i][j][k] < 0:  # simulate the fused ReLU
                out1_np[i][j][k] = 0
print(out1_np)
flag = np.zeros(quantized_model_out1_int8.shape)
for i in range(quantized_model_out1_int8.shape[0]):
    for j in range(quantized_model_out1_int8.shape[1]):
        for k in range(quantized_model_out1_int8.shape[2]):
            if quantized_model_out1_int8[i][j][k] == out1_np[i][j][k]:
                flag[i][j][k] = 1
                out1_np[i][j][k] = 0
                quantized_model_out1_int8[i][j][k] = 0
# Compare the simulated result to the extractor-fetched result; compute the total hit rate
print(flag.sum()/(112*112*32)*100, '%')
If the "numpy simulated" results are the same as the extracted one, call it a hit. Print the total hit rate, it shows that numpy gets 92% of the values right. Now the problem is, I have no idea why the rest 8% of values come out wrong.
Comparison of two outcomes:
The picture below shows the different values between Numpy one and PyTorch one, the sample channel is index[1]. The left upper corner is Numpy one, and the upright corner is PyTorch one, I have set all values that are the same between them to 0, as you can see, most of the values just have a difference of 1(This can be view as the error brought by the precision loss of fixed point arithmetics), but some have large differences, e.g. the value[1][4], 121 vs. 76 (I don't know why)
Focus on one strange value:
This code is used to replay the calculation process of the value[1][4], originally I was expecting a trial and error process could lead me to solve this problem, to get my wanted number of 76, but no matter how I tried, it didn't output 76. If you want to try this, I paste this code for your convenience.
#%% A test code to check the calculation process
weight_quantized_sample = weight_quantized[2]
M_t = input_scale * weight_scale / output_scale
ifmap_t = np.int32(input_quantized[:, 1:4, 7:10])
weight_t = np.int32(weight_quantized_sample)
bias_t = bias[2]
bias_q = bias_t / output_scale
res_t = 0
for ch in range(3):
    ifmap_offset = ifmap_t[ch] - np.int32(input_zero)
    weight_offset = weight_t[ch] - np.int32(weight_zero)
    res_ch = np.multiply(ifmap_offset, weight_offset)
    res_ch = res_ch.sum()
    res_t = res_t + res_ch
res_mul = M_t * res_t
# for n in range(1, 30):
#     res_mul = multiply(n, M_t, res_t)
res_t = round(res_mul + output_zero + bias_q)
print(res_t)
Could you help me out with this? I have been stuck here for a long time.
I implemented my own version of the quantized convolution and got from a 99.999% to a 100% hit rate (and the single mismatching value is off by 1, which I consider a rounding issue). The link to the paper in the question helped a lot.
But I found that your formulas are the same as mine, so I don't know what your issue was. As I understand it, quantization in PyTorch is hardware dependent.
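For reference, the requantization step both implementations rely on (following the linked paper) can be written as the following sketch; the names are illustrative, not from either code:
def requantize(acc, s_in, s_w, s_out, z_out, bias_f):
    # acc: integer accumulator, i.e. the sum over the receptive field of (q_in - z_in)*(q_w - z_w)
    # bias_f: float bias of the layer; s_* are scales, z_out the output zero point
    m = s_in * s_w / s_out  # the combined rescaling factor M from the paper
    return round(m * acc + bias_f / s_out + z_out)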
Here is my code:
def my_Conv2dRelu_b2(input_q, conv_layer, output_shape):
    '''
    Args:
        input_q: quantized input tensor
        conv_layer: quantized conv module
        output_shape: the pre-computed shape of the result
    Returns:
    '''
    output = np.zeros(output_shape)
    # extract needed float numbers from quantized operations
    weights_scale = conv_layer.weight().q_per_channel_scales()
    input_scale = input_q.q_scale()
    weights_zp = conv_layer.weight().q_per_channel_zero_points()
    input_zp = input_q.q_zero_point()
    # extract needed convolution parameters
    padding = conv_layer.padding
    stride = conv_layer.stride
    # extract float numbers for results
    output_zp = conv_layer.zero_point
    output_scale = conv_layer.scale
    conv_weights_int = conv_layer.weight().int_repr()
    input_int = input_q.int_repr()
    biases = conv_layer.bias().numpy()
    for k in range(input_q.shape[0]):
        for i in range(conv_weights_int.shape[0]):
            output[k][i] = manual_convolution_quant(
                input_int[k].numpy(),
                conv_weights_int[i].numpy(),
                biases[i],
                padding=padding,
                stride=stride,
                image_zp=input_zp, image_scale=input_scale,
                kernel_zp=weights_zp[i].item(), kernel_scale=weights_scale[i].item(),
                result_zp=output_zp, result_scale=output_scale
            )
    return output
def manual_convolution_quant(image, kernel, b, padding, stride, image_zp, image_scale, kernel_zp, kernel_scale,
                             result_zp, result_scale):
    H = image.shape[1]
    W = image.shape[2]
    new_H = H // stride[0]
    new_W = W // stride[1]
    results = np.zeros([new_H, new_W])
    M = image_scale * kernel_scale / result_scale
    bias = b / result_scale
    paddedIm = np.pad(
        image,
        [(0, 0), (padding[0], padding[0]), (padding[1], padding[1])],
        mode="constant",
        constant_values=image_zp,
    )
    s = kernel.shape[1]
    for i in range(new_H):
        for j in range(new_W):
            patch = paddedIm[
                :, i * stride[0]: i * stride[0] + s, j * stride[1]: j * stride[1] + s
            ]
            res = M * ((kernel - kernel_zp) * (patch - image_zp)).sum() + result_zp + bias
            if res < 0:  # fused ReLU
                res = 0
            results[i, j] = round(res)
    return results
Code to compare pytorch and my own version:
def calc_hit_rate(array1, array2):
    good = (array1 == array2).astype(int).sum()  # plain int; np.int is deprecated in recent numpy
    all = array1.size
    return good / all
# during inference
y2 = model.conv1(y1)
y2_int = torch.int_repr(y2)
y2_int_manual = my_Conv2dRelu_b2(y1, model.conv1, y2.shape)
print(f'y2 hit rate= {calc_hit_rate(y2.int_repr().numpy(), y2_int_manual)}')  # hit_rate=1.0

2d bin packing using or-tools: AddNoOverlap2D and OnlyEnforceIf gives MODEL_INVALID

I am playing with a 2d bin packing model. I tried this using:
for j in range(n):
    for i in range(j):
        model.Add(b[i] == b[j]).OnlyEnforceIf(b2[(i,j)]) # not needed?
        model.Add(b[i] != b[j]).OnlyEnforceIf(b2[(i,j)].Not())
        model.AddNoOverlap2D([xival[i],xival[j]],[yival[i],yival[j]]).OnlyEnforceIf(b2[(i,j)])
The purpose here is to only enforce the no-overlap constraint if items i and j are assigned to the same bin. The combination of AddNoOverlap2D and OnlyEnforceIf seems to give the status:
MODEL_INVALID
If I remove OnlyEnforceIf(b2[(i,j)]) the (now incorrect) model solves fine.
Am I correct to conclude this is just not supported in or-tools (yet)?
I guess I can reformulate things to more MIP-like approach.
A reproducible example is below. I used version 8.1.8487.
from ortools.sat.python import cp_model
#---------------------------------------------------
# data
#---------------------------------------------------
# bin width and height
H = 60
W = 40
# h,w for each item
h = [7,7]
w = [12,12]
n = len(h) # number of items
m = 2 # number of bins
#---------------------------------------------------
# or-tools model
#---------------------------------------------------
model = cp_model.CpModel()
#
# variables
#
# x1,x2 and y1,y2 are start and end
x1 = [model.NewIntVar(0,W-w[i],'x1{}'.format(i)) for i in range(n)]
x2 = [model.NewIntVar(w[i],W,'x2{}'.format(i)) for i in range(n)]
y1 = [model.NewIntVar(0,H-h[i],'y1{}'.format(i)) for i in range(n)]
y2 = [model.NewIntVar(h[i],H,'y2{}'.format(i)) for i in range(n)]
# interval variables
xival = [model.NewIntervalVar(x1[i],w[i],x2[i],'xival{}'.format(i)) for i in range(n)]
yival = [model.NewIntervalVar(y1[i],h[i],y2[i],'yival{}'.format(i)) for i in range(n)] # size along y is the item height h[i]
# bin numbers
b = [model.NewIntVar(0,m-1,'b{}'.format(i)) for i in range(n)]
# b2[(i,j)] = true if b[i]=b[j] for i<j
b2 = {(i,j):model.NewBoolVar('b2{}.{}'.format(i,j)) for j in range(n) for i in range(j)}
#
# constraints
#
for j in range(n):
    for i in range(j):
        model.Add(b[i] == b[j]).OnlyEnforceIf(b2[(i,j)]) # not needed?
        model.Add(b[i] != b[j]).OnlyEnforceIf(b2[(i,j)].Not())
        model.AddNoOverlap2D([xival[i],xival[j]],[yival[i],yival[j]]).OnlyEnforceIf(b2[(i,j)])
        # model.AddNoOverlap2D([xival[i],xival[j]],[yival[i],yival[j]]) # this one works
#
# solve model
#
solver = cp_model.CpSolver()
rc = solver.Solve(model)
print(rc)
print(solver.StatusName())
Notes:
As indicated in the answer, this is just not supported.
A different formulation for this 2d bin packing problem is shown here. That seems to work quite well.
It is further noted that the pairwise NoOverlap2D formulation may not be a good idea.
If you set solver.parameters.log_search_progress = True you'll see:
Parameters: log_search_progress: true
Enforcement literal not supported in constraint: enforcement_literal: 11 no_overlap_2d { x_intervals: 0 x_intervals: 1 y_intervals: 2 y_intervals: 3 }
...
So yeah, it isn't supported; maybe you can open a feature request as documented here.
But I think you can also solve it using OptionalIntervalVar if you encode the bin number with booleans, as sketched below.
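A rough, untested sketch of that idea (reusing n, m, w, h and the x1/x2/y1/y2 variables from the model above; the lit presence literals and the per-bin loop are my own illustration):
lit = {}  # lit[(i,k)] is true iff item i is placed in bin k
for i in range(n):
    for k in range(m):
        lit[(i,k)] = model.NewBoolVar('lit{}.{}'.format(i,k))
    model.Add(sum(lit[(i,k)] for k in range(m)) == 1)  # each item goes in exactly one bin
for k in range(m):
    xs = [model.NewOptionalIntervalVar(x1[i], w[i], x2[i], lit[(i,k)], 'xs{}.{}'.format(i,k)) for i in range(n)]
    ys = [model.NewOptionalIntervalVar(y1[i], h[i], y2[i], lit[(i,k)], 'ys{}.{}'.format(i,k)) for i in range(n)]
    model.AddNoOverlap2D(xs, ys)  # absent intervals are ignored, so only items sharing bin k must not overlap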

Kalman Filter (pykalman): Value for obs_covariance and model without intercept

I am looking at the KalmanFilter from pykalman shown in the examples:
pykalman documentation
Example 1
Example 2
and I am wondering about
observation_covariance=100,
vs
observation_covariance=1,
The documentation states:
observation_covariance R: e(t)^2 ~ Gaussian(0, R)
How should the value be set here correctly?
Additionally, is it possible to apply the Kalman filter without an intercept in the above module?
The observation covariance shows how much error you assume to be in your input data. The Kalman filter works fine on normally distributed data. Under this assumption you can use the 3-sigma rule to calculate the covariance (in this case, the variance) of your observation based on the maximum error in the observation.
The values in your question can be interpreted as follows:
Example 1
observation_covariance = 100
sigma = sqrt(observation_covariance) = 10
max_error = 3*sigma = 30
Example 2
observation_covariance = 1
sigma = sqrt(observation_covariance) = 1
max_error = 3*sigma = 3
So you need to choose the value based on your observation data. The more accurate the observation, the smaller the observation covariance.
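In code, the same rule applied in the other direction, assuming you know the worst-case error of your sensor (the numbers here are illustrative):
max_error = 30         # assumed worst-case observation error
sigma = max_error / 3  # 3-sigma rule: ~99.7% of errors fall within 3*sigma
R = sigma ** 2         # observation (co)variance, here 100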
Another point: you can tune your filter by manipulating the covariance, but I think it's not a good idea. The higher the observation covariance value, the weaker the impact a new observation has on the filter state.
Sorry, I did not understand the second part of your question (about the Kalman filter without an intercept). Could you please explain what you mean?
You are trying to use a regression model and both intercept and slope belong to it.
---------------------------
UPDATE
I prepared some code and plots to answer your questions in details. I used EWC and EWA historical data to stay close to the original article.
First of all, here is the code (pretty much the same as in the examples above, but with a different notation):
from pykalman import KalmanFilter
import numpy as np
import matplotlib.pyplot as plt
# reading data (quick and dirty)
Datum = []
EWA = []
EWC = []
for line in open('data/dataset.csv'):
    f1, f2, f3 = line.split(';')
    Datum.append(f1)
    EWA.append(float(f2))
    EWC.append(float(f3))
n = len(Datum)
# Filter Configuration
# both slope and intercept have to be estimated
# transition_matrix
F = np.eye(2) # identity matrix because x_(k+1) = x_(k) + noise
# observation_matrix
# H_k = [EWA_k 1]
H = np.vstack([np.matrix(EWA), np.ones((1, n))]).T[:, np.newaxis]
# transition_covariance
Q = [[1e-4, 0],
     [0, 1e-4]]
# observation_covariance
R = 1 # max error = 3
# initial_state_mean
X0 = [0,
      0]
# initial_state_covariance
P0 = [[1, 0],
      [0, 1]]
# Kalman-Filter initialization
kf = KalmanFilter(n_dim_obs=1, n_dim_state=2,
                  transition_matrices=F,
                  observation_matrices=H,
                  transition_covariance=Q,
                  observation_covariance=R,
                  initial_state_mean=X0,
                  initial_state_covariance=P0)
# Filtering
state_means, state_covs = kf.filter(EWC)
# Restore EWC based on EWA and estimated parameters
EWC_restored = np.multiply(EWA, state_means[:, 0]) + state_means[:, 1]
# Plots
plt.figure(1)
ax1 = plt.subplot(211)
plt.plot(state_means[:, 0], label="Slope")
plt.grid()
plt.legend(loc="upper left")
ax2 = plt.subplot(212)
plt.plot(state_means[:, 1], label="Intercept")
plt.grid()
plt.legend(loc="upper left")
# check the result
plt.figure(2)
plt.plot(EWC, label="EWC original")
plt.plot(EWC_restored, label="EWC restored")
plt.grid()
plt.legend(loc="upper left")
plt.show()
I could not retrieve the data using pandas, so I downloaded it and read it from a file.
Here you can see the estimated slope and intercept:
To test the estimated data I restored the EWC value from the EWA using the estimated parameters:
About the observation covariance value
By varying the observation covariance value you tell the Filter how accurate the input data is (normally you just describe your confidence in the observation using some datasheets or your knowledge about the system).
Here are estimated parameters and the restored EWC values using different observation covariance values:
You can see the filter follows the original function better with a bigger confidence in observation (smaller R). If the confidence is low (bigger R) the filter leaves the initial estimate (slope = 0, intercept = 0) very slowly and the restored function is far away from the original one.
About the frozen intercept
If you want to freeze the intercept for some reason, you need to change the whole model and all filter parameters.
In the normal case we had:
x = [slope; intercept]      # estimation state
H = [EWA 1]                 # observation matrix
z = [EWC]                   # observation
Now we have:
x = [slope]                 # estimation state
H = [EWA]                   # observation matrix
z = [EWC - const_intercept] # observation
Results:
Here is the code:
from pykalman import KalmanFilter
import numpy as np
import matplotlib.pyplot as plt
# only slope has to be estimated (it will be manipulated by the constant intercept) - mathematically incorrect!
const_intercept = 10
# reading data (quick and dirty)
Datum = []
EWA = []
EWC = []
for line in open('data/dataset.csv'):
    f1, f2, f3 = line.split(';')
    Datum.append(f1)
    EWA.append(float(f2))
    EWC.append(float(f3))
n = len(Datum)
# Filter Configuration
# transition_matrix
F = 1 # identity matrix because x_(k+1) = x_(k) + noise
# observation_matrix
# H_k = [EWA_k]
H = np.matrix(EWA).T[:, np.newaxis]
# transition_covariance
Q = 1e-4
# observation_covariance
R = 1 # max error = 3
# initial_state_mean
X0 = 0
# initial_state_covariance
P0 = 1
# Kalman-Filter initialization
kf = KalmanFilter(n_dim_obs=1, n_dim_state=1,
                  transition_matrices=F,
                  observation_matrices=H,
                  transition_covariance=Q,
                  observation_covariance=R,
                  initial_state_mean=X0,
                  initial_state_covariance=P0)
# Creating the observation based on EWC and the constant intercept
z = EWC[:] # copy the list (not just assign the reference!)
z[:] = [x - const_intercept for x in z]
# Filtering
state_means, state_covs = kf.filter(z) # the estimation for the EWC data minus constant intercept
# Restore EWC based on EWA and estimated parameters
EWC_restored = np.multiply(EWA, state_means[:, 0]) + const_intercept
# Plots
plt.figure(1)
ax1 = plt.subplot(211)
plt.plot(state_means[:, 0], label="Slope")
plt.grid()
plt.legend(loc="upper left")
ax2 = plt.subplot(212)
plt.plot(const_intercept*np.ones((n, 1)), label="Intercept")
plt.grid()
plt.legend(loc="upper left")
# check the result
plt.figure(2)
plt.plot(EWC, label="EWC original")
plt.plot(EWC_restored, label="EWC restored")
plt.grid()
plt.legend(loc="upper left")
plt.show()