Use numba to speed up binary processing

I have a binary file containing information of different types. I know the types and have written a parsing script to extract the information:
import numba as nb
import numpy as np
def fread(fid, nelements, dtype):
    if dtype is np.str:
        dt = np.uint8  # WARNING: assuming 8-bit ASCII for np.str!
    else:
        dt = dtype
    data_array = np.fromfile(fid, dt, nelements)
    data_array.shape = (nelements, 1)
    return data_array
fid = open('./binary_information.bts', 'rb');
nffc = 3
fileFmt = 'int16'
nz = fread( fid, 1, np.int32); # the number of grid points vertically, INT(4)
ny = fread( fid, 1, np.int32); # the number of grid points laterally, INT(4)
nPts = ny*nz;
nv = nffc*nPts; # the size of one time step
velocity = np.zeros([int(s) for s in (nt, nffc, ny, nz)])
for it in range(1, nt):
    ip = 1
    v_cnt = fread( fid, int(nv), fileFmt )
    for iz in range(1, nz):
        for iy in range(1, ny):
            for k in range(1, nffc):
                velocity[it, k, iy, iz] = ( v_cnt[ip] - Voffset[k])/Vslope[k]
                ip = ip + 1
My for loop takes a long time to run, so I want to speed it up with numba. I tried the following and got a strange error, posted below. Is there any way I can speed up this reading process with numba? What went wrong here, and why is it raising this range issue?
@nb.jit(nopython=True)
def main(velocity, v_cnt):
    ip = 1
    for iz in xrange(1, nz):
        for iy in xrange(1, ny):
            for k in xrange(1, nffc):
                velocity[it, k, iy, iz] = ( v_cnt[ip] - Voffset[k])/Vslope[k]
                ip = ip + 1
    return velocity

for it in range(nt):
    ip = 0
    v_cnt = fread( fid, int(nv), fileFmt )
    velocity = main(velocity, v_cnt)
Traceback (most recent call last):
File "bts_read.py", line 66, in <module>
velocity = main(velocity, v_cnt)
File "/usr/local/lib/python2.7/site-packages/numba/dispatcher.py", line 330, in _compile_for_args
raise e
numba.errors.TypingError: Caused By:
Traceback (most recent call last):
File "/usr/local/lib/python2.7/site-packages/numba/compiler.py", line 235, in run
stage()
File "/usr/local/lib/python2.7/site-packages/numba/compiler.py", line 449, in stage_nopython_frontend
self.locals)
File "/usr/local/lib/python2.7/site-packages/numba/compiler.py", line 805, in type_inference_stage
infer.propagate()
File "/usr/local/lib/python2.7/site-packages/numba/typeinfer.py", line 767, in propagate
raise errors[0]
TypingError: Invalid usage of Function(<built-in function range>) with parameters (int64, readonly array(int32, 2d, C))
Known signatures:
* (int32,) -> range_state_int32
* (int32, int32) -> range_state_int32
* (int32, int32, int32) -> range_state_int32
* (int64,) -> range_state_int64
* (int64, int64) -> range_state_int64
* (int64, int64, int64) -> range_state_int64
* (uint64,) -> range_state_uint64
* (uint64, uint64) -> range_state_uint64
* (uint64, uint64, uint64) -> range_state_uint64
File "bts_read.py", line 56
[1] During: resolving callee type: Function(<built-in function range>)
[2] During: typing of call at bts_read.py (56)
Failed at nopython (nopython frontend)
Invalid usage of Function(<built-in function range>) with parameters (int64, readonly array(int32, 2d, C))
Known signatures:
* (int32,) -> range_state_int32
* (int32, int32) -> range_state_int32
* (int32, int32, int32) -> range_state_int32
* (int64,) -> range_state_int64
* (int64, int64) -> range_state_int64
* (int64, int64, int64) -> range_state_int64
* (uint64,) -> range_state_uint64
* (uint64, uint64) -> range_state_uint64
* (uint64, uint64, uint64) -> range_state_uint64
File "bts_read.py", line 56
[1] During: resolving callee type: Function(<built-in function range>)
[2] During: typing of call at bts_read.py (56)
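For what it's worth, the error message itself points at the likely culprit: range inside the jitted function is being handed a readonly array(int32, 2d, C), i.e. one of the loop bounds is still the (1, 1) array returned by fread rather than a plain integer, and numba's range only accepts integer arguments (see the "Known signatures" list above). Below is a minimal sketch of one way to restructure this, not a drop-in replacement: the helper name scale_block is illustrative, the other names follow the snippet above, and it assumes nt, Voffset and Vslope exist in the full script with Voffset/Vslope as NumPy arrays.

import numba as nb
import numpy as np

@nb.jit(nopython=True)
def scale_block(velocity, v_cnt, Voffset, Vslope, it, nz, ny, nffc):
    # nz, ny, nffc arrive here as plain Python ints, so range() can be typed
    ip = 0
    for iz in range(nz):
        for iy in range(ny):
            for k in range(nffc):
                velocity[it, k, iy, iz] = (v_cnt[ip] - Voffset[k]) / Vslope[k]
                ip += 1
    return velocity

nz_i, ny_i = int(nz), int(ny)   # unwrap the (1, 1) arrays returned by fread
for it in range(nt):
    # ravel() turns the (nv, 1) block into a 1-D array so v_cnt[ip] is a scalar
    v_cnt = fread(fid, int(nv), fileFmt).ravel()
    velocity = scale_block(velocity, v_cnt, Voffset, Vslope, it, nz_i, ny_i, nffc)

The point is simply that everything the jitted function touches should be a scalar or a NumPy array of a supported dtype; whether the 0-based ranges match the indexing you intend is something to check against the file format.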

Related

NameError: name 'pbc' is not defined

Hello, I am learning MDAnalysis with Python 3.7. Would you please check my code and advise how to resolve the following error:
Traceback (most recent call last):
File "/home/pulokdeb/projects/def-sohrabz/pulokdeb/beluga_python/Closest_atom_Oxy_group.py", line 242, in <module>
eigen_value = iio.eigen_vals()
File "/home/pulokdeb/ENV/lib/python3.7/site-packages/MDAnalysis/core/topologyattrs.py", line 1347, in eigen_vals
com = atomgroup.center_of_mass(pbc=pbc)
NameError: name 'pbc' is not defined
The code (partial) is below:
def radius_of_gyration(group, pbc=False, **kwargs):
    """Radius of gyration.

    Parameters
    ----------
    pbc : bool, optional
        If ``True``, move all atoms within the primary unit cell before
        calculation. [``False``]

    .. versionchanged:: 0.8 Added *pbc* keyword
    """
    atomgroup = group.atoms
    masses = atomgroup.masses
    com = atomgroup.center_of_mass(pbc=pbc)
    if pbc:
        recenteredpos = atomgroup.pack_into_box(inplace=False) - com
    else:
        recenteredpos = atomgroup.positions - com
    rog_sq = np.sum(masses * np.sum(recenteredpos**2,
                                    axis=1)) / atomgroup.total_mass()
    return np.sqrt(rog_sq)

transplants[GroupBase].append(
    ('radius_of_gyration', radius_of_gyration))
I changed a few lines (def eigen_vals) in the topologyattrs.py file and got my results. I hope it works for my future simulations.
def shape_parameter(group, pbc=False, **kwargs):
    """Shape parameter.

    See [Dima2004a]_ for background information.

    Parameters
    ----------
    pbc : bool, optional
        If ``True``, move all atoms within the primary unit cell before
        calculation. [``False``]

    References
    ----------
    .. [Dima2004a] Dima, R. I., & Thirumalai, D. (2004). Asymmetry
       in the shapes of folded and denatured states of
       proteins. *J Phys Chem B*, 108(21),
       6564-6570. doi:`10.1021/jp037128y
       <https://doi.org/10.1021/jp037128y>`_

    .. versionadded:: 0.7.7
    .. versionchanged:: 0.8 Added *pbc* keyword
    """
    atomgroup = group.atoms
    masses = atomgroup.masses
    com = atomgroup.center_of_mass(pbc=pbc)
    if pbc:
        recenteredpos = atomgroup.pack_into_box(inplace=False) - com
    else:
        recenteredpos = atomgroup.positions - com
    tensor = np.zeros((3, 3))
    for x in range(recenteredpos.shape[0]):
        tensor += masses[x] * np.outer(recenteredpos[x, :],
                                       recenteredpos[x, :])
    tensor /= atomgroup.total_mass()
    eig_vals = np.linalg.eigvalsh(tensor)
    shape = 27.0 * np.prod(eig_vals - np.mean(eig_vals)
                           ) / np.power(np.sum(eig_vals), 3)
    return shape

transplants[GroupBase].append(
    ('shape_parameter', shape_parameter))

def eigen_vals(group, pbc=False, **kwargs):
    """ Changed by Pulok Deb
    """
    atomgroup = group.atoms
    masses = atomgroup.masses
    com = atomgroup.center_of_mass(pbc=pbc)
    if pbc:
        recenteredpos = atomgroup.pack_into_box(inplace=False) - com
    else:
        recenteredpos = atomgroup.positions - com
    tensor = np.zeros((3, 3))
    for x in range(recenteredpos.shape[0]):
        tensor += masses[x] * np.outer(recenteredpos[x, :],
                                       recenteredpos[x, :])
    tensor /= atomgroup.total_mass()
    eig_vals = np.linalg.eigvalsh(tensor)
    return eig_vals

transplants[GroupBase].append(
    ('eigen_vals', eigen_vals))
#warn_if_not_unique
#check_pbc_and_unwrap
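As an aside, the NameError in the traceback is the usual sign that a name is referenced inside a function that never binds it, neither as a parameter nor at module level. The transplanted functions pasted above all declare pbc=False in their signatures, which is what makes the pbc=pbc call legal. A tiny self-contained illustration of the same failure mode and fix (hypothetical names, not MDAnalysis code):

def broken(values):
    # 'scale' is neither a parameter nor a global, so calling this raises
    # NameError: name 'scale' is not defined
    return [v * scale for v in values]

def fixed(values, scale=1.0):
    # declaring 'scale' as a keyword argument (as radius_of_gyration does with pbc)
    # makes the name resolvable inside the function body
    return [v * scale for v in values]

print(fixed([1, 2, 3]))   # [1.0, 2.0, 3.0]

So if the installed copy of eigen_vals was missing the pbc=False keyword argument, adding it (as in the version pasted here) would explain why the edit fixed the error.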

Naive Bayes classification technique algorithm

I found code online for Naive Bayes classification for some small research I am doing. The code I am using is showing some errors, and I cannot find the solution for them. I would greatly appreciate your help.
The code is below:
# Example of Naive Bayes implemented from Scratch in Python
import csv
import random
import math
def loadCsv(filename):
    lines = csv.reader(open(filename, "rt"))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = 'E:\iris.data.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print(('Split {0} rows into train={1} and test={2} rows').format(len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print(('Accuracy: {0}%').format(accuracy))

main()
The traceback is below:
File "<ipython-input-18-4397d9969e66>", line 1, in <module>
runfile('C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py', wdir='C:/Users/Lenovo/Desktop/EE Codes')
File "C:\Users\Lenovo\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "C:\Users\Lenovo\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py", line 76, in <module>
main()
File "C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py", line 69, in main
neighbors = getNeighbors(trainingSet, testSet[x], k)
File "C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py", line 31, in getNeighbors
dist = euclideanDistance(testInstance, trainingSet[x], length)
File "C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py", line 24, in euclideanDistance
distance += pow((instance1[x] - instance2[x]), 2)
TypeError: unsupported operand type(s) for -: 'str' and 'str'
Could you please suggest how to resolve this error for this code? If you need the dataset, please ask; I can provide a link to it as well.
Thanks in advance
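The traceback appears to come from a different script (Knn with prima.py, a k-nearest-neighbours implementation), but the failure mode is the one the pasted code also risks with the iris data: rows come out of csv.reader as strings, and the last column of iris.data is a text class label, so either float(x) over the whole row fails or two unconverted strings end up being subtracted. A hedged sketch of a loader that converts only the fields that parse as numbers and leaves the species label alone (the file path and column layout are assumptions):

import csv

def loadCsv(filename):
    # read rows as text; convert numeric fields to float, keep the class label as a string
    dataset = []
    with open(filename, "rt") as handle:
        for row in csv.reader(handle):
            if not row:                      # iris.data often ends with a blank line
                continue
            converted = []
            for field in row:
                try:
                    converted.append(float(field))
                except ValueError:
                    converted.append(field)  # e.g. 'Iris-setosa'
            dataset.append(converted)
    return dataset

Note that summarize() above still computes mean/stdev over every column before deleting the last entry, so with string labels you would also need to skip the label column there (or map the labels to numbers) for the Naive Bayes code to run end to end.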

Cryptic TypeError: 'decimal.Decimal' object cannot be interpreted as an integer

I am struggling to understand why this function apparently fails in the Jupyter Notebook, but not in the IPython shell:
from decimal import Decimal

def present_value( r, n, fv = None, pmt = None ):
    '''
    Function to compute the Present Value based on interest rate and
    a given future value.

    Arguments accepted
    ------------------
    * r = interest rate,
      which should be given in its original percentage, eg.
      5% instead of 0.05

    * n = number of periods for which the cash flow,
      either as annuity or single flow from one present value

    * fv = future value in dollars,
      if problem is annuity based, leave this empty

    * pmt = each annuity payment in dollars,
      if problem is single cash flow based, leave this empty
    '''
    original_args = [r, n, fv, pmt]
    dec_args = [Decimal( arg ) if arg != None
                else arg
                for arg in original_args
                ]
    if dec_args[3] == None:
        return dec_args[2] / ( ( 1 + ( dec_args[0] / 100 ) )**dec_args[1] )
    elif dec_args[2] == None:
        # annuity_length = range( 1, dec_args[1] + 1 )
        # Not allowed to add a Decimal object
        # with an integer and to use it
        # in the range() function,
        # so we dereference the integer from original_args
        annuity_length = range( 1, original_args[1] + 1 )
        # Apply discounting to each annuity payment made
        # according to number of years left till end
        all_compounded_pmt = [dec_args[3] * ( 1 / ( ( 1 + dec_args[0] / 100 ) ** time_left ) )
                              for time_left in annuity_length
                              ]
        return sum( all_compounded_pmt )
When I imported the module that this function resides in, named functions.py, using from functions import *, and then executed present_value(r=7, n=35, pmt = 11000), I got the error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-93-c1cc587f7e27> in <module>()
----> 1 present_value(r=7, n=35, pmt = 11000)
/path_to_file/functions.py in present_value(r, n, fv, pmt)
73 if dec_args[3] == None:
74 return dec_args[2]/((1 + (dec_args[0]/100))**dec_args[1])
---> 75
76 elif dec_args[2] == None:
77 # annuity_length = range(1, dec_args[1]+1)
TypeError: 'decimal.Decimal' object cannot be interpreted as an integer
but in the IPython shell, evaluating this function works perfectly fine:
In [42]: functions.present_value(r=7, n=35, pmt = 11000)
Out[42]: Decimal('142424.39530474029537')
Can anyone please help me with this really confusing and obscure issue?
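Two observations that may help. First, the error itself is straightforward: range() only accepts integers, and dec_args[1] is a Decimal. Besides dereferencing original_args as in the comment, casting back with int() also works, since a whole-valued Decimal converts exactly. A minimal sketch:

from decimal import Decimal

n = Decimal(35)
# range(1, n + 1)   # TypeError: 'decimal.Decimal' object cannot be interpreted as an integer
annuity_length = range(1, int(n) + 1)           # int() on a whole-valued Decimal is exact
print(list(annuity_length)[0], list(annuity_length)[-1])   # 1 35

Second, the notebook-versus-shell discrepancy looks like a stale import rather than anything Decimal-specific: the source lines in the traceback do not match the code pasted above (the arrow points at a blank line 75), which usually means the notebook kernel is still running an older functions.py and needs a restart or a reload before the edited version takes effect. That is an assumption on my part, not something the traceback proves.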

scipy.optimize.fsolve 'proper array of floats' error

I need to compute the root of a function and I'm using scipy.optimize.fsolve. However, when I call fsolve, it sometimes outputs an error that says 'Result from function call is not a proper array of floats.'
Here's an example of the inputs I'm using:
In [45]: guess = linspace(0.1,1.0,11)
In [46]: alpha_old = 0.5
In [47]: n_old = 0
In [48]: n_new = 1
In [49]: S0 = 0.9
In [50]: fsolve(alpha_eq,guess,args=(n_old,alpha_old,n_new,S0))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
TypeError: array cannot be safely cast to required type
---------------------------------------------------------------------------
error Traceback (most recent call last)
/home/andres/Documents/UdeA/Proyecto/basis_analysis/<ipython-input-50-f1e9a42ba072> in <module>()
----> 1 fsolve(bb.alpha_eq,guess,args=(n_old,alpha_old,n_new,S0))
/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.pyc in fsolve(func, x0, args, fprime, full_output, col_deriv, xtol, maxfev, band, epsfcn, factor, diag)
123 maxfev = 200*(n + 1)
124 retval = _minpack._hybrd(func, x0, args, full_output, xtol,
--> 125 maxfev, ml, mu, epsfcn, factor, diag)
126 else:
127 _check_func('fsolve', 'fprime', Dfun, x0, args, n, (n,n))
error: Result from function call is not a proper array of floats.
In [51]: guess = linspace(0.1,1.0,2)
In [52]: fsolve(alpha_eq,guess,args=(n_old,alpha_old,n_new,S0))
Out[52]: array([ 0.54382423, 1.29716005])
In [53]: guess = linspace(0.1,1.0,3)
In [54]: fsolve(alpha_eq,guess,args=(n_old,alpha_old,n_new,S0))
Out[54]: array([ 0.54382423, 0.54382423, 1.29716005])
There you can see that for guess as defined in In [45] it outputs an error, while for guess as defined in In [51] and In [53] it works fine. As far as I know, In [45], In [51] and In [53] all produce the same type of array, so what's the reason for the error I'm getting in In [50]?
Here are the functions I'm calling in case they're the reason of the problem:
def alpha_eq(alpha2, n1, alpha1, n2, S0):
    return overlap(n1, alpha1, n2, alpha2) - S0

def overlap(n1, alpha1, n2, alpha2):
    aux1 = sqrt((2.0*alpha1)**(2*n1+3)/factorial(2*n1+2))
    aux2 = sqrt((2.0*alpha2)**(2*n2+3)/factorial(2*n2+2))
    return aux1 * aux2 * factorial(n1+n2+2) / (alpha1+alpha2)**(n1+n2+3)
(the functions linspace, sqrt and factorial are imported from scipy)
This is a plot of the function for which I'm trying to find the roots.
It seems to me like this is a bug in fsolve; however, I want to make sure I'm not making a stupid mistake before reporting it.
If there's something wrong with my code please let me know. Thanks!
I have modified your overlap function for debugging as follows:
def overlap(n1, alpha1, n2, alpha2):
    print n1, alpha1, n2, alpha2
    aux1 = sqrt((2.0*alpha1)**(2*n1 + 3)/factorial(2*n1 + 2))
    aux2 = sqrt((2.0*alpha2)**(2*n2 + 3)/factorial(2*n2 + 2))
    ret = aux1 * aux2 * factorial(n1+n2+2) / (alpha1+alpha2)**(n1+n2+3)
    print ret, ret.dtype
    return ret
And when I try to reproduce your error, here's what happens:
>>> scipy.optimize.fsolve(alpha_eq,guess,args=(n_old,alpha_old,n_new,S0))
0 0.5 1 [ 0.1 0.19 0.28 0.37 0.46 0.55 0.64 0.73 0.82 0.91 1. ]
[ 0.11953652 0.34008953 0.54906314 0.71208678 0.82778065 0.90418052
0.95046505 0.97452352 0.98252708 0.97911263 0.96769965] float64
...
0 0.5 1 [ 0.45613162 0.41366639 0.44818267 0.49222515 0.52879856 0.54371741
0.50642005 0.28700652 -3.72580492 1.81152096 1.41975621]
[ 0.82368346+0.j 0.77371428+0.j 0.81503304+0.j
0.85916030+0.j 0.88922137+0.j 0.89992643+0.j
0.87149667+0.j 0.56353606+0.j 0.00000000+1.21228156j
0.75791881+0.j 0.86627491+0.j ] complex128
So in the process of solving your equation, the square root of a negative number is being calculated, which leads to the complex128 dtype and your error.
With your function, if you are only interested in the zeros, I think you can get rid of the sqrts if you raise S0 to the 4th power:
def alpha_eq(alpha2, n1, alpha1, n2, S0):
    return overlap(n1, alpha1, n2, alpha2) - S0**4

def overlap(n1, alpha1, n2, alpha2):
    aux1 = (2.0*alpha1)**(2*n1 + 3)/factorial(2*n1 + 2)
    aux2 = (2.0*alpha2)**(2*n2 + 3)/factorial(2*n2 + 2)
    ret = aux1 * aux2 * factorial(n1+n2+2) / (alpha1+alpha2)**(n1+n2+3)
    return ret
And now:
>>> scipy.optimize.fsolve(alpha_eq,guess,args=(n_old,alpha_old,n_new,S0))
array([ 0.92452239, 0.92452239, 0.92452239, 0.92452239, 0.92452239,
0.92452239, 0.92452239, 0.92452239, 0.92452239, 0.92452239,
0.92452239])

Convert timestamp to datetime in Erlang

How can I convert a timestamp (number of milliseconds since 1 Jan 1970..., aka epoch) to Date or DateTime format in Erlang? Something like {Year,Month,Day}.
Roughly:
msToDate(Milliseconds) ->
    BaseDate = calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}}),
    Seconds = BaseDate + (Milliseconds div 1000),
    {Date, _Time} = calendar:gregorian_seconds_to_datetime(Seconds),
    Date.
It just so happens that I have a GitHub gist with a bunch of datetime utilities for exactly this purpose: http://gist.github.com/104903. The calendar module has most of the low-level plumbing for this stuff.
-module(date_util).
-compile(export_all).

epoch() ->
    now_to_seconds(now())
    .

epoch_hires() ->
    now_to_seconds_hires(now())
    .

now_to_seconds({Mega, Sec, _}) ->
    (Mega * 1000000) + Sec
    .

now_to_milliseconds({Mega, Sec, Micro}) ->
    now_to_seconds({Mega, Sec, Micro}) * 1000
    .

now_to_seconds_hires({Mega, Sec, Micro}) ->
    now_to_seconds({Mega, Sec, Micro}) + (Micro / 1000000)
    .

now_to_milliseconds_hires({Mega, Sec, Micro}) ->
    now_to_seconds_hires({Mega, Sec, Micro}) * 1000
    .

epoch_gregorian_seconds() ->
    calendar:datetime_to_gregorian_seconds({{1970,1,1}, {0,0,0}})
    .

now_to_gregorian_seconds() ->
    epoch_to_gregorian_seconds(now())
    .

epoch_to_gregorian_seconds({Mega, Sec, Micro}) ->
    epoch_to_gregorian_seconds(now_to_seconds({Mega, Sec, Micro}));
epoch_to_gregorian_seconds(Now) ->
    EpochSecs = epoch_gregorian_seconds()
    , Now + EpochSecs
    .

gregorian_seconds_to_epoch(Secs) ->
    EpochSecs = epoch_gregorian_seconds()
    , Secs - EpochSecs
    .

date_to_epoch(Date) ->
    datetime_to_epoch({Date, {0,0,0} })
    .

datetime_to_epoch({Date, Time}) ->
    gregorian_seconds_to_epoch(
        calendar:datetime_to_gregorian_seconds({Date, Time}))
    .

is_older_by(T1, T2, {days, N}) ->
    N1 = day_difference(T1, T2)
    , case N1 of
          N2 when (-N < N2) ->
              true;
          _ ->
              false
      end
    .

is_sooner_by(T1, T2, {days, N}) ->
    case day_difference(T1, T2) of
        N1 when N > N1 ->
            true;
        _ ->
            false
    end
    .

is_time_older_than({Date, Time}, Mark) ->
    is_time_older_than(calendar:datetime_to_gregorian_seconds({Date, Time})
                       , Mark);
is_time_older_than(Time, {DateMark, TimeMark}) ->
    is_time_older_than(Time
                       , calendar:datetime_to_gregorian_seconds({DateMark, TimeMark}));
is_time_older_than(Time, Mark) when is_integer(Time), is_integer(Mark) ->
    Time < Mark
    .

day_difference({D1, _}, D2) ->
    day_difference(D1, D2);
day_difference(D1, {D2, _}) ->
    day_difference(D1, D2);
day_difference(D1, D2) ->
    Days1 = calendar:date_to_gregorian_days(D1)
    , Days2 = calendar:date_to_gregorian_days(D2)
    , Days1 - Days2
    .

is_time_sooner_than({Date, Time}, Mark) ->
    is_time_sooner_than(calendar:datetime_to_gregorian_seconds({Date, Time})
                        , Mark);
is_time_sooner_than(Time, {DateMark, TimeMark}) ->
    is_time_sooner_than(Time
                        , calendar:datetime_to_gregorian_seconds({DateMark, TimeMark}));
is_time_sooner_than(Time, Mark) when is_integer(Time), is_integer(Mark) ->
    Time > Mark
    .

subtract(Date, {days, N}) ->
    New = calendar:date_to_gregorian_days(Date) - N
    , calendar:gregorian_days_to_date(New)
    .

add(Date, {days, N}) ->
    New = calendar:date_to_gregorian_days(Date) + N
    , calendar:gregorian_days_to_date(New)
    .
OTP 21.0 added this function
calendar:system_time_to_universal_time(Time, TimeUnit) -> datetime()
Types
Time = integer()
TimeUnit = erlang:time_unit()
Converts a specified system time into universal date and time.
Example:
> os:system_time(1000).
1598512151718
> calendar:system_time_to_universal_time(1598512151718, 1000).
{{2020,8,27},{7,9,11}}
Reference: https://erlang.org/doc/man/calendar.html#system_time_to_universal_time-2