Is there a way to make a log 2d histogram in plotly? - ipython

Is there a way to make a log 2d histogram in plotly? I'm not sure if this is even possible.
Here is the matplotlib code that worked:
%matplotlib inline
import math
from matplotlib.colors import LogNorm
print len(list_length_view)
%time xy = [(math.log10(x[0]), math.log10(x[1])) for x in list_length_view if x[0] > 0 and x[1] > 0]
print len(xy)
## Why are there negative lengths or view counts!?!?
plt.hist2d([x[0] for x in list_length_view], [x[1] for x in list_length_view],
bins=(40, 60), range=numpy.array([(0, 10000), (0, 1000000)]),
norm=LogNorm(), cmap='Oranges')
cb1 = plt.colorbar()
plt.gca().set_xlabel(r'Clip Duration in Seconds')
plt.gca().set_ylabel(r'Views Per Clip')
cb1.set_label(r'Total Clips')
However, my original goal was to do it in plotly. Here is the code I tried to use so far to get it to work:
data4 = Data([
Histogram2d(
x=[x[0] for x in list_length_view], #Durations of highlights
y=[y[1] for y in list_length_view], # Views fo hgihlights
nbinsx=10,
nbinsy=10,
)])
layout4 = dict(
title='Public Highlight Analysis',
yaxis=YAxis(
title = 'Average Number of Views'),
xaxis1=XAxis(
title = "Duration of Highlight in Seconds")
)
fig3 = Figure(data=data4, layout=layout4)
py.iplot(fig3)
I also tried converting matplotlib to plotly.
def plot_mpl_fig():
xy = [(math.log10(x[0]), math.log10(x[1])) for x in list_length_view if x[0] > 0 and x[1] > 0]
plt.hist2d([x[0] for x in xy], [x[1] for x in xy],
bins=200, range=numpy.array([(-1, 4), (-1, 6)]),
norm=LogNorm(), cmap='Oranges')
cb1 = plt.colorbar()
plt.gca().set_xlabel(r'Log$_{10}$(Clip Duration in Seconds)')
plt.gca().set_ylabel(r'Log$_{10}$(Views Per Clip)')
cb1.set_label(r'Total Clips')
plot_mpl_fig()
mpl_fig1 = plt.gcf()
py_fig1 = tls.mpl_to_plotly(mpl_fig1, verbose=True)

Related

How can I add the slope of a specific point in a polynomial line in plotly

Let's say I have a polynomial regression in plotly that looks like that
Something along a code like this:
fig = px.scatter(
x=final_df.index,
y=final_df.nr_deaths,
trendline="lowess", #ols
trendline_color_override="red",
trendline_options=dict(frac=0.1),
opacity=.5,
title='Deaths per year'
)
fig.show()
How would I calculate the slope (= tangent) line on a specific point of the polynomial regression line?
Currently, this cannot be done within plotly alone. But you can achieve this by using other libraries for calculation and applying the results in the chart.
The difficulty in this question lies in
calculating the slope of the polynomial at a certain point
calculating the x and y values for plotting them as lines
For calculating the slopes at a certain point you can use numpy functionality. Afterwards you can just calculate the x and y values with python and plot them with plotly.
poly_degree = 3
y = df.col.values
x = np.arange(0, len(y))
x = x.reshape(-1, 1)
fitted_params = np.polyfit(np.arange(0, len(y)), y, poly_degree )
polynomials = np.poly1d(fitted_params)
derivatives = np.polyder(polynomials)
y_value_at_point = polynomials(x).flatten()
slope_at_point = np.polyval(derivatives, np.arange(0, len(y)))
For calculating the corresponding slope values (the necessary x values and y values) at a point, and plotting it in plotly you can do something like this:
def draw_slope_line_at_point(fig, ind, x, y, slope_at_point, verbose=False):
"""Plot a line from an index at a specific point for x values, y values and their slopes"""
y_low = (x[0] - x[ind]) * slope_at_point[ind] + y[ind]
y_high = (x[-1] - x[ind]) * slope_at_point[ind] + y[ind]
x_vals = [x[0], x[-1]]
y_vals = [y_low, y_high]
if verbose:
print((x[0] - x[ind]))
print(x[ind], x_vals, y_vals, y[ind],slope_at_point[ind])
fig.add_trace(
go.Scatter(
x=x_vals,
y=y_vals,
name="Tangent at point",
line = dict(color='orange', width=2, dash='dash'),
)
)
return x_vals, y_vals
Calling it and adding annotation would look like this:
for pt in [31]:
draw_slope_line_at_point(
fig,
x= np.arange(0, len(y)),
y = y_value_at_point,
slope_at_point=slope_at_point,
ind = pt)
fig.add_annotation(x=pt, y=y_value_at_point[pt],
text=f'''Slope: {slope_at_point[pt]:.2f}\t {df.date.strftime('%Y-%m-%d')[pt]}''',
showarrow=True,
arrowhead=1)
and then looking like that in the result:

How to use solve_ivp to integrate harmonic oscillator with static and kinetic friction zone in a robust way?

I would like to integrate the equation of motion for the harmonic oscillator with a static and kinetic friction zone (as depicted in the following image).
For x < a the mass slides without friction over the surface. For x > a static and kinetic friction takes effect, with its respective coefficients of friction mu0 (static) and mu (kinetic). I tried to implement this with scipy.integrate.solve_ivp in a while-loop, making use of solve_ivp's event handling, stopping and restarting the integration each time one of the following events gets triggered:
the oscillator's velocity is zero (mass_stops in code below)
the oscillator reaches x=a and has positive velocity, i.e. entering the friction zone (entering_friction_zone)
the oscillator reaches x=a and has negative velocity, i.e. leaving the friction zone (leaving_friction_zone)
Using this approach, I faced the same problem as other users already did, documented here and here, namely that the initial conditions (inits) trigger an event and the while-loop gets stuck.
Using one of the workarounds, as described in the second thread, it kind of works but isn't as robust as I would like it to be. To be more specific, I now integrate the equations (initially and after each occurrence of an event) for a very short time interval (1e-13) and use the final state of this integration (sol_in_between) as initial condition for the "real integration" (sol). Of course I don't know beforehand how short the time interval needs to be for it to work. Too short of an interval results in an infinite loop (as before), too large of an interval and the results become crude.
Now I'm wondering if there is a more straight forward and more importantly a more robust way to integrate the given system, ideally using solve_ivp but I'm open for other suggestions.
Code:
import numpy as np
from scipy.integrate import solve_ivp
import matplotlib.pyplot as plt
import os, sys
def system(t,xv,params):
"""
INPUT: current time t, current position x & velocity v in list xv = [x, v],
current system parameter in dict params
OUTPUT: xdot and vdot in list f = [xdot, vdot]"""
x, v = xv
m, c, g, r, muq = [params[par] for par in ['m', 'c', 'g', 'r', 'muq']]
return [v, -c*x/m - r*muq*g]
def mass_stops(t,xv,params):
"""Event function for event 'velocity zero'"""
x, v = xv
return v
mass_stops.terminal = True
mass_stops.direction = 0
def leaving_friction_zone(t,xv,params):
"""Event function for event 'mass crosses border with positive velocity', i.e. leaving friction zone"""
x, v = xv
return x-params['a']
leaving_friction_zone.terminal = True
leaving_friction_zone.direction = 1
def entering_friction_zone(t,xv,params):
"""Event function for event 'mass crosses border with negative velocity', i.e. entering friction zone"""
x, v = xv
return x-params['a']
entering_friction_zone.terminal = True
entering_friction_zone.direction = -1
def event_number(events):
"""Helper function for identifying which event got triggered"""
for i, event in enumerate(events):
if event.size > 0: return i
def integrate_oscillator(kinet_friction_coeff=.4, static_friction_coeff=.4, tend=20, m=20, c=300, a=.5, g=9.81, inits=[1, 0]):
mu = kinet_friction_coeff
mu0 = static_friction_coeff
params = {'m': m, 'c': c, 'g': g, 'a': a}
tspan = [0, tend]
params['r'] = -1 if inits[0] > 0 else 1
params['muq'] = mu if inits[0] > a else 0
if params['muq'] > 0:
print('friction')
else:
print('no friction')
tout = list()
x = list()
v = list()
CONT = 1
while CONT == 1:
# we integrate the equation of motion for a short time interval without event handling, HOPING that the initial/last event doesn't get triggered again in sol
# we use the end state of this integration as initial condition for the "actual integration" and throw everything else away
sol_in_between = solve_ivp(system, [0, 1e-13], inits, args=(params,), rtol=1e-9, atol=1e-12)
inits = sol_in_between.y[:,-1]
sol = solve_ivp(system, tspan, inits, args=(params,), events=[mass_stops,leaving_friction_zone,entering_friction_zone], rtol=1e-9, atol=1e-12)
tout = np.concatenate([tout, sol.t])
x = np.concatenate([x, sol.y[0,:]]) #x.append(sol.y[0,:])
v = np.concatenate([v, sol.y[1,:]]) #y.append(sol.y[1,:])
tnow = tout[-1]
if tnow >= tend:
CONT = 0
else:
tspan = [tnow, tend]
inits = [x[-1], v[-1]]
#CONT = 0
event_num = event_number(sol.y_events)
if event_num == 0: # Event: "velocity zero"
print('Velocity zero')
if np.abs(c*x[-1]) < mu0*m*g and x[-1] > a: # spring force is smaller than static friction force...
CONT = 0 # ... therefore stop!
print(f'Mass stopped at time t={tout[-1]}')
tout = np.concatenate([tout, np.array([tend])])
x = np.concatenate([x, np.array([x[-1]])])
v = np.concatenate([v, np.array([0])])
elif x[-1] - a < 0: # mass moves to the right
print('to the right')
params['r'] = 1 # change direction of kinetic friction force to the left
else: # mass moves to the left
print('to the left')
params['r'] = -1 # change direction of kinetic friction force to the right
elif event_num == 1: # entering friction zone
print('entering friction zone')
params['muq'] = mu
elif event_num == 2: # leaving friction zone
print('leaving friction zone')
params['muq'] = 0
return [tout,x,v]
[t,x,v] = integrate_oscillator(kinet_friction_coeff=.4,static_friction_coeff=.8,tend=50,inits=[1,0])
plt.rcParams['font.size'] = 16
fig, (ax1, ax2) = plt.subplots(2,figsize=(12,9),sharex=True )
figsize=(9,6)
ax1.plot(t,x)
ax1.grid(True)
ax1.plot([0,t[-1]], [.5, .5])
ax2.plot(t,v)
ax2.grid(True)

How to use interpn?

I am trying to use interpn (in python using Scipy) to replicate results from Matlab using interp3. However, I am struggling to structure my arguments. I tried the following line:
f = interpn(blur_maps, fx, fy, pyr_level)
Where blur maps is a 600 x 800 x 7 representing a grayscale image at seven levels of blur,
fx and fy are indices of the seven maps. Both fx and fy are 2d arrays. pyr_level is a 2d array that contains values from 1 to 7 representing the blur map to be interpolated.
My question is since I incorrectly arranged the arguments, how can I arrange them in a way that works? I tried to look up examples but I didn't see anything similar. Here is an example of the data I am trying to interpolate:
import numpy as np
import cv2, math
from scipy.interpolate import interpn
levels = 7
img_path = '/Users/alimahdi/Desktop/i4.jpg'
img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
row, col = img.shape
x_range = np.arange(0, col)
y_range = np.arange(0, row)
fx, fy = np.meshgrid(x_range, y_range)
e = np.exp(np.sqrt(fx ** 2 + fy ** 2))
pyr_level = 7 * (e - np.min(e)) / (np.max(e) - np.min(e))
blur_maps = np.zeros((row, col, levels))
blur_maps[:, :, 0] = img
for i in range(levels - 1):
img = cv2.pyrDown(img)
r, c = img.shape
tmp = img
for j in range(int(math.log(row / r, 2))):
tmp = cv2.pyrUp(tmp)
blur_maps[:, :, i + 1] = tmp
pixelGrid = [np.arange(x) for x in blur_maps.shape]
interpPoints = np.array([fx.flatten(), fy.flatten(), pyr_level.flatten()])
interpValues = interpn(pixelGrid, blur_maps, interpPoints.T)
finalValues = np.reshape(interpValues, fx.shape)
I am now getting the following error: ValueError: One of the requested xi is out of bounds in dimension 0 I do know that the problem is in interpPoints but I am not sure how to fix it. Any suggestions?
The documentation for scipy.interpolate.interpn states that the first argument is a grid of the data you are interpolating over (which is just the integers of the pixel numbers), second argument is data (blur_maps) and third arguments is the interpolation points in the form (npoints, ndims). So you would have to do something like:
import scipy.interpolate
pixelGrid = [np.arange(x) for x in blur_maps.shape] # create grid of pixel numbers as per the docs
interpPoints = np.array([fx.flatten(), fy.flatten(), pyr_level.flatten()])
# interpolate
interpValues = scipy.interpolate.interpn(pixelGrid, blur_maps, interpPoints.T)
# now reshape the output array to get in the original format you wanted
finalValues = np.reshape(interpValues, fx.shape)

Kalman Filter (pykalman): Value for obs_covariance and model without intercept

I am looking at the KalmanFilter from pykalman shown in examples:
pykalman documentation
Example 1
Example 2
and I am wondering
observation_covariance=100,
vs
observation_covariance=1,
the documentation states
observation_covariance R: e(t)^2 ~ Gaussian (0, R)
How should the value be set here correctly?
Additionally, is it possible to apply the Kalman filter without intercept in the above module?
The observation covariance shows how much error you assume to be in your input data. Kalman filter works fine on normally distributed data. Under this assumption you can use the 3-Sigma rule to calculate the covariance (in this case the variance) of your observation based on the maximum error in the observation.
The values in your question can be interpreted as follows:
Example 1
observation_covariance = 100
sigma = sqrt(observation_covariance) = 10
max_error = 3*sigma = 30
Example 2
observation_covariance = 1
sigma = sqrt(observation_covariance) = 1
max_error = 3*sigma = 3
So you need to choose the value based on your observation data. The more accurate the observation, the smaller the observation covariance.
Another point: you can tune your filter by manipulating the covariance, but I think it's not a good idea. The higher the observation covariance value the weaker impact a new observation has on the filter state.
Sorry, I did not understand the second part of your question (about the Kalman Filter without intercept). Could you please explain what you mean?
You are trying to use a regression model and both intercept and slope belong to it.
---------------------------
UPDATE
I prepared some code and plots to answer your questions in details. I used EWC and EWA historical data to stay close to the original article.
First of all here is the code (pretty the same one as in the examples above but with a different notation)
from pykalman import KalmanFilter
import numpy as np
import matplotlib.pyplot as plt
# reading data (quick and dirty)
Datum=[]
EWA=[]
EWC=[]
for line in open('data/dataset.csv'):
f1, f2, f3 = line.split(';')
Datum.append(f1)
EWA.append(float(f2))
EWC.append(float(f3))
n = len(Datum)
# Filter Configuration
# both slope and intercept have to be estimated
# transition_matrix
F = np.eye(2) # identity matrix because x_(k+1) = x_(k) + noise
# observation_matrix
# H_k = [EWA_k 1]
H = np.vstack([np.matrix(EWA), np.ones((1, n))]).T[:, np.newaxis]
# transition_covariance
Q = [[1e-4, 0],
[ 0, 1e-4]]
# observation_covariance
R = 1 # max error = 3
# initial_state_mean
X0 = [0,
0]
# initial_state_covariance
P0 = [[ 1, 0],
[ 0, 1]]
# Kalman-Filter initialization
kf = KalmanFilter(n_dim_obs=1, n_dim_state=2,
transition_matrices = F,
observation_matrices = H,
transition_covariance = Q,
observation_covariance = R,
initial_state_mean = X0,
initial_state_covariance = P0)
# Filtering
state_means, state_covs = kf.filter(EWC)
# Restore EWC based on EWA and estimated parameters
EWC_restored = np.multiply(EWA, state_means[:, 0]) + state_means[:, 1]
# Plots
plt.figure(1)
ax1 = plt.subplot(211)
plt.plot(state_means[:, 0], label="Slope")
plt.grid()
plt.legend(loc="upper left")
ax2 = plt.subplot(212)
plt.plot(state_means[:, 1], label="Intercept")
plt.grid()
plt.legend(loc="upper left")
# check the result
plt.figure(2)
plt.plot(EWC, label="EWC original")
plt.plot(EWC_restored, label="EWC restored")
plt.grid()
plt.legend(loc="upper left")
plt.show()
I could not retrieve data using pandas, so I downloaded them and read from the file.
Here you can see the estimated slope and intercept:
To test the estimated data I restored the EWC value from the EWA using the estimated parameters:
About the observation covariance value
By varying the observation covariance value you tell the Filter how accurate the input data is (normally you just describe your confidence in the observation using some datasheets or your knowledge about the system).
Here are estimated parameters and the restored EWC values using different observation covariance values:
You can see the filter follows the original function better with a bigger confidence in observation (smaller R). If the confidence is low (bigger R) the filter leaves the initial estimate (slope = 0, intercept = 0) very slowly and the restored function is far away from the original one.
About the frozen intercept
If you want to freeze the intercept for some reason, you need to change the whole model and all filter parameters.
In the normal case we had:
x = [slope; intercept] #estimation state
H = [EWA 1] #observation matrix
z = [EWC] #observation
Now we have:
x = [slope] #estimation state
H = [EWA] #observation matrix
z = [EWC-const_intercept] #observation
Results:
Here is the code:
from pykalman import KalmanFilter
import numpy as np
import matplotlib.pyplot as plt
# only slope has to be estimated (it will be manipulated by the constant intercept) - mathematically incorrect!
const_intercept = 10
# reading data (quick and dirty)
Datum=[]
EWA=[]
EWC=[]
for line in open('data/dataset.csv'):
f1, f2, f3 = line.split(';')
Datum.append(f1)
EWA.append(float(f2))
EWC.append(float(f3))
n = len(Datum)
# Filter Configuration
# transition_matrix
F = 1 # identity matrix because x_(k+1) = x_(k) + noise
# observation_matrix
# H_k = [EWA_k]
H = np.matrix(EWA).T[:, np.newaxis]
# transition_covariance
Q = 1e-4
# observation_covariance
R = 1 # max error = 3
# initial_state_mean
X0 = 0
# initial_state_covariance
P0 = 1
# Kalman-Filter initialization
kf = KalmanFilter(n_dim_obs=1, n_dim_state=1,
transition_matrices = F,
observation_matrices = H,
transition_covariance = Q,
observation_covariance = R,
initial_state_mean = X0,
initial_state_covariance = P0)
# Creating the observation based on EWC and the constant intercept
z = EWC[:] # copy the list (not just assign the reference!)
z[:] = [x - const_intercept for x in z]
# Filtering
state_means, state_covs = kf.filter(z) # the estimation for the EWC data minus constant intercept
# Restore EWC based on EWA and estimated parameters
EWC_restored = np.multiply(EWA, state_means[:, 0]) + const_intercept
# Plots
plt.figure(1)
ax1 = plt.subplot(211)
plt.plot(state_means[:, 0], label="Slope")
plt.grid()
plt.legend(loc="upper left")
ax2 = plt.subplot(212)
plt.plot(const_intercept*np.ones((n, 1)), label="Intercept")
plt.grid()
plt.legend(loc="upper left")
# check the result
plt.figure(2)
plt.plot(EWC, label="EWC original")
plt.plot(EWC_restored, label="EWC restored")
plt.grid()
plt.legend(loc="upper left")
plt.show()

BVP4c solve for unknown boundary

I am trying to use bvp4c to solve a system of 4 odes. The issue is that one of the boundaries is unknown.
Can bvp4c handle this? In my code L is the unknown I am solving for.
I get an error message printed below.
function mat4bvp
L = 8;
solinit = bvpinit(linspace(0,L,100),#mat4init);
sol = bvp4c(#mat4ode,#mat4bc,solinit);
sint = linspace(0,L);
Sxint = deval(sol,sint);
end
% ------------------------------------------------------------
function dtdpdxdy = mat4ode(s,y,L)
Lambda = 0.3536;
dtdpdxdy = [y(2)
-sin(y(1)) + Lambda*(L-s)*cos(y(1))
cos(y(1))
sin(y(1))];
end
% ------------------------------------------------------------
function res = mat4bc(ya,yb,L)
res = [ ya(1)
ya(2)
ya(3)
ya(4)
yb(1)];
end
% ------------------------------------------------------------
function yinit = mat4init(s)
yinit = [ cos(s)
0
0
0
];
end
Unfortunately I get the following error message ;
>> mat4bvp
Not enough input arguments.
Error in mat4bvp>mat4ode (line 13)
-sin(y(1)) + Lambda*(L-s)*cos(y(1))
Error in bvparguments (line 105)
testODE = ode(x1,y1,odeExtras{:});
Error in bvp4c (line 130)
bvparguments(solver_name,ode,bc,solinit,options,varargin);
Error in mat4bvp (line 4)
sol = bvp4c(#mat4ode,#mat4bc,solinit);
One trick to transform a variable end point into a fixed one is to change the time scale. If x'(t)=f(t,x(t)) is the differential equation, set t=L*s, s from 0 to 1, and compute the associated differential equation for y(s)=x(L*s)
y'(s)=L*x'(L*s)=L*f(L*s,y(s))
The next trick to employ is to transform the global variable into a part of the differential equation by computing it as constant function. So the new system is
[ y'(s), L'(s) ] = [ L(s)*f(L(s)*s,y(s)), 0 ]
and the value of L occurs as additional free left or right boundary value, increasing the number of variables = dimension of the state vector to the number of boundary conditions.
I do not have Matlab readily available, in Python with the tools in scipy this can be implemented as
from math import sin, cos
import numpy as np
from scipy.integrate import solve_bvp, odeint
import matplotlib.pyplot as plt
# The original function with the interval length as parameter
def fun0(t, y, L):
Lambda = 0.3536;
#print t,y,L
return np.array([ y[1], -np.sin(y[0]) + Lambda*(L-t)*np.cos(y[0]), np.cos(y[0]), np.sin(y[0]) ]);
# Wrapper function to apply both tricks to transform variable interval length to a fixed interval.
def fun1(s,y):
L = y[-1];
dydt = np.zeros_like(y);
dydt[:-1] = L*fun0(L*s, y[:-1], L);
return dydt;
# Implement evaluation of the boundary condition residuals:
def bc(ya, yb):
return [ ya[0],ya[1], ya[2], ya[3], yb[0] ];
# Define the initial mesh with 5 nodes:
x = np.linspace(0, 1, 3)
# This problem has multiple solutions. Try two initial guesses.
L_a=8
L_b=9
y_a = odeint(lambda y,t: fun1(t,y), [0,0,0,0,L_a], x)
y_b = odeint(lambda y,t: fun1(t,y), [0,0,0,0,L_b], x)
# Now we are ready to run the solver.
res_a = solve_bvp(fun1, bc, x, y_a.T)
res_b = solve_bvp(fun1, bc, x, y_b.T)
L_a = res_a.sol(0)[-1]
L_b = res_b.sol(0)[-1]
print "L_a=%.8f, L_b=%.8f" % ( L_a,L_b )
# Plot the two found solutions. The solution are in a spline form, use this to produce a smooth plot.
x_plot = np.linspace(0, 1, 100)
y_plot_a = res_a.sol(x_plot)[0]
y_plot_b = res_b.sol(x_plot)[0]
plt.plot(L_a*x_plot, y_plot_a, label='L=%.8f'%L_a)
plt.plot(L_b*x_plot, y_plot_b, label='L=%.8f'%L_b)
plt.legend()
plt.xlabel("t")
plt.ylabel("y")
plt.grid(); plt.show()
which produces
Trying different initial values for L finds other solutions on quite different scales, among them
L=0.03195111
L=0.05256775
L=0.05846539
L=0.06888907
L=0.08231966
L=4.50411522
L=6.84868060
L=20.01725616
L=22.53189063