scipy gaussian_kde and circular data - scipy

I am using SciPy's gaussian_kde to get the probability density of some bimodal data. However, as my data is angular (directions in degrees), I have a problem when values occur near the limits. The code below gives two example KDEs; when the domain is 0-360 it underestimates the density, as it cannot deal with the circular nature of the data. The pdf needs to be defined on the unit circle, but I can't find anything in scipy.stats suitable for this type of data (the von Mises distribution is there, but it only works for unimodal data). Has anyone out there run into this one before? Is there anything (preferably Python-based) available to estimate bimodal pdfs on the unit circle?
import numpy as np
import scipy as sp
from pylab import plot,figure,subplot,show,hist
from scipy import stats
baz = np.array([-92.29061004, -85.42607874, -85.42607874, -70.01689348,
-63.43494882, -63.43494882, -70.01689348, -70.01689348,
-59.93141718, -63.43494882, -59.93141718, -63.43494882,
-63.43494882, -63.43494882, -57.52880771, -53.61564818,
-57.52880771, -63.43494882, -63.43494882, -92.29061004,
-16.92751306, -99.09027692, -99.09027692, -16.92751306,
-99.09027692, -16.92751306, -9.86580694, -8.74616226,
-9.86580694, -8.74616226, -8.74616226, -2.20259816,
-2.20259816, -2.20259816, -9.86580694, -2.20259816,
-2.48955292, -2.48955292, -2.48955292, -2.48955292,
4.96974073, 4.96974073, 4.96974073, 4.96974073,
-2.48955292, -2.48955292, -2.48955292, -2.48955292,
-2.48955292, -9.86580694, -9.86580694, -9.86580694,
-16.92751306, -19.29004622, -19.29004622, -26.56505118,
-19.29004622, -19.29004622, -19.29004622, -19.29004622])
# --- KDE of the directions expressed in [-180, 180] degrees ------------------
xx = np.linspace(-180, 180, 181)
scipy_kde = stats.gaussian_kde(baz)
# Probability mass the (non-periodic) Gaussian KDE keeps inside the domain.
print(scipy_kde.integrate_box_1d(-180, 180))
figure()
plot(xx, scipy_kde(xx), c='green')

# --- Same directions mapped onto [0, 360] degrees ----------------------------
# NOTE: this shifts the negative entries of `baz` in place.
baz[baz < 0] += 360
xx = np.linspace(0, 360, 181)
scipy_kde = stats.gaussian_kde(baz)
# Integrate over the domain that is actually plotted; mass leaking past 0/360
# is what makes this estimate too low near the limits.
print(scipy_kde.integrate_box_1d(0, 360))
plot(xx, scipy_kde(xx), c='red')

Dave's answer isn't correct, because scipy's vonmises doesn't wrap around [-pi, pi].
Instead you can use the following code, which is based on the same principle. It is based on the equations described in numpy.
def vonmises_kde(data, kappa, n_bins=100):
    """Estimate a circular density by summing von Mises kernels on a grid.

    Parameters
    ----------
    data : array_like
        Sample angles in radians. Any shape is accepted (it is flattened).
        The kernel only depends on ``cos(x - angle)``, so the angles do not
        have to lie inside [-pi, pi].
    kappa : float
        Concentration of each kernel (larger means narrower), ``kappa >= 0``.
    n_bins : int, optional
        Number of grid points spanning [-pi, pi], both end points included.

    Returns
    -------
    bins : ndarray, shape (n_bins,)
        Grid on which the density is evaluated.
    kde : ndarray, shape (n_bins,)
        Density values, scaled so that their trapezoidal integral over
        ``bins`` equals 1.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    angles = np.asarray(data, dtype=float).ravel()
    if angles.size == 0:
        raise ValueError("data must contain at least one angle")

    bins = np.linspace(-np.pi, np.pi, n_bins)
    # exp(kappa * (cos - 1)) is the von Mises kernel up to the constant factor
    # exp(-kappa) / (2 * pi * I0(kappa)). That factor cancels in the
    # normalisation below, and keeping the exponent <= 0 avoids the overflow
    # (inf / inf -> nan) that exp(kappa * cos) / I0(kappa) hits for large kappa.
    kde = np.exp(kappa * (np.cos(bins[:, None] - angles[None, :]) - 1.0)).sum(axis=1)

    # np.trapz was renamed to np.trapezoid in NumPy 2.0; support both.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    kde /= trapezoid(kde, x=bins)
    return bins, kde
Here is an example
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import vonmises
# Draw a three-component mixture of von Mises samples (angles in radians).
samples = [vonmises(-1, 5, 1000), vonmises(2, 10, 500), vonmises(3, 20, 100)]
data = np.concatenate(samples)

# Top panel: raw histogram; bottom panel: kernel density estimate.
fig, axes = plt.subplots(nrows=2, ncols=1)
hist_ax, kde_ax = axes
hist_ax.hist(data, 100)
x, kde = vonmises_kde(data, 20)
kde_ax.plot(x, kde)
Histogram and kernel density plots

Here is a fast approximation to @kingjr's more exact answer:
def vonmises_pdf(x, mu, kappa):
    """Evaluate the von Mises probability density at ``x``.

    Parameters
    ----------
    x : float or array_like
        Angle(s) in radians at which to evaluate the density.
    mu : float
        Location (mean direction) in radians.
    kappa : float
        Concentration parameter.

    Returns
    -------
    float or ndarray
        ``exp(kappa * cos(x - mu)) / (2 * pi * I0(kappa))``.
    """
    # Imported here so the function does not depend on a module-level
    # ``scipy`` name being in scope.
    from scipy.special import i0e

    # i0e(kappa) == exp(-|kappa|) * I0(kappa); pairing it with
    # exp(kappa * (cos - 1)) gives the same value as the textbook formula
    # without overflowing for large kappa.
    x = np.asarray(x, dtype=float)
    return np.exp(kappa * (np.cos(x - mu) - 1.0)) / (2. * np.pi * i0e(kappa))
def vonmises_fft_kde(data, kappa, n_bins):
    """Approximate a von Mises kernel density estimate via an FFT convolution.

    The samples are binned into ``n_bins`` equal-width bins covering
    [-pi, pi); the bin counts are then circularly convolved with a von Mises
    kernel. Every sample is therefore treated as if it sat at the centre of
    its bin, which is what makes the result an approximation.

    Parameters
    ----------
    data : array_like
        Sample angles in radians. Values outside [-pi, pi) are wrapped onto
        the circle instead of being dropped by the histogram.
    kappa : float
        Concentration of the kernel (larger means narrower), ``kappa >= 0``.
    n_bins : int
        Number of histogram bins (odd and even values both work).

    Returns
    -------
    bin_centers : ndarray, shape (n_bins,)
        Centres of the histogram bins.
    kde : ndarray, shape (n_bins,)
        Density at the bin centres, scaled so that
        ``kde.sum() * (2 * pi / n_bins) == 1``.

    Raises
    ------
    ValueError
        If ``n_bins`` is smaller than 1 or ``data`` is empty.
    """
    n_bins = int(n_bins)
    if n_bins < 1:
        raise ValueError("n_bins must be a positive integer")
    angles = np.asarray(data, dtype=float).ravel()
    if angles.size == 0:
        raise ValueError("data must contain at least one angle")

    # Map every angle onto [-pi, pi) so none falls outside the histogram.
    angles = (angles + np.pi) % (2.0 * np.pi) - np.pi

    bin_edges = np.linspace(-np.pi, np.pi, n_bins + 1, endpoint=True)
    hist_n, _ = np.histogram(angles, bins=bin_edges)
    bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
    bin_width = 2.0 * np.pi / n_bins

    # Sample the kernel at the lags k * bin_width (k = 0 .. n_bins - 1). The
    # circular convolution then lands exactly on the bin centres, so no
    # fftshift is required and there is no half-bin offset. The constant
    # 1 / (2 * pi * I0(kappa)) is left out because the result is normalised
    # below; exp(kappa * (cos - 1)) cannot overflow.
    lags = bin_width * np.arange(n_bins)
    kernel = np.exp(kappa * (np.cos(lags) - 1.0))

    # Passing n explicitly keeps the output length equal to n_bins when
    # n_bins is odd (irfft otherwise assumes an even length).
    kde = np.fft.irfft(np.fft.rfft(kernel) * np.fft.rfft(hist_n), n=n_bins)
    # FFT round-off can leave tiny negative values where the density is ~0.
    np.maximum(kde, 0.0, out=kde)

    # Periodic rectangle rule: integrates over the full circle, including the
    # segment between the last and the first bin centre.
    kde /= kde.sum() * bin_width
    return bin_centers, kde
Test (using tqdm for progress bar and timing, and matplotlib to verify results):
import numpy as np
from tqdm import tqdm
import scipy.stats
import matplotlib.pyplot as plt
n_runs = 1000
n_bins = 100
kappa = 10

# Draw the test sample once so that (a) the timings measure only the KDE code,
# not the random number generation, and (b) both estimators are plotted for
# the same data and can be compared directly.
sample = np.r_[
    np.random.vonmises(-1, 5, 1000),
    np.random.vonmises(2, 10, 500),
    np.random.vonmises(3, 20, 100)
]

# `range` instead of the Python-2-only `xrange`; works on Python 2 and 3.
for _ in tqdm(range(n_runs)):
    bins1, kde1 = vonmises_kde(
        data=sample,
        kappa=kappa,
        n_bins=n_bins
    )
for _ in tqdm(range(n_runs)):
    bins2, kde2 = vonmises_fft_kde(
        data=sample,
        kappa=kappa,
        n_bins=n_bins
    )

plt.figure()
plt.plot(bins1, kde1, label="kingjr's solution")
plt.plot(bins2, kde2, label="dolf's FFT solution")
plt.legend()
plt.show()
Results:
100%|██████████| 1000/1000 [00:07<00:00, 135.29it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1945.14it/s]
(1945 / 135 = 14 times faster)
For even more speed, use an integer power of 2 as the number of bins. It also scales better (i.e. it stays fast with many bins and lots of data). On my PC it's 118 times faster than the original answer with n_bins=1024.
Why does it work?
The product of the FFTs of two signals (without zero-padding) is equal to the circular (or cyclic) convolution of the two signals. A kernel density estimation is basically a kernel convolved with a signal that has an impulse at the position of each data point.
Why is it not exact?
Since I use a histogram to space the data evenly, I lose the exact position of each sample, and only use the centre of the bin to which it belongs. The number of samples in each bin is used as the magnitude of the impulse at that point. For example: ignoring the normalisation for a moment, if you have a bin from 0 to 1 and two samples in that bin, at 0.1 and at 0.2, the exact KDE will be the kernel centred around 0.1 plus the kernel centred around 0.2. The approximation will be 2x the kernel centred around 0.5, which is the centre of the bin.

So I have what I think is a reasonable solution. Basically I use Von Mises distribution as a basis function for a kernel density estimate. Code is below in case it is useful for anyone else.
def vonmises_KDE(data, kappa, plot=None):
    """
    Create a kernel density estimate of circular data using the von Mises
    distribution as the basis function.

    Parameters
    ----------
    data : array_like
        Sample directions in DEGREES (converted to radians internally).
    kappa : float
        Concentration of each von Mises kernel, ``kappa >= 0``.
    plot : object, optional
        If truthy, draw every individual kernel (rescaled to a peak height of
        0.2, in grey) and the final estimate (red) with matplotlib.

    Returns
    -------
    x_data : ndarray, shape (100,)
        Evaluation grid in radians on [-pi, pi].
    vonmises_kde : ndarray, shape (100,)
        Density on ``x_data``, normalised to unit trapezoidal integral.
    f : scipy.interpolate.interp1d
        Linear interpolant of the density over ``x_data``.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    from scipy.interpolate import interp1d

    angles = np.radians(np.asarray(data, dtype=float)).ravel()
    if angles.size == 0:
        raise ValueError("data must contain at least one angle")

    x_data = np.linspace(-np.pi, np.pi, 100)

    # One row per sample. The kernel is written out directly instead of going
    # through scipy.stats.vonmises: that avoids overwriting the global
    # ``vonmises.a`` / ``vonmises.b`` attributes, and cos() makes the kernel
    # wrap around the circle by construction. The constant factor
    # exp(-kappa) / (2 * pi * I0(kappa)) is dropped because it cancels in the
    # normalisation below.
    kernels = np.exp(kappa * (np.cos(x_data[None, :] - angles[:, None]) - 1.0))

    density = kernels.sum(axis=0)
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; support both.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    density = density / trapezoid(density, x=x_data)
    f = interp1d(x_data, density)

    if plot:
        import matplotlib.pyplot as plt

        # Plot rescaled COPIES of the kernels so that enabling the plot can
        # never change the returned estimate.
        for kernel in kernels:
            plt.plot(x_data, .2 * kernel / kernel.max(), "grey", alpha=.5)
        plt.plot(x_data, density, c='red')
    return x_data, density, f
baz = np.array([-92.29061004, -85.42607874, -85.42607874, -70.01689348,
-63.43494882, -63.43494882, -70.01689348, -70.01689348,
-59.93141718, -63.43494882, -59.93141718, -63.43494882,
-63.43494882, -63.43494882, -57.52880771, -53.61564818,
-57.52880771, -63.43494882, -63.43494882, -92.29061004,
-16.92751306, -99.09027692, -99.09027692, -16.92751306,
-99.09027692, -16.92751306, -9.86580694, -8.74616226,
-9.86580694, -8.74616226, -8.74616226, -2.20259816,
-2.20259816, -2.20259816, -9.86580694, -2.20259816,
-2.48955292, -2.48955292, -2.48955292, -2.48955292,
4.96974073, 4.96974073, 4.96974073, 4.96974073,
-2.48955292, -2.48955292, -2.48955292, -2.48955292,
-2.48955292, -9.86580694, -9.86580694, -9.86580694,
-16.92751306, -19.29004622, -19.29004622, -26.56505118,
-19.29004622, -19.29004622, -19.29004622, -19.29004622])
# Concentration passed to every von Mises kernel.
kappa = 12
# `baz` holds directions in degrees; plot=1 is truthy, so the kernels and the
# estimate are also drawn. Note that the result is bound to the module-level
# name `vonmises_kde`, which replaces any function of the same name.
x_data, vonmises_kde, f = vonmises_KDE(baz, kappa, plot=1)

Related

Kmeans, GMM or any cluster algorithm but you can set a minimum number of datapoints any cluster should have?

Is there any clustering algorithm or method out there that you can set the minimum and maximum number of data points any cluster should have? Thank you!
I have never heard of a minimum number, other than 1 or 2, perhaps. You can certainly get an 'optimal' number of clusters, using an Elbow Curve method. Here is a great example, using data from the stock market.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.optimize as sco
import datetime as dt
import math
from pylab import plot,show
from numpy import vstack,array
from numpy.random import rand
from math import sqrt
from numpy import vstack,array
from datetime import datetime, timedelta
from pandas_datareader import data as wb
from scipy.cluster.vq import kmeans,vq
np.random.seed(777)
start = '2020-1-01'
end = '2020-3-27'
tickers = ['MMM',
'ABT',
'ABBV',
'ABMD',
'ACN',
'ATVI',
'ADBE',
'AMD',
'AAP',
'AES',
'AMG',
'AFL',
'A',
'APD',
'AKAM',
'ALK',
'ALB',
'ARE',
'ALXN',
'ALGN',
'ALLE',
'AGN',
'ADS',
'ALL',
'GOOGL',
'GOOG',
'MO',
'AMZN',
'AEP',
'AXP',
'AIG',
'AMT',
'AWK',
'AMP',
'ABC',
'AME',
'AMGN',
'APH',
'ADI',
'ANSS',
'ANTM',
'AON',
'AOS',
'APA',
'AIV',
'AAPL',
'AMAT',
'APTV',
'ADM',
'ARNC',
'ADSK',
'ADP',
'AZO',
'AVB',
'AVY',
'ZBH',
'ZION',
'ZTS']
thelen = len(tickers)

# Download the adjusted close of every ticker and tag each row with its symbol.
price_data = []
for ticker in tickers:
    history = wb.DataReader(ticker, start = start, end = end, data_source='yahoo')
    prices = history[['Adj Close']]
    tagged = prices.assign(ticker=ticker)
    price_data.append(tagged[['ticker', 'Adj Close']])

# Long format: one row per (date, ticker).
df = pd.concat(price_data)
df.dtypes
df.head()
df.shape
pd.set_option('display.max_columns', 500)

# Wide format: one row per date, one column per ticker.
df = df.reset_index()
df = df.set_index('Date')
table = df.pivot(columns='ticker')
# The pivot yields two-level column labels; keep only the second level
# (the ticker symbol).
table.columns = [col[1] for col in table.columns]
table.head()

# Average annual percentage return and volatility over a theoretical
# 252-trading-day year.
returns = pd.DataFrame(table.pct_change().mean() * 252)
returns.columns = ['Returns']
returns['Volatility'] = table.pct_change().std() * math.sqrt(252)

# Two-column array (return, volatility) to feed into the K-Means algorithm.
annual_return = np.asarray(returns['Returns'])
annual_volatility = np.asarray(returns['Volatility'])
data = np.asarray([annual_return, annual_volatility]).T
X = data
# scikit-learn's estimator class; `kmeans` and `vq` (lower case) are the
# scipy.cluster.vq functions imported at the top of the script.
from sklearn.cluster import KMeans

# --- Elbow curve: inertia of a K-Means fit for k = 2 .. 19 -------------------
distorsions = []
for k in range(2, 20):
    k_means = KMeans(n_clusters=k)
    k_means.fit(X)
    distorsions.append(k_means.inertia_)
fig = plt.figure(figsize=(15, 5))
plt.plot(range(2, 20), distorsions)
plt.grid(True)
plt.title('Elbow curve')
# (the original post showed the elbow-curve image here)

# --- SciPy route: K-Means with K = 15 (15 clusters) --------------------------
centroids, _ = kmeans(data, 15)
# assign each sample to a cluster
idx, _ = vq(data, centroids)

# --- scikit-learn route: same K, plus a scatter plot of the clusters ---------
# Bound to `kmeans_model` so the scipy `kmeans` function is not shadowed.
kmeans_model = KMeans(n_clusters=15)
kmeans_model.fit(data)
y_kmeans = kmeans_model.predict(data)
# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in recent
# Matplotlib releases (and `cm` was never imported in this script).
viridis = plt.get_cmap('viridis', 15)
# Indexing the 15-entry colormap with the integer labels gives one RGBA colour
# per sample, so a single scatter call replaces the per-point loop.
plt.scatter(data[:, 0], data[:, 1], c=viridis(y_kmeans), s=50)
centers = kmeans_model.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.5)
You can do a Google search and find all kinds of info on this concept. Here is one link to get you started.
https://blog.cambridgespark.com/how-to-determine-the-optimal-number-of-clusters-for-k-means-clustering-14f27070048f
As an aside, you can experiment with Affinity Propagation, which will automatically pick an optimized number of centroids for you.
from sklearn.cluster import AffinityPropagation
from sklearn import metrics
from sklearn.datasets import make_blobs
# -----------------------------------------------------------------------------
# Sample data: 300 points drawn around three blob centres
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(
    n_samples=300,
    centers=centers,
    cluster_std=0.5,
    random_state=0,
)

# -----------------------------------------------------------------------------
# Fit Affinity Propagation and report clustering quality
af = AffinityPropagation(preference=-50).fit(X)
cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_
n_clusters_ = len(cluster_centers_indices)

print('Estimated number of clusters: %d' % n_clusters_)
homogeneity = metrics.homogeneity_score(labels_true, labels)
print("Homogeneity: %0.3f" % homogeneity)
completeness = metrics.completeness_score(labels_true, labels)
print("Completeness: %0.3f" % completeness)
v_measure = metrics.v_measure_score(labels_true, labels)
print("V-measure: %0.3f" % v_measure)
adjusted_rand = metrics.adjusted_rand_score(labels_true, labels)
print("Adjusted Rand Index: %0.3f" % adjusted_rand)
adjusted_mutual_info = metrics.adjusted_mutual_info_score(labels_true, labels)
print("Adjusted Mutual Information: %0.3f" % adjusted_mutual_info)
silhouette = metrics.silhouette_score(X, labels, metric='sqeuclidean')
print("Silhouette Coefficient: %0.3f" % silhouette)

# -----------------------------------------------------------------------------
# Plot every cluster in its own colour, with spokes to its exemplar
import matplotlib.pyplot as plt
from itertools import cycle

plt.close('all')
plt.figure(1)
plt.clf()

colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k, col in zip(range(n_clusters_), colors):
    class_members = labels == k
    cluster_center = X[cluster_centers_indices[k]]
    center_x, center_y = cluster_center[0], cluster_center[1]
    # members as dots
    plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
    # exemplar as a large circle
    plt.plot(center_x, center_y, 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=14)
    # one line from the exemplar to each member
    for x in X[class_members]:
        plt.plot([center_x, x[0]], [center_y, x[1]], col)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
https://scikit-learn.org/stable/auto_examples/cluster/plot_affinity_propagation.html#sphx-glr-auto-examples-cluster-plot-affinity-propagation-py
Or, consider using Mean Shift.
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs
# -----------------------------------------------------------------------------
# Sample data: 10000 points around the listed centres
centers = [[1, 1], [-1, -1], [1, -1], [1, -1], [1, -1]]
X, _ = make_blobs(n_samples=10000, centers=centers, cluster_std=0.2)

# -----------------------------------------------------------------------------
# Cluster with MeanShift, using a bandwidth estimated from the data
bandwidth = estimate_bandwidth(X, quantile=0.6, n_samples=5000)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
labels = ms.labels_
cluster_centers = ms.cluster_centers_
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)
print("number of estimated clusters : %d" % n_clusters_)

# -----------------------------------------------------------------------------
# Plot every cluster in its own colour and mark its centre
import matplotlib.pyplot as plt
from itertools import cycle

plt.figure(1)
plt.clf()

colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k, col in zip(range(n_clusters_), colors):
    my_members = labels == k
    cluster_center = cluster_centers[k]
    member_points = X[my_members]
    plt.plot(member_points[:, 0], member_points[:, 1], col + '.')
    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=14)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
https://scikit-learn.org/stable/auto_examples/cluster/plot_mean_shift.html#sphx-glr-auto-examples-cluster-plot-mean-shift-py
I can't think of anything else that could be relevant. Maybe someone else will jump in here and offer an alternative idea.

How to do nonlinear data-fitting a function on the experiment data

I have some experimental data, and I need to fit the following function to it in order to determine one of the variables. A Levenberg–Marquardt least-squares algorithm was used in this procedure.
I used the curve-fitting option in the Igor Pro software: I defined a new fit function and tried to define the independent and dependent variables.
Nevertheless, I get the following error and don't know the reason for it:
"The fitting function returned INF for at least one X variable"
My function is :
sin(theta) = -1+2*sqrt(alpha/x)*exp(-beta*(x-alpha)^2)
beta = 1.135e-4;
sin(theta) = [-0.81704 -0.67649 -0.83137 -0.73468 -0.66744 -0.43602 0.45368 0.75802 0.96705 0.99717 ]
x = [72.01 59.99 51.13 45.53 36.15 31.66 30.16 29.01 25.62 23.47 ]
Is there any suggestion to find alpha variable here?
Is there any handy software or program for nonlinear curve fitting?
In gnuplot, it would look like this. The fit is not great, but that's not the "fault" of gnuplot, but apparently this data cannot be fitted with this function very well.
Code:
### nonlinear curve fitting
# Fits f(x) below to the measured points and prints the fitted parameters.
reset session
# Inline data block: column 1 = x, column 2 = measured sin(theta).
$Data <<EOD
72.01 -0.81704
59.99 -0.67649
51.13 -0.83137
45.53 -0.73468
36.15 -0.66744
31.66 -0.43602
30.16 0.45368
29.01 0.75802
25.62 0.96705
23.47 0.99717
EOD
# Model from the question; alpha and beta are the free parameters.
f(x) = -1+2*sqrt(alpha/x)*exp(-beta*(x-alpha)**2)
# initial guessed values
alpha = 25
beta = 1
set fit nolog results
# Fit using column 1 as x and column 2 as y, adjusting both alpha and beta.
fit f(x) $Data u 1:2 via alpha,beta
# Data as points joined by lines, fitted curve in red.
plot $Data u 1:2 w lp pt 7, \
f(x) lc rgb "red"
print sprintf("alpha=%g, beta=%g",alpha,beta)
### end of code
Result:
alpha=25.818, beta=0.0195229
If it might be of some use, my equation search on your data turned up a good fit to a standard 4-parameter logistic equation "y = d + (a - d) / (1.0 + pow(x / c, b))" with parameters a = 0.96207949, b = 44.14292256, c = 30.67324939, and d = -0.74830947 yielding RMSE = 0.0565 and R-squared = 0.9943, and I have included code for a Python graphical fitter using this equation.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# Measurements from the question: sin(theta) observed at each x.
theta = [-0.81704, -0.67649, -0.83137, -0.73468, -0.66744, -0.43602, 0.45368, 0.75802, 0.96705, 0.99717]
x = [72.01, 59.99, 51.13, 45.53, 36.15, 31.66, 30.16, 29.01, 25.62, 23.47]

# Array versions under the names the fitting code below expects.
xData, yData = numpy.array(x), numpy.array(theta)


# StandardLogistic4Parameter equation from zunzun.com
def func(x, a, b, c, d):
    """Four-parameter logistic: ``d + (a - d) / (1 + (x / c) ** b)``."""
    scaled_power = numpy.power(x / c, b)
    denominator = 1.0 + scaled_power
    return d + (a - d) / denominator
# Start every parameter at 1.0 (what curve_fit would use by default).
initialParameters = numpy.array([1.0, 1.0, 1.0, 1.0])

# Least-squares fit of func to the measurements.
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

# Residuals of the fitted model and the usual goodness-of-fit numbers.
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared error
RMSE = numpy.sqrt(MSE)  # root mean squared error
residual_variance = numpy.var(absError)
data_variance = numpy.var(yData)
Rsquared = 1.0 - (residual_variance / data_variance)

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data and the fitted curve in one figure.

    ``graphWidth`` and ``graphHeight`` are the figure size in pixels
    (the figure is created at 100 dpi). Reads the module-level ``xData``,
    ``yData``, ``func`` and ``fittedParameters``.
    """
    size_in_inches = (graphWidth/100.0, graphHeight/100.0)
    f = plt.figure(figsize=size_in_inches, dpi=100)
    axes = f.add_subplot(111)

    # raw data as diamond markers
    axes.plot(xData, yData, 'D')

    # fitted equation evaluated across the range of the data
    xModel = numpy.linspace(min(xData), max(xData))
    axes.plot(xModel, func(xModel, *fittedParameters))

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    plt.close('all')  # clean up after using pyplot


graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Matlab
I slightly changed the function, -1 changed to -gamma and optimize to find gamma
The code is as follow
% Measured sin(theta) values and the x positions they were taken at.
ydata = [-0.81704 -0.67649 -0.83137 -0.73468 -0.66744 -0.43602 0.45368...
0.75802 0.96705 0.99717 ];
xdata = [72.01 59.99 51.13 45.53 36.15 31.66 30.16 29.01 25.62 23.47 ];
% Model with a free offset gamma in place of the fixed -1. Note the sign:
% this uses exp(+beta*(x-alpha)^2), so beta here corresponds to -beta in
% sin(theta) = -1+2*sqrt(alpha/x)*exp(-beta*(x-alpha)^2).
% Anonymous functions are introduced with '@' (the '#' was an extraction artefact).
sin_theta = @(alpha, beta, gamma, xdata) -gamma+2.*sqrt(alpha./xdata).*exp(beta.*(xdata-alpha).^2);
%Fitting function as function of array(x) required by lsqcurvefit
f = @(x,xdata) sin_theta(x(1),x(2), x(3),xdata);
% [alpha, beta, gamma]
x0 = [25, 0, 1] ;
options = optimoptions('lsqcurvefit','Algorithm','levenberg-marquardt', 'FunctionTolerance', 1e-30);
% Empty lower/upper bounds: the fit is unconstrained.
[x,resnorm,residual,exitflag,output] = lsqcurvefit(f,x0,xdata,ydata,[], [], options);
% Accuracy
RMSE = sqrt(sum(residual.^2)/length(residual));
alpha = x(1); beta = x(2); gamma = x(3);
%Plotting data
data = linspace(xdata(1),xdata(end));
plot(xdata,ydata,'ro',data,f(x,data),'b-', 'linewidth', 3)
legend('Data','Fitted exponential')
title('Data and Fitted Curve')
set(gca,'FontSize',20)
Result
alpha = 26.0582, beta = -0.0329, gamma = 0.7881 instead of 1, RMSE = 0.1498
Graph

the model learned, but cannot predict?

I followed a tutorial to write some linear regression code for the Boston housing prices. Training worked well and the loss became smaller, but when I plotted the predictions with matplotlib, the graph did not look the way I expected.
I searched, but could not solve my problem.
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # Load the data and standardise every feature column (zero mean, unit
    # variance). The fitted scalers are kept so the transform can be reused.
    boston = load_boston()
    col_names = ['feature_{}'.format(i) for i in range(boston['data'].shape[1])]
    df_full = pd.DataFrame(boston['data'], columns=col_names)
    scalers_dict = {}
    for col in col_names:
        scaler = StandardScaler()
        df_full[col] = scaler.fit_transform(df_full[col].values.reshape(-1, 1)).ravel()
        scalers_dict[col] = scaler
    x_train, x_test, y_train, y_test = train_test_split(df_full.values, boston['target'], test_size=0.2, random_state=2)

    # Single linear layer followed by ReLU (keeps the predicted price >= 0).
    model = torch.nn.Sequential(torch.nn.Linear(x_train.shape[1], 1), torch.nn.ReLU())
    criterion = torch.nn.MSELoss(reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    n_epochs = 2000
    train_loss = []
    test_loss = []

    x_train = torch.from_numpy(x_train).float()
    # The model output has shape (N, 1), so the target must be a column vector
    # too. With a 1-D target of shape (N,), MSELoss broadcasts the pair to
    # (N, N) and compares every prediction with every target; the minimiser of
    # that loss is the mean target, which is why all predictions collapsed to
    # one value (a horizontal line).
    y_train = torch.from_numpy(y_train).float().unsqueeze(1)

    for epoch in range(n_epochs):
        y_hat = model(x_train)
        loss = criterion(y_hat, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss = loss.item() ** 0.5  # RMSE of this epoch as a float
        train_loss.append(epoch_loss)
        if (epoch + 1) % 250 == 0:
            print("{}:loss = {}".format(epoch + 1, epoch_loss))

    # Sort the samples by target value so both curves are easy to compare.
    # argsort must run on the 1-D view; on the (N, 1) column it would sort
    # within each single-element row and return all zeros.
    order = y_train.squeeze(1).argsort()
    y_train = y_train[order]
    x_train = x_train[order, :]

    model.eval()
    with torch.no_grad():
        predicted = model(x_train).numpy()
    actual = y_train.numpy().flatten()
    print('predicted:', predicted[:5].flatten(), actual[:5])
    plt.plot(predicted.flatten(), 'r-', label='predicted')
    plt.plot(actual, 'g-', label='actual')
    plt.legend()
    plt.show()
Why are the predictions all the same value, like [22.4413, 22.4413, ...]?
In the picture, it's a horizontal line.
I'm a complete beginner at deep learning; thank you very much for your help!

TensorFlow restoring from NN does not work

I am struggling with restoring values from NN in tensorflow. I tried to follow the examples on net, and here is my code:
import tensorflow as tf
import numpy as np
import math, random
import matplotlib.pyplot as plt
np.random.seed(1000) # for repro
# Target function: sin(x) plus Gaussian noise with standard deviation 0.1.
function_to_learn = lambda x: np.sin(x) + 0.1*np.random.randn(*x.shape)
NUM_HIDDEN_NODES = 2
NUM_EXAMPLES = 1000
TRAIN_SPLIT = .8
MINI_BATCH_SIZE = 100
NUM_EPOCHS = 500
# Inputs drawn uniformly from [-2*pi, 2*pi], stored as a (NUM_EXAMPLES, 1) column.
all_x = np.float32(np.random.uniform(-2*math.pi, 2*math.pi, (1, NUM_EXAMPLES))).T
np.random.shuffle(all_x)
# First TRAIN_SPLIT fraction for training, the remainder for validation.
train_size = int(NUM_EXAMPLES*TRAIN_SPLIT)
trainx = all_x[:train_size]
validx = all_x[train_size:]
trainy = function_to_learn(trainx)
validy = function_to_learn(validx)
plt.figure()
plt.scatter(trainx, trainy, c='green', label='train')
plt.scatter(validx, validy, c='red', label='validation')
plt.legend()
# Graph inputs: one feature per example, one target per example.
X = tf.placeholder(tf.float32, [None, 1], name="X")
Y = tf.placeholder(tf.float32, [None, 1], name="Y")
# NOTE(review): in the four lines below, name=... is an argument of tf.zeros,
# not of tf.Variable, so the variables themselves are not given these names.
# NOTE(review): model() creates its own variables through init_weights() and
# does not reference these four, so they appear to stay at their zero initial
# value during training — confirm before relying on them after a restore.
w_h = tf.Variable(tf.zeros([1, NUM_HIDDEN_NODES],name="w_h"))
b_h = tf.Variable(tf.zeros([1, NUM_HIDDEN_NODES],name="b_h"))
w_o = tf.Variable(tf.zeros([NUM_HIDDEN_NODES,1],name="w_o"))
b_o = tf.Variable(tf.zeros([1, 1],name="b_o"))
def init_weights(shape, init_method='xavier', xavier_params = (None, None)):
    """Create a float32 ``tf.Variable`` of the given shape.

    Parameters
    ----------
    shape : list of int
        Shape of the variable.
    init_method : {'xavier', 'zeros', 'uniform'}
        ``'zeros'``   -- all zeros.
        ``'uniform'`` -- despite its name, draws from a normal distribution
        with standard deviation 0.01 (kept as is for compatibility).
        ``'xavier'``  -- uniform in ``[-limit, limit]`` with
        ``limit = sqrt(6 / (fan_in + fan_out))``.
    xavier_params : tuple
        ``(fan_in, fan_out)`` for the ``'xavier'`` method. An entry that is
        ``None`` falls back to the first / last entry of ``shape``.

    Returns
    -------
    tf.Variable

    Raises
    ------
    ValueError
        If ``init_method`` is not one of the supported names.
    """
    if init_method == 'zeros':
        return tf.Variable(tf.zeros(shape, dtype=tf.float32))
    if init_method == 'uniform':
        return tf.Variable(tf.random_normal(shape, stddev=0.01, dtype=tf.float32))
    if init_method == 'xavier':
        # This branch was missing: the default method fell through and
        # returned None, which then broke tf.matmul in the caller.
        fan_in, fan_out = xavier_params
        if fan_in is None:
            fan_in = shape[0]
        if fan_out is None:
            fan_out = shape[-1]
        limit = math.sqrt(6.0 / (fan_in + fan_out))
        return tf.Variable(tf.random_uniform(shape, minval=-limit, maxval=limit, dtype=tf.float32))
    raise ValueError("unknown init_method: %r" % (init_method,))
def model(X, num_hidden = NUM_HIDDEN_NODES):
    """Build a one-hidden-layer network and return its output tensor.

    A sigmoid hidden layer of ``num_hidden`` units is followed by a linear
    output unit. Four new variables are created through ``init_weights`` on
    every call, in the order: hidden weights, hidden bias, output weights,
    output bias.
    """
    hidden_weights = init_weights([1, num_hidden], 'uniform' )
    hidden_bias = init_weights([1, num_hidden], 'zeros')
    pre_activation = tf.matmul(X, hidden_weights) + hidden_bias
    h = tf.nn.sigmoid(pre_activation)
    output_weights = init_weights([num_hidden, 1], 'xavier', xavier_params=(num_hidden, 1))
    output_bias = init_weights([1, 1], 'zeros')
    return tf.matmul(h, output_weights) + output_bias
yhat = model(X, NUM_HIDDEN_NODES)
# Build the loss once and reuse it for training and validation. Creating a
# new tf.nn.l2_loss op inside the epoch loop would add nodes to the graph on
# every iteration.
loss = tf.nn.l2_loss(yhat - Y)
train_op = tf.train.AdamOptimizer().minimize(loss)
plt.figure()

# List the variables that exist in the graph.
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for v in tf.all_variables():
        print(v.name)

saver = tf.train.Saver()
errors = []
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for i in range(NUM_EPOCHS):
        # Step through the training set in mini-batches. Slicing up to
        # start + MINI_BATCH_SIZE includes the final batch, which the earlier
        # zip of two ranges silently skipped.
        for start in range(0, len(trainx), MINI_BATCH_SIZE):
            end = start + MINI_BATCH_SIZE
            sess.run(train_op, feed_dict={X: trainx[start:end], Y: trainy[start:end]})
        # Validation loss; this is the l2_loss value, i.e. sum(err ** 2) / 2.
        mse = sess.run(loss, feed_dict={X: validx, Y: validy})
        errors.append(mse)
        if i%100 == 0:
            print("epoch %d, validation MSE %g" % (i, mse))
            print(sess.run(w_h))
            saver.save(sess,"/Python/tensorflow/res/save_net.ckpt", global_step = i)
    print(" ******* AFTR *******")
    for v in tf.all_variables():
        print(v.name)

plt.plot(errors)
plt.xlabel('#epochs')
plt.ylabel('MSE')
**To get the restored values, I tried:**
import tensorflow as tf
import numpy as np
import math, random
import matplotlib.pyplot as plt
NUM_HIDDEN_NODES = 2
#SECOND PART TO GET THE STORED VALUES
# Placeholder variables whose values are meant to be overwritten by the
# checkpoint. NOTE(review): the Saver matches variables by name, so these
# names and shapes must match those saved by the training script — confirm.
w_h = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(1, NUM_HIDDEN_NODES), dtype=tf.float32, name='w_h')
b_h = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(1, NUM_HIDDEN_NODES), dtype=tf.float32, name='b_h')
w_o = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(NUM_HIDDEN_NODES, 1), dtype=tf.float32, name='w_o')
b_o = tf.Variable(np.arange(1).reshape(1, 1), dtype=tf.float32, name='b_o')
saver = tf.train.Saver()
with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state("/Python/tensorflow/res/")
    if ckpt and ckpt.model_checkpoint_path:
        # Restores from checkpoint
        saver.restore(sess, "/Python/tensorflow/res/save_net.ckpt-400")
        print("Model loaded")
        # Read the variables only after a successful restore: without it they
        # are uninitialised in this session and sess.run would fail.
        print("weights:", sess.run(w_h))
        print("biases:", sess.run(b_h))
    else:
        print("No checkpoint file found")
Your help is greatly appreciated and I am almost giving up on this.
Thanks a lot again
It seems the checkpoint file you want to restore your variables from is different from the current variable/shape of existing code.
Save: (if substitute it with constants from definitions above)
w_h = tf.Variable(tf.zeros([1, 5],name="w_h"))
b_h = tf.Variable(tf.zeros([1, 5],name="b_h"))
w_o = tf.Variable(tf.zeros([5,1],name="w_o"))
b_o = tf.Variable(tf.zeros([1, 1],name="b_o"))
Restore:
w_h = tf.Variable(np.arange(10).reshape(1, 10), dtype=tf.float32, name='w_h')
b_h = tf.Variable(np.arange(10).reshape(1, 10), dtype=tf.float32, name='b_h')
w_o = tf.Variable(np.arange(10).reshape(10, 1), dtype=tf.float32, name='w_o')
b_o = tf.Variable(np.arange(1).reshape(1, 1), dtype=tf.float32, name='b_o')
To prevent these types of problems, try to use shared functions for training and inference so that all your code uses the same variables and constants.
You are creating two sets of weights, once globally and second time when you call init_weights. The second set of variables is the one that's getting optimized, but both sets are saved.
In your eval code, you are creating this set of variables once, so your restore only restores the first set, which has not been modified after initialization.
The solution is to either factor out model creation code so that exactly same graph is created during training and during eval, or to use meta_graph which will recreate graph structure during restore.

Plot a plane based on a normal vector and a point in Matlab or matplotlib

How would one go plotting a plane in matlab or matplotlib from a normal vector and a point?
For all the copy/pasters out there, here is similar code for Python using matplotlib:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
point = np.array([1, 2, 3])
normal = np.array([1, 1, 2])
# a plane is a*x+b*y+c*z+d=0
# [a,b,c] is the normal. Thus, we have to calculate
# d and we're set
d = -point.dot(normal)
# create x,y
xx, yy = np.meshgrid(range(10), range(10))
# calculate corresponding z (requires normal[2] != 0, i.e. a plane that is
# not parallel to the z-axis)
z = (-normal[0] * xx - normal[1] * yy - d) * 1. / normal[2]
# plot the surface; add_subplot is used because Figure.gca() no longer
# accepts a projection keyword in current Matplotlib
plt3d = plt.figure().add_subplot(111, projection='3d')
plt3d.plot_surface(xx, yy, z)
plt.show()
For Matlab:
% Plot the plane through `point` that is perpendicular to `normal`.
point = [1,2,3];
normal = [1,1,2];
%# a plane is a*x+b*y+c*z+d=0
%# [a,b,c] is the normal. Thus, we have to calculate
%# d and we're set
d = -point*normal'; %'# dot product for less typing
%# create x,y
[xx,yy]=ndgrid(1:10,1:10);
%# calculate corresponding z
%# (divides by normal(3), so normal(3) must not be 0)
z = (-normal(1)*xx - normal(2)*yy - d)/normal(3);
%# plot the surface
figure
surf(xx,yy,z)
Note: this solution only works as long as normal(3) is not 0. If the plane is parallel to the z-axis, you can rotate the dimensions to keep the same approach:
%% Variant with the roles of the dimensions rotated: the grid variables xx, yy
%% now stand for the z and x coordinates and the computed value is y.
z = (-normal(3)*xx - normal(1)*yy - d)/normal(2); %% assuming normal(3)==0 and normal(2)~=0
%% plot the surface
figure
surf(xx,yy,z)
%% label the axis to avoid confusion
xlabel('z')
ylabel('x')
zlabel('y')
For copy-pasters wanting a gradient on the surface:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import numpy as np
import matplotlib.pyplot as plt
point = np.array([1, 2, 3])
normal = np.array([1, 1, 2])
# a plane is a*x+b*y+c*z+d=0
# [a,b,c] is the normal. Thus, we have to calculate
# d and we're set
d = -point.dot(normal)
# create x,y
xx, yy = np.meshgrid(range(10), range(10))
# calculate corresponding z (requires normal[2] != 0)
z = (-normal[0] * xx - normal[1] * yy - d) * 1. / normal[2]
# plot the surface; add_subplot is used because Figure.gca() no longer
# accepts a projection keyword in current Matplotlib
plt3d = plt.figure().add_subplot(111, projection='3d')
# np.gradient returns the derivative along axis 0 first. With meshgrid's
# default 'xy' indexing axis 0 runs along y, so the first result is d/dy.
Gy, Gx = np.gradient(xx * yy)
G = (Gx ** 2 + Gy ** 2) ** .5  # gradient magnitude
N = G / G.max()  # normalize 0..1
plt3d.plot_surface(xx, yy, z, rstride=1, cstride=1,
                   facecolors=cm.jet(N),
                   linewidth=0, antialiased=False, shade=False
                   )
plt.show()
The above answers are good enough. One thing to mention is that they all use the same method: calculating the z value for a given (x, y). The drawback is that they meshgrid the x-y plane, so the shape of the plotted patch in space varies with the normal (only its projection onto the x-y plane stays the same). For example, you cannot get a square in 3D space (only a distorted one).
To avoid this, there is a different way that uses a rotation. If you first generate the data in the x-y plane (it can be any shape) and then apply the rotation that takes [0 0 1] to your normal vector, you will get what you want. Simply run the code below for reference.
% Draw a unit circle lying in the plane through `point` with normal `normal`.
point = [1,2,3];
normal = [1,2,2];
% Unit circle in the x-y plane, sampled every 10 degrees.
t=(0:10:360)';
circle0=[cosd(t) sind(t) zeros(length(t),1)];
% Rotation matrix taking the z-axis [0 0 1] onto `normal`.
% NOTE(review): vrrotvec / vrrotvec2mat and normr are toolbox functions,
% not base MATLAB — confirm they are available.
r=vrrotvec2mat(vrrotvec([0 0 1],normal));
% Rotate the circle, then move its centre to `point`.
circle=circle0*r'+repmat(point,length(circle0),1);
patch(circle(:,1),circle(:,2),circle(:,3),.5);
axis square; grid on;
%add line
% Segment from `point` along the normalised normal.
% NOTE(review): the variable name `line` shadows MATLAB's built-in line().
line=[point;point+normr(normal)]
hold on;plot3(line(:,1),line(:,2),line(:,3),'LineWidth',5)
It gives a circle in 3D:
A cleaner Python example that also works for tricky $x,y,z$ situations:
from mpl_toolkits.mplot3d import axes3d
from matplotlib.patches import Circle, PathPatch
import matplotlib.pyplot as plt
from matplotlib.transforms import Affine2D
from mpl_toolkits.mplot3d import art3d
import numpy as np
def plot_vector(fig, orig, v, color='blue'):
    """Draw vector ``v`` as an arrow starting at ``orig`` on a 3D axes of ``fig``.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to draw into. Its first existing 3D axes is reused; if it has
        none, a new one is added.
    orig : array_like of length 3
        Start point of the arrow.
    v : array_like of length 3
        Components of the arrow.
    color : str, optional
        Arrow colour.

    Returns
    -------
    matplotlib.figure.Figure
        The same ``fig``, for chaining.
    """
    # Figure.gca() no longer accepts a projection keyword in current
    # Matplotlib, so look the 3D axes up explicitly.
    ax = next((a for a in fig.axes if getattr(a, 'name', None) == '3d'), None)
    if ax is None:
        ax = fig.add_subplot(111, projection='3d')
    orig = np.array(orig)
    v = np.array(v)
    ax.quiver(orig[0], orig[1], orig[2], v[0], v[1], v[2], color=color)
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.set_zlim(0, 10)
    return fig
def rotation_matrix(d):
    """Build a 3x3 rotation matrix from a rotation vector.

    The direction of ``d`` is the rotation axis and its length is the SINE of
    the rotation angle (as obtained from the cross product of two unit
    vectors). The cosine is taken as the non-negative root, so the angle is
    assumed to be at most 90 degrees.

    Parameters
    ----------
    d : array_like of length 3
        Rotation vector. It is not modified. Lengths above 1 are treated as 1.

    Returns
    -------
    ndarray, shape (3, 3)
        The rotation matrix; the identity if ``d`` is the zero vector.
    """
    # Work on a float copy: normalising in place would silently modify the
    # caller's array and fails outright for integer arrays, lists and tuples.
    d = np.array(d, dtype=np.float64)
    sin_angle = np.linalg.norm(d)
    if sin_angle == 0:
        return np.identity(3)
    d /= sin_angle
    # Guard against round-off pushing the sine slightly above 1, which would
    # turn the square root below into NaN.
    sin_angle = min(sin_angle, 1.0)
    cos_angle = np.sqrt(1.0 - sin_angle ** 2)

    eye = np.eye(3)
    ddt = np.outer(d, d)
    skew = np.array([[0, d[2], -d[1]],
                     [-d[2], 0, d[0]],
                     [d[1], -d[0], 0]], dtype=np.float64)
    return ddt + cos_angle * (eye - ddt) + sin_angle * skew
def pathpatch_2d_to_3d(pathpatch, z, normal):
    """Turn a 2D patch into a 3D patch lying in the plane with the given normal.

    The patch is converted in place: its class is switched to
    ``art3d.PathPatch3D`` and its 3D vertices are obtained by rotating the 2D
    outline from the x-y plane and adding ``z`` to the third coordinate.

    Parameters
    ----------
    pathpatch : matplotlib.patches.Patch
        The 2D patch to convert.
    z : float
        Offset added to the third coordinate after the rotation.
    normal : {'x', 'y', 'z'} or array_like of length 3
        Normal of the target plane. It need not be normalised and is not
        modified.

    Raises
    ------
    ValueError
        If ``normal`` is the zero vector (or a string other than x, y, z).
    """
    if isinstance(normal, str):  # Translate strings to normal vectors
        index = "xyz".index(normal)
        normal = np.roll((1.0, 0, 0), index)

    # Normalise a float copy: in-place division would fail for integer arrays
    # and would modify the caller's array.
    normal = np.array(normal, dtype=np.float64)
    length = np.linalg.norm(normal)
    if length == 0:
        raise ValueError("normal must be a non-zero vector")
    normal /= length

    path = pathpatch.get_path()  # Get the path and the associated transform
    trans = pathpatch.get_patch_transform()
    path = trans.transform_path(path)  # Apply the transform

    pathpatch.__class__ = art3d.PathPatch3D  # Change the class
    pathpatch._code3d = path.codes  # Copy the codes
    # Call the getter: storing the bound method itself (no parentheses) left
    # a function object where a colour is expected.
    pathpatch._facecolor3d = pathpatch.get_facecolor()

    verts = path.vertices  # Get the vertices in 2D
    d = np.cross(normal, (0, 0, 1))  # Obtain the rotation vector
    M = rotation_matrix(d)  # Get the rotation matrix
    pathpatch._segment3d = np.array([np.dot(M, (x, y, 0)) + (0, 0, z) for x, y in verts])
def pathpatch_translate(pathpatch, delta):
    """Shift the 3D vertices stored on ``pathpatch`` by ``delta``, in place."""
    segments = pathpatch._segment3d
    segments += delta
    pathpatch._segment3d = segments
def plot_plane(ax, point, normal, size=10, color='y'):
    """Draw a translucent disc of radius ``size`` on ``ax``.

    The disc is created as a 2D circle at the origin, converted to a 3D patch
    with the given ``normal``, and then moved so its centre is at ``point``.
    """
    disc = Circle((0, 0), size, facecolor = color, alpha = .2)
    ax.add_patch(disc)
    pathpatch_2d_to_3d(disc, z=0, normal=normal)
    offset = (point[0], point[1], point[2])
    pathpatch_translate(disc, offset)
# Demo: a disc of radius 3 centred at `o` with normal `n`.
o = np.array([5,5,5])
v = np.array([3,3,3])
n = [0.5, 0.5, 0.5]
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
# add_subplot is used because Figure.gca() no longer accepts a projection
# keyword in current Matplotlib.
ax = fig.add_subplot(111, projection='3d')
plot_plane(ax, o, n, size=3)
ax.set_xlim(0, 10)
ax.set_ylim(0, 10)
ax.set_zlim(0, 10)
plt.show()