I am fairly new to python coding so I apologize in advance for my ignorance. I am trying to create a Dash App that drops outliers using standard deviation. The user selects a standard deviation using RadioItem inputs.
My question is what amendments do I need to make to my code so that the RadioItem value updates max_deviations using a callback?
Import packages, clean the data and define a query
import dash
import plotly.express as px
from dash import Dash, dcc, html, Input, Output, State
import pandas as pd
import numpy as np
app = dash.Dash(__name__)
server = app.server
df=pd.read_csv(r'C:\SVS_GIS\POWER BI\CSV_DATA\QSAS2021.csv', encoding='unicode_escape')
#SET DATE OF VALUATION
df['TIME'] = ((pd.to_datetime(df['Sale Date'], dayfirst=True)
.rsub(pd.to_datetime('01/10/2021', dayfirst=True))
.dt.days
)*-1)
df=df[df['TIME'] >= -365]
df = df.query("(SMA >=1 and SMA <= 3) and (LGA==60)")
prepare dataframe for dropping outliers
data = pd.DataFrame(data=df)
x = df.TIME
y = df.CHANGE
mean = np.mean(y)
standard_deviation = np.std(y)
distance_from_mean = abs(y - mean)
app layout
app.layout = html.Div([
html.Label("Standard Deviation Picker:", style={'fontSize':25, 'textAlign':'center'}),
html.Br(),
html.Label("1.0 = 68%, 2.0 = 95%, 3.0 = 99.7%", style={'fontSize':15,
'textAlign':'center'}),
html.Div(id="radio_items"),
dcc.RadioItems(
options=[{'label': i, 'value': i} for i in [1.0, 2.0, 3.0]],
value=2.0
),
html.Div([
dcc.Graph(id="the_graph")]
)])
callback
#app.callback(
Output("the_graph", "figure"),
Input("radio_items", 'value')
)
def update_graph(max_deviations):
not_outlier = distance_from_mean < max_deviations * standard_deviation
no_outliers = y[not_outlier]
trim_outliers = pd.DataFrame(data=no_outliers)
dff = pd.merge(trim_outliers, df, left_index=True, right_index=True)
return (dff)
fig = px.scatter(dff, x='TIME', y='CHANGE_y',
color ='SMA',
trendline='ols',
size='PV',
height=500,
width=800,
hover_name='SMA',
)
return dcc.Graph(id='the_graph', figure=fig)
if __name__ == '__main__':
app.run_server(debug=False)
Your dcc.RadioItems doesn't have an id prop. Add that, and make sure it matches the ID given in the callback, and you should be good.
Related
I am learning neural network modeling and its uses in time series prediction.
First, thank you for reading this post and for your help :)
On this page there are various NN models (LSTM, CNN etc.) for predicting "traffic volume":
https://michael-fuchs-python.netlify.app/2020/11/01/time-series-analysis-neural-networks-for-forecasting-univariate-variables/#train-validation-split
I got inspired and decided to use/shorten/adapt the code in there for a problem of my own: predicting the bitcoin price.
I have the bitcoin daily prices starting 1.1.2017
in total 2024 daily prices
I use the first 85% of the data for the training data, and the rest as the validation (except the last 10 observation, which I would like to use as test data to see how good my model is)
I would like to use a Feedforward model
My goal is merely having a code that runs.
I have managed so far to have most of my code run. However, I get a strange format for my test forecast results: It should be simply an array of 10 numbers (i.e. predicted prices corresponding to the 10 day at the end of my data). To my surprise what is printed out is a long list of numbers. I need help to find out what changes I need to make to make to the code to make it run.
Thank you for helping me :)
The code is pasted down there, followed by the error:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing #import MinMaxScaler
from sklearn import metrics #import mean_squared_error
import seaborn as sns
sns.set()
import tensorflow as tf
from tensorflow import keras
from keras.layers import Input, Dense, Flatten
from keras.optimizers import Adam
from keras.models import Sequential
from keras.callbacks import EarlyStopping
tf.__version__
df = pd.read_csv('/content/BTC-USD.csv')
def mean_absolute_percentage_error_func(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
def timeseries_evaluation_metrics_func(y_true, y_pred):
print('Evaluation metric results: ')
print(f'MSE is : {metrics.mean_squared_error(y_true, y_pred)}')
print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
print(f'RMSE is : {np.sqrt(metrics.mean_squared_error(y_true, y_pred))}')
print(f'MAPE is : {mean_absolute_percentage_error_func(y_true, y_pred)}')
print(f'R2 is : {metrics.r2_score(y_true, y_pred)}',end='\n\n')
def univariate_data_prep_func(dataset, start, end, window, horizon):
X = []
y = []
start = start + window
if end is None:
end = len(dataset) - horizon
for i in range(start, end):
indicesx = range(i-window, i)
X.append(np.reshape(dataset[indicesx], (window, 1)))
indicesy = range(i,i+horizon)
y.append(dataset[indicesy])
return np.array(X), np.array(y)
# Generating the test set
test_data = df['close'].tail(10)
df = df.drop(df['close'].tail(10).index)
df.shape
# Defining the target variable
uni_data = df['close']
uni_data.index = df['formatted_date']
uni_data.head()
#scaling
from sklearn import preprocessing
uni_data = uni_data.values
scaler_x = preprocessing.MinMaxScaler()
x_scaled = scaler_x.fit_transform(uni_data.reshape(-1, 1))
# Single Step Style (sss) modeling
univar_hist_window_sss = 50
horizon_sss = 1
# 2014 observations in total
# 2014*0.85=1710 should be part of the training (304 validation)
train_split_sss = 1710
x_train_uni_sss, y_train_uni_sss = univariate_data_prep_func(x_scaled, 0, train_split_sss,
univar_hist_window_sss, horizon_sss)
x_val_uni_sss, y_val_uni_sss = univariate_data_prep_func(x_scaled, train_split_sss, None,
univar_hist_window_sss, horizon_sss)
print ('Length of first Single Window:')
print (len(x_train_uni_sss[0]))
print()
print ('Target horizon:')
print (y_train_uni_sss[0])
BATCH_SIZE_sss = 32
BUFFER_SIZE_sss = 150
train_univariate_sss = tf.data.Dataset.from_tensor_slices((x_train_uni_sss, y_train_uni_sss))
train_univariate_sss = train_univariate_sss.cache().shuffle(BUFFER_SIZE_sss).batch(BATCH_SIZE_sss).repeat()
validation_univariate_sss = tf.data.Dataset.from_tensor_slices((x_val_uni_sss, y_val_uni_sss))
validation_univariate_sss = validation_univariate_sss.batch(BATCH_SIZE_sss).repeat()
n_steps_per_epoch = 55
n_validation_steps = 10
n_epochs = 100
#FFNN architecture
model = tf.keras.models.Sequential([
tf.keras.layers.Dense(8, input_shape=x_train_uni_sss.shape[-2:]),
tf.keras.layers.Dense(units=horizon_sss)])
model.compile(loss='mse',
optimizer='adam')
#fit the model
model_path = '/content/FFNN_model_sss.h5'
keras_callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss',
min_delta=0, patience=10,
verbose=1, mode='min'),
tf.keras.callbacks.ModelCheckpoint(model_path,monitor='val_loss',
save_best_only=True,
mode='min', verbose=0)]
history = model.fit(train_univariate_sss, epochs=n_epochs, steps_per_epoch=n_steps_per_epoch,
validation_data=validation_univariate_sss, validation_steps=n_validation_steps, verbose =1,
callbacks = keras_callbacks)
#validation
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
# Testing our model
trained_ffnn_model_sss = tf.keras.models.load_model(model_path)
df_temp = df['close']
test_horizon = df_temp.tail(univar_hist_window_sss)
test_history = test_horizon.values
result = []
# Define Forecast length here
window_len = len(test_data)
test_scaled = scaler_x.fit_transform(test_history.reshape(-1, 1))
for i in range(1, window_len+1):
test_scaled = test_scaled.reshape((1, test_scaled.shape[0], 1))
# Inserting the model
predicted_results = trained_ffnn_model_sss.predict(test_scaled)
print(f'predicted : {predicted_results}')
result.append(predicted_results[0])
test_scaled = np.append(test_scaled[:,1:],[[predicted_results]])
result_inv_trans = scaler_x.inverse_transform(result)
result_inv_trans
I believe the problem might have to do with the shapes of data. How exactly I do not yet know.
Data:
click here
Traceback:
click here
Im trying to create a simple bar chart and deploy it within a dropdown menu. My problem is with the axes. I do not know why when I use px.bar the deploy just take one variable (x or y) and make the plot, but when I use px.scatter it works, it takes both variables.
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd
import numpy as np
app = Dash(__name__)
pf = fr.iloc[2:10,18:25]
pf.columns = [prueba1]
pf.index = ['CH + CHL','CH','CHL(UST)','CHL','CL + CHL','CL','CHL(UST)','CHL']
app.layout = html.Div(children=[
html.Div([dcc.Dropdown(pf.index,'CH', id='pandas-dropdown-2'),
html.Div(id='pandas-output-container')]),
dcc.Graph(id='example-graph')
])
Output('example-graph','figure'),
Input('pandas-dropdown-2', 'value'))
def update_graph(value_input):
pff = pf[pf.index == value_input]
fig = px.bar(pff, x = np.array(prueba1), y = list(pff.values))#,
return fig
__name__ == '__main__':
app.run_server(debug=True, use_reloader=False)
The result is this one:
enter image description here
Why the Y label is count instead of years?
But when I change from px.bar to px.scatter this happens:
enter image description here
It works!!! But I need the bar chart!
I need to compute a numerical (triple) integral, but do not need very high precision on the value, and would therefore like to sacrifice some precision for speed when using nquad. I thought that I might be able to do this by increasing the epsrel and/or epsabs options, but they seem to have no effect. For example (note, this is just an example integrand - I don't actually need to compute this particular integral...):
import numpy as np
from scipy.integrate import nquad
def integrand(l, b, d, sigma=250):
x = d * np.cos(l) * np.cos(b)
y = d * np.sin(l) * np.cos(b)
z = d * np.sin(b)
return np.exp(-0.5 * z**2 / sigma**2) / np.sqrt(2*np.pi * sigma**2)
ranges = [
(0, 2*np.pi),
(0.5, np.pi/2),
(0, 1000.)
]
# No specification of `opts` - use the default epsrel and epsabs:
result1 = nquad(integrand, ranges=ranges, full_output=True)
# Set some `quad` opts:
result2 = nquad(integrand, ranges=ranges, full_output=True,
opts=dict(epsabs=1e-1, epsrel=0, limit=3))
Both outputs are identical:
>>> print(result1)
(4.252394424844468, 1.525272379143154e-12, {'neval': 9261})
>>> print(result2)
(4.252394424844468, 1.525272379143154e-12, {'neval': 9261})
A full example is included here: https://gist.github.com/adrn/b9aa92c236df011dbcdc131aa94ed9f9
Is this not the right approach, or is scipy.integrate ignoring my inputted opts?
From the scipy.integrate.nquad it is stated that opts can only be passed to quad as can be seen here:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.nquad.html
Example of application:
import numpy as np
from scipy.integrate import quad
def integrand(a, sigma=250):
x = 2 * np.sin(a) * np.cos(a)
return x
# No specification of `opts` - use the default epsrel and epsabs:
result1 = quad(integrand,0, 2*np.pi)
# Set some `quad` opts:
result2 = quad(integrand,0, 4*np.pi,epsabs=1e-6, epsrel=1e-6, limit=40)
returns:
result1: (-1.3690011097614755e-16, 4.4205541621600365e-14)
result2: (-1.7062635631484713e-15, 9.096805257467047e-14)
The reason nquad doesn't complain about the presence of options is because nquad includes quad, dbquad and tplquad.
I want to make a linear regression program which visualizes the data to user. I'm using EJML for calculations and ScalaFX for front end. Everything is going fine but when I plot the data using Scatter Chart, the line drawn from the data is set to be rectangles which cover up the original data points. I would like to know how I can change the size, shape and transparency etc. of the plotted points.
Almost all of guides around JavaFX say that I should modify the CSS file (which doesn't automatically exist) in order to style my chart. I don't know how to do that in ScalaFX or even that is it possible to do that way. My result of searching every possible tutorial has been fruitless.
import scalafx.application.JFXApp
import scalafx.scene.Scene
import scalafx.scene.chart.ScatterChart
import scalafx.collections.ObservableBuffer
import scalafx.scene.chart.NumberAxis
import scalafx.scene.chart.XYChart
import scalafx.scene.shape.Line
import org.ejml.simple.SimpleMatrix
import scala.math.pow
import scala.collection.mutable.Buffer
object Plotting extends JFXApp {
/*
* Below are some arbitrary x and y values for a regression line
*/
val xValues = Array(Array(1.0, 1.0, 1.0, 1.0, 1.0, 1.0), Array(14.0, 19.0, 22.0, 26.0, 31.0, 43.0))
val yValues = Array(Array(51.0, 57.0, 66.0, 71.0, 72.0, 84.0))
val temp = yValues.flatten
val wrapper = xValues(1).zip(temp)
/*
* In the lines before stage what happens is that matrices for the x and y values are created, coefficients
* for the regression line are calculated with matrix operations and (x, y) points are calculated for the
* regression line.
*/
val X = new SimpleMatrix(xValues).transpose
val Y = new SimpleMatrix(yValues).transpose
val secondX = new SimpleMatrix(xValues(0).size, 2)
for (i <- 0 until xValues(0).size) {
secondX.set(i, 0, xValues(0)(i))
secondX.set(i, 1, xValues(1)(i))
}
val invertedSecondX = secondX.pseudoInverse()
val B = invertedSecondX.mult(Y)
val graphPoints = Buffer[(Double, Double)]()
for (i <- 0 to xValues(1).max.toInt) {
graphPoints.append((i.toDouble, B.get(0, 0) + i * B.get(1, 0)))
}
stage = new JFXApp.PrimaryStage {
title = "Demo"
scene = new Scene(400, 400) {
val xAxis = NumberAxis()
val yAxis = NumberAxis()
val pData = XYChart.Series[Number, Number](
"Data",
ObservableBuffer(wrapper.map(z => XYChart.Data[Number, Number](z._1, z._2)): _*))
val graph = XYChart.Series[Number, Number](
"RegressionLine",
ObservableBuffer(graphPoints.map(z => XYChart.Data[Number, Number](z._1, z._2)): _*))
val plot = new ScatterChart(xAxis, yAxis, ObservableBuffer(graph, pData))
root = plot
}
}
}
This certainly isn't as well documented as it might be... :-(
Stylesheets are typically placed in your project's resource directory. If you're using SBT (recommended), this would be src/main/resources.
In this example, I've added a stylesheet called MyCharts.css to this directory with the following contents:
/* Blue semi-transparent 4-pointed star, using SVG path. */
.default-color0.chart-symbol {
-fx-background-color: blue;
-fx-scale-shape: true;
-fx-shape: "M 0.0 10.0 L 3.0 3.0 L 10.0 0.0 L 3.0 -3.0 L 0.0 -10.0 L -3.0 -3.0 L -10.0 0.0 L -3.0 3.0 Z ";
-fx-opacity: 0.5;
}
/* Default shape is a rectangle. Here, we round it to become a red circle with a white
* center. Change the radius to control the size.
*/
.default-color1.chart-symbol {
-fx-background-color: red, white;
-fx-background-insets: 0, 2;
-fx-background-radius: 3px;
-fx-padding: 3px;
}
color0 will be used for the first data series (the regression line), color1 for the second (your scatter data). All other series use the default, JavaFX style.
(For more information on using scalable vector graphics (SVG) paths to define custom shapes, refer to the relevant section of the SVG specification.)
To have this stylesheet used by ScalaFX (JavaFX), you have a choice of options. To have them apply globally, add it to the main scene (which is what I've done below). Alternatively, if each chart needs a different style, you can add different stylesheets to specific charts. (BTW, I also added the standard includes import as this prevents many JavaFX-ScalaFX element conversion issues; otherwise, I've made no changes to your sources.)
import scalafx.Includes._
import scalafx.application.JFXApp
import scalafx.scene.Scene
import scalafx.scene.chart.ScatterChart
import scalafx.collections.ObservableBuffer
import scalafx.scene.chart.NumberAxis
import scalafx.scene.chart.XYChart
import scalafx.scene.shape.Line
import org.ejml.simple.SimpleMatrix
import scala.math.pow
import scala.collection.mutable.Buffer
object Plotting extends JFXApp {
/*
* Below are some arbitrary x and y values for a regression line
*/
val xValues = Array(Array(1.0, 1.0, 1.0, 1.0, 1.0, 1.0), Array(14.0, 19.0, 22.0, 26.0, 31.0, 43.0))
val yValues = Array(Array(51.0, 57.0, 66.0, 71.0, 72.0, 84.0))
val temp = yValues.flatten
val wrapper = xValues(1).zip(temp)
/*
* In the lines before stage what happens is that matrices for the x and y values are created, coefficients
* for the regression line are calculated with matrix operations and (x, y) points are calculated for the
* regression line.
*/
val X = new SimpleMatrix(xValues).transpose
val Y = new SimpleMatrix(yValues).transpose
val secondX = new SimpleMatrix(xValues(0).size, 2)
for (i <- 0 until xValues(0).size) {
secondX.set(i, 0, xValues(0)(i))
secondX.set(i, 1, xValues(1)(i))
}
val invertedSecondX = secondX.pseudoInverse()
val B = invertedSecondX.mult(Y)
val graphPoints = Buffer[(Double, Double)]()
for (i <- 0 to xValues(1).max.toInt) {
graphPoints.append((i.toDouble, B.get(0, 0) + i * B.get(1, 0)))
}
stage = new JFXApp.PrimaryStage {
title = "Demo"
scene = new Scene(400, 400) {
// Add our stylesheet.
stylesheets.add("MyCharts.css")
val xAxis = NumberAxis()
val yAxis = NumberAxis()
val pData = XYChart.Series[Number, Number](
"Data",
ObservableBuffer(wrapper.map(z => XYChart.Data[Number, Number](z._1, z._2)): _*))
val graph = XYChart.Series[Number, Number](
"RegressionLine",
ObservableBuffer(graphPoints.map(z => XYChart.Data[Number, Number](z._1, z._2)): _*))
val plot = new ScatterChart(xAxis, yAxis, ObservableBuffer(graph, pData))
root = plot
}
}
}
For further information in the CSS formatting options available (changing shapes, colors, transparency, etc.) refer to the JavaFX CSS Reference Guide.
The result looks like this:
I almost don't dare to add somethig to Mike Allens solution (wich is very good, as always), but this did not work out for me because I could not get my scala to find and/or process the .css file.
I would have done it this way if possible, but I just could not get it to work.
Here is what I came up with:
Suppose I have some data to display:
val xyExampleData: ObservableBuffer[(Double, Double)] = ObservableBuffer(Seq(
1 -> 1,
2 -> 4,
3 -> 9))
Then I convert this to a Series for the LineChart:
val DataPoints = ObservableBuffer(xyExampleData map { case (x, y) => XYChart.Data[Number, Number](x, y) })
val PointsToDisplay = XYChart.Series[Number, Number]("Points", DataPoints)
now I put this again into a Buffer, maybe with some other data from different series.
val lineChartBuffer = ObservableBuffer(PointsToDisplay, ...)
and finally I create my lineChart, wich I call (with lack of creativity) lineChart:
val lineChart = new LineChart(xAxis, yAxis, lineChartBuffer) {...}
The lines between data points can be recoloured now easily with:
lineChart.lookup(".default-color0.chart-series-line").setStyle("-fx-stroke: blue;")
This will change the Line-colour of the FIRST Dataset in the LineChartBuffer.
If you want to change the Line-Properties for the second you call
lineChart.lookup(".default-color1.chart-series-line")...
There is also "-fx-stroke-width: 3px;" to set the with of the line.
"-fx-opacity: 0.1;"
"-fx-stroke-dash-array: 10;"
-fx-fill: blue;"
are also usefull, but dont call the above line repeatedly, because the second call will override the first.
Instead concatenate all the strings into one:
lineChart.lookup(".default-color0.chart-series-line").setStyle("-fx-stroke: blue;-fx-opacity: 0.1;-fx-stroke-dash-array: 10;-fx-fill: blue;")
Now for the formatting of the Symbols at each data-Point:
unfortunately there seems to be no other way than to format each Symbol seperately:
lineChart.lookupAll(".default-color0.chart-line-symbol").asScala foreach { node => node.setStyle("-fx-background-color: blue, white;") }
for this to run you need import scala.collection.JavaConverters._
for the conversion from a java set to a scala set.
One can also make all data-poins from only one data-set invisible, for example:
lineChart.lookupAll(".default-color1.chart-line-symbol").asScala foreach { node => node.setVisible(false) }
To say this is a nice solution would be exaggerated.
And it has the big disadvantage, that you have to recolour or reformat every Symbol after adding a new Datapoint to one of the series in LineChartBuffer. If you don't, the new Symbols will have standard colours and settings.
The Lines stay, ones they are recoloured, I can't say why.
But the good side of it, one can always reformat curves in a Line Chart afterwards like this!
I am trying to write udaf to calculate the percentile values.
I need to write the custom function because existing spark function percentile_approx, approx_percentile and percentile uses rounding differently than my need.
I need to use floor instead of midpoint rounding. Is there anyway I can write it in pyspark?
If not how to achieve this in scala?
I need to calculate the percentile using below method:
def percentile_custom(lst, per):
lst.sorted()
rank = (len(lst)+1)*per
ir = math.floor(rank)
ir1 = math.ceil(rank)
if (ir == ir1):
return lst[ir-1]
else:
fr = rank - ir
ir_qh = lst[ir-1]
ir_qh1 = lst[ir]
inter = ((ir_qh1 - ir_qh)*fr) + ir_qh
return math.floor(inter)
Below is the function for the same I have written in pyspark, let me know in case it didn't work out for you :
from pyspark.sql import Window
import math
import pyspark.sql.types as T
import pyspark.sql.functions as F
def calc_percentile(perc_df, part_col, order_col, p_val=[33,66], num_bins=100, max_bins = 100, perc_col="p_band"):
"""
Calculate percentile with nimber of bins on specified columns
"""
win = Window.partitionBy(*part_col).orderBy(order_col)
def perc_func(col, num, max_bins):
step = max_bins / num
return {(p_tile / step): int(
math.ceil(col * (p_tile / float(max_bins)))
) for p_tile in range(step, max_bins + step, step)}
perc_udf = F.udf(perc_func, T.MapType(T.IntegerType(), T.IntegerType()))
# perc_df.show()
rank_data = perc_df.filter(
F.col(order_col).isNotNull()
).withColumn(
"rank", F.dense_rank().over(win)
)
rank_data.persist()
rank_data.count()
overall_count_data = rank_data.groupBy(
*part_col
).agg(
F.max(
F.col("rank")
).alias("count")
).select(
F.explode(
perc_udf(F.col("count"), F.lit(num_bins), F.lit(max_bins))
).alias("n_tile", "rank"), "count",
*part_col
)
overall_count_data.persist()
overall_count_data.count()
return overall_count_data.join(
rank_data, part_col + ["rank"]
).withColumn(
perc_col,
F.concat(F.lit("P_"), F.col("n_tile").cast("string"))
).groupBy(
*part_col
).pivot(
perc_col, ["P_{0}".format(p_val1) for p_val1 in p_val]
).agg(
F.max(order_col)
).select(
*(
part_col + [F.col("P_{0}".format(p_val1)) for p_val1 in p_val]
)
)