Issue when adding new node in graph - networkx

I'm having some issues while trying to add a new node to a graph (with OSMnx).
I need to calculate some distances in areas that don't have nearby nodes.
Here is my code:
import networkx as nx
import osmnx as ox
from IPython.display import IFrame
import geopandas as gpd
from shapely.geometry import Point

my_dict = {
    '001': {
        'y': -31.640224888841907,
        'x': -60.672566478771884,
        'street_count': 1
    }
}

tmp_list = []
for item_key, item_value in my_dict.items():
    tmp_list.append({
        'geometry': Point(item_value['x'], item_value['y']),
        'osmid': item_key,
        'y': item_value['y'],
        'x': item_value['x'],
        'street_count': item_value['street_count']
    })
my_nodes = gpd.GeoDataFrame(tmp_list)

G = ox.graph_from_place("Santa Fe, Santa Fe, Argentina", network_type="drive", buffer_dist=5000)
nodes = ox.graph_to_gdfs(G, nodes=True, edges=False)
edges = ox.graph_to_gdfs(G, edges=True, nodes=False)
nodes = nodes.append(my_nodes, ignore_index=True)
G2 = ox.graph_from_gdfs(nodes, edges)

m1 = ox.plot_graph_folium(G2, popup_attribute="name", weight=2, color="#8b0000")
dest = (-60.70916, -31.64553)
ori = (-60.66756, -31.63719)
iniciocercano = ox.nearest_nodes(G2, ori[0], ori[1], return_dist=True)
finalcercano = ox.nearest_nodes(G2, dest[0], dest[1], return_dist=True)
pathDistance = nx.shortest_path_length(G2, iniciocercano[0], finalcercano[0], weight="length")
route = nx.shortest_path(G2, iniciocercano[0], finalcercano[0])
The error that I'm getting is: Input contains NaN.
I also noticed that the original graph (G) has 9423 nodes and 25013 edges, while the new graph (G2) has 18847 nodes and 25013 edges, which is pretty strange. Somehow the nodes are getting duplicated.
Thank you for your time.

Your my_nodes GeoDataFrame is not indexed by osmid like it needs to be, and like your nodes GeoDataFrame is.
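A minimal sketch of that fix (an assumption on my part, using pd.concat because DataFrame.append is deprecated in newer pandas): set osmid as the index before combining, and drop ignore_index=True, since discarding the osmid labels is what duplicates the nodes:

import pandas as pd

# index the new nodes by osmid, matching the nodes gdf returned by graph_to_gdfs
# (note: new osmid values must be unique relative to the existing node ids)
my_nodes = gpd.GeoDataFrame(tmp_list).set_index('osmid')

# concatenate while preserving the osmid index
nodes = pd.concat([nodes, my_nodes])
G2 = ox.graph_from_gdfs(nodes, edges)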

predicting time series: my python code prints out a (very long) list rather than a (small) array

I am learning neural network modeling and its uses in time series prediction.
First, thank you for reading this post and for your help :)
On this page there are various NN models (LSTM, CNN, etc.) for predicting "traffic volume":
https://michael-fuchs-python.netlify.app/2020/11/01/time-series-analysis-neural-networks-for-forecasting-univariate-variables/#train-validation-split
I got inspired and decided to use/shorten/adapt the code there for a problem of my own: predicting the Bitcoin price.
I have daily Bitcoin prices starting 1.1.2017, 2024 daily prices in total.
I use the first 85% of the data for training and the rest for validation (except the last 10 observations, which I would like to use as test data to see how good my model is).
I would like to use a feedforward model.
My goal is merely to have code that runs.
So far I have managed to get most of my code to run. However, I get a strange format for my test forecast results: it should simply be an array of 10 numbers (i.e., predicted prices corresponding to the 10 days at the end of my data). To my surprise, what is printed out is a long list of numbers. I need help figuring out what changes I need to make to the code.
Thank you for helping me :)
The code is pasted below, followed by the error:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing  # MinMaxScaler
from sklearn import metrics  # mean_squared_error
import seaborn as sns
sns.set()
import tensorflow as tf
from tensorflow import keras
from keras.layers import Input, Dense, Flatten
from keras.optimizers import Adam
from keras.models import Sequential
from keras.callbacks import EarlyStopping
tf.__version__

df = pd.read_csv('/content/BTC-USD.csv')

def mean_absolute_percentage_error_func(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def timeseries_evaluation_metrics_func(y_true, y_pred):
    print('Evaluation metric results: ')
    print(f'MSE is : {metrics.mean_squared_error(y_true, y_pred)}')
    print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
    print(f'RMSE is : {np.sqrt(metrics.mean_squared_error(y_true, y_pred))}')
    print(f'MAPE is : {mean_absolute_percentage_error_func(y_true, y_pred)}')
    print(f'R2 is : {metrics.r2_score(y_true, y_pred)}', end='\n\n')

def univariate_data_prep_func(dataset, start, end, window, horizon):
    X = []
    y = []
    start = start + window
    if end is None:
        end = len(dataset) - horizon
    for i in range(start, end):
        indicesx = range(i - window, i)
        X.append(np.reshape(dataset[indicesx], (window, 1)))
        indicesy = range(i, i + horizon)
        y.append(dataset[indicesy])
    return np.array(X), np.array(y)
# Generating the test set
test_data = df['close'].tail(10)
df = df.drop(df['close'].tail(10).index)
df.shape

# Defining the target variable
uni_data = df['close']
uni_data.index = df['formatted_date']
uni_data.head()

# Scaling
from sklearn import preprocessing
uni_data = uni_data.values
scaler_x = preprocessing.MinMaxScaler()
x_scaled = scaler_x.fit_transform(uni_data.reshape(-1, 1))

# Single Step Style (sss) modeling
univar_hist_window_sss = 50
horizon_sss = 1
# 2014 observations in total
# 2014*0.85=1710 should be part of the training (304 validation)
train_split_sss = 1710
x_train_uni_sss, y_train_uni_sss = univariate_data_prep_func(x_scaled, 0, train_split_sss,
                                                             univar_hist_window_sss, horizon_sss)
x_val_uni_sss, y_val_uni_sss = univariate_data_prep_func(x_scaled, train_split_sss, None,
                                                         univar_hist_window_sss, horizon_sss)
print('Length of first Single Window:')
print(len(x_train_uni_sss[0]))
print()
print('Target horizon:')
print(y_train_uni_sss[0])

BATCH_SIZE_sss = 32
BUFFER_SIZE_sss = 150
train_univariate_sss = tf.data.Dataset.from_tensor_slices((x_train_uni_sss, y_train_uni_sss))
train_univariate_sss = train_univariate_sss.cache().shuffle(BUFFER_SIZE_sss).batch(BATCH_SIZE_sss).repeat()
validation_univariate_sss = tf.data.Dataset.from_tensor_slices((x_val_uni_sss, y_val_uni_sss))
validation_univariate_sss = validation_univariate_sss.batch(BATCH_SIZE_sss).repeat()

n_steps_per_epoch = 55
n_validation_steps = 10
n_epochs = 100

# FFNN architecture
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(8, input_shape=x_train_uni_sss.shape[-2:]),
    tf.keras.layers.Dense(units=horizon_sss)])
model.compile(loss='mse', optimizer='adam')

# Fit the model
model_path = '/content/FFNN_model_sss.h5'
keras_callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    min_delta=0, patience=10,
                                                    verbose=1, mode='min'),
                   tf.keras.callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                                      save_best_only=True,
                                                      mode='min', verbose=0)]
history = model.fit(train_univariate_sss, epochs=n_epochs, steps_per_epoch=n_steps_per_epoch,
                    validation_data=validation_univariate_sss, validation_steps=n_validation_steps,
                    verbose=1, callbacks=keras_callbacks)

# Validation
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

# Testing our model
trained_ffnn_model_sss = tf.keras.models.load_model(model_path)
df_temp = df['close']
test_horizon = df_temp.tail(univar_hist_window_sss)
test_history = test_horizon.values
result = []
# Define forecast length here
window_len = len(test_data)
test_scaled = scaler_x.fit_transform(test_history.reshape(-1, 1))
for i in range(1, window_len + 1):
    test_scaled = test_scaled.reshape((1, test_scaled.shape[0], 1))
    # Inserting the model
    predicted_results = trained_ffnn_model_sss.predict(test_scaled)
    print(f'predicted : {predicted_results}')
    result.append(predicted_results[0])
    test_scaled = np.append(test_scaled[:, 1:], [[predicted_results]])
result_inv_trans = scaler_x.inverse_transform(result)
result_inv_trans
I believe the problem might have to do with the shapes of the data, but how exactly, I do not yet know.
Data:
click here
Traceback:
click here
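A hedged guess at the shape issue, offered as a sketch rather than a verified fix: a Dense layer applied to a 3-D input of shape (batch, window, 1) acts on the last axis only, so this model's predict() returns shape (1, 50, 1) rather than (1, 1), and each loop iteration appends 50 numbers to result. Flattening the window before the output layer would make each prediction a single value:

# Sketch (assumes the shape diagnosis above): check what predict() returns.
print(predicted_results.shape)  # expect (1, 50, 1) with the model as defined

# Possible fix: flatten the (window, 1) input so each sample maps to one output.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=x_train_uni_sss.shape[-2:]),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(units=horizon_sss),  # output shape: (batch, 1)
])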

Updating a Dash Callback using RadioItems

I am fairly new to Python coding, so I apologize in advance for my ignorance. I am trying to create a Dash app that drops outliers using standard deviation. The user selects a standard deviation using RadioItems inputs.
My question is: what amendments do I need to make to my code so that the RadioItems value updates max_deviations through a callback?
Import packages, clean the data and define a query
import dash
import plotly.express as px
from dash import Dash, dcc, html, Input, Output, State
import pandas as pd
import numpy as np
app = dash.Dash(__name__)
server = app.server
df=pd.read_csv(r'C:\SVS_GIS\POWER BI\CSV_DATA\QSAS2021.csv', encoding='unicode_escape')
#SET DATE OF VALUATION
df['TIME'] = ((pd.to_datetime(df['Sale Date'], dayfirst=True)
               .rsub(pd.to_datetime('01/10/2021', dayfirst=True))
               .dt.days
               ) * -1)
df=df[df['TIME'] >= -365]
df = df.query("(SMA >=1 and SMA <= 3) and (LGA==60)")
prepare dataframe for dropping outliers
data = pd.DataFrame(data=df)
x = df.TIME
y = df.CHANGE
mean = np.mean(y)
standard_deviation = np.std(y)
distance_from_mean = abs(y - mean)
app layout
app.layout = html.Div([
    html.Label("Standard Deviation Picker:", style={'fontSize': 25, 'textAlign': 'center'}),
    html.Br(),
    html.Label("1.0 = 68%, 2.0 = 95%, 3.0 = 99.7%", style={'fontSize': 15, 'textAlign': 'center'}),
    html.Div(id="radio_items"),
    dcc.RadioItems(
        options=[{'label': i, 'value': i} for i in [1.0, 2.0, 3.0]],
        value=2.0
    ),
    html.Div([
        dcc.Graph(id="the_graph")
    ])
])
callback
@app.callback(
    Output("the_graph", "figure"),
    Input("radio_items", 'value')
)
def update_graph(max_deviations):
    not_outlier = distance_from_mean < max_deviations * standard_deviation
    no_outliers = y[not_outlier]
    trim_outliers = pd.DataFrame(data=no_outliers)
    dff = pd.merge(trim_outliers, df, left_index=True, right_index=True)
    return (dff)
    fig = px.scatter(dff, x='TIME', y='CHANGE_y',
                     color='SMA',
                     trendline='ols',
                     size='PV',
                     height=500,
                     width=800,
                     hover_name='SMA',
                     )
    return dcc.Graph(id='the_graph', figure=fig)

if __name__ == '__main__':
    app.run_server(debug=False)
Your dcc.RadioItems doesn't have an id prop. Add that, and make sure it matches the ID given in the callback, and you should be good.
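A minimal sketch of that change, keeping the rest of the layout as posted:

dcc.RadioItems(
    id="radio_items",  # must match Input("radio_items", "value") in the callback
    options=[{'label': i, 'value': i} for i in [1.0, 2.0, 3.0]],
    value=2.0
),

Two further things to watch, based on the posted code: the layout already contains html.Div(id="radio_items"), which would clash with this id (Dash requires component ids to be unique), and the early return (dff) exits update_graph before fig is built, so the callback should return the px.scatter figure instead.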

How do I Plot multiple Isochrones Polygons using Python OSMnx library?

I'm trying to build isochrones for some schools, to understand accessibility. My mode of travel is "walk" and I wanted to create a list of at least two travel times: 30 minutes and 1 hour.
Here below is my code:
# import required libraries
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import osmnx as ox
from descartes import PolygonPatch
from shapely.geometry import Point, LineString, Polygon
from IPython.display import IFrame
import folium
import itertools  # will be used to perform feature joining
# %matplotlib inline
ox.__version__

# function to get isochrones
def get_isochrone(lon, lat, walk_time=[30, 60], speed=4.5):
    loc = (lat, lon)
    G = ox.graph_from_point(loc, simplify=True, network_type='walk')
    # Create nodes geodataframe from Graph network (G)
    gdf_nodes = ox.graph_to_gdfs(G, edges=False)
    x, y = gdf_nodes['geometry'].unary_union.centroid.xy
    center_node = ox.get_nearest_node(G, (y[0], x[0]))
    meters_per_minute = speed * 1000 / 60  # km per hour to m per minute
    for u, v, k, data in G.edges(data=True, keys=True):
        data['time'] = data['length'] / meters_per_minute
    # get one color for each isochrone
    iso_colors = ox.plot.get_colors(n=len(walk_time), cmap="plasma", start=0, return_hex=True)
    # color the nodes according to isochrone then plot the street network
    node_colors = {}
    for walks_time, color in zip(sorted(walk_time, reverse=True), iso_colors):
        subgraph = nx.ego_graph(G, center_node, radius=walks_time, distance="time")
        for node in subgraph.nodes():
            node_colors[node] = color
    nc = [node_colors[node] if node in node_colors else "none" for node in G.nodes()]
    ns = [15 if node in node_colors else 0 for node in G.nodes()]
    # make isochrone polygons
    isochrone_polys = []
    for trip_times in sorted(walk_time, reverse=True):
        subgraph = nx.ego_graph(G, center_node, radius=trip_times, distance="time")
        node_points = [Point(data['x'], data['y']) for node, data in subgraph.nodes(data=True)]
        polys = gpd.GeoSeries(node_points).unary_union.convex_hull
        isochrone_polys.append(polys)
    # adding color
    for polygon, fc in zip(isochrone_polys, iso_colors):
        patch = PolygonPatch(polygon, fc=fc, ec="none", alpha=0.6, zorder=-1)
        # isochrone_polys.add_patch(patch)
    return isochrone_polys
When I call the get_isochrone function:
# calling the function with the coordinates
map = coordinates.apply(lambda x: get_isochrone(x.longitude, x.latitude), axis=1)
What is returned is a list of polygons, with each school point having as many polygons as there are items in the travel times list.
I noticed, however, that the polygons returned for each point are exactly the same.
Just to verify, this script shows that they are the same:
for i in map:
    print(i[0])
    print(i[1])
    print('\n')
Here are the results:
As you can see, each school point returns two polygons with exactly the same coordinates, for different travel times. I have reviewed the Python function several times, and even changed the travel time list to a single item like 100, and I still get exactly the same coordinates. I may have missed something, and I'll appreciate any help. Thanks!
In addition, I also realized that the add_patch() method in the function adds color to the plot_graph() output only. How can I add the colors to the polygons in the Folium map?
It appears that you have used this example: https://github.com/gboeing/osmnx-examples/blob/main/notebooks/13-isolines-isochrones.ipynb
If all you want is polygons, then there is no need for PolygonPatch.
You have refactored incorrectly for multiple walk times: you only need to generate the edge times once.
Pulling it all together:
source some schools
get_isochrone() refactored to your use case. It has been changed so it returns a dict containing the index and name of the point/school being investigated.
generate a geopandas data frame of isochrones
visualise it
data sourcing
import osmnx as ox
import pandas as pd
import warnings
import networkx as nx
import geopandas as gpd
from shapely.geometry import Point

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=PendingDeprecationWarning)
ox.config(use_cache=True, log_console=False)

# get some cities
cities = ["Hereford"]  # ["Hereford", "Worcester", "Gloucester"]
cities = ox.geocode_to_gdf([{"city": c, "country": "UK"} for c in cities])

# get some schools
tags = {"amenity": "school"}
schools = pd.concat(
    [
        ox.geometries.geometries_from_polygon(r["geometry"], tags)
        for i, r in cities.iterrows()
    ]
)
schools = (
    schools.loc["way"].dropna(axis=1, thresh=len(schools) / 4).drop(columns=["nodes"])
)
# change polygon to point
schools["geometry"] = schools.to_crs(schools.estimate_utm_crs())[
    "geometry"
].centroid.to_crs(schools.crs)
get_isochrone()
# function to get isochrones
def get_isochrone(
    lon, lat, walk_times=[15, 30], speed=4.5, name=None, point_index=None
):
    loc = (lat, lon)
    G = ox.graph_from_point(loc, simplify=True, network_type="walk")
    gdf_nodes = ox.graph_to_gdfs(G, edges=False)
    center_node = ox.distance.nearest_nodes(G, lon, lat)
    meters_per_minute = speed * 1000 / 60  # km per hour to m per minute
    for u, v, k, data in G.edges(data=True, keys=True):
        data["time"] = data["length"] / meters_per_minute
    polys = []
    for walk_time in walk_times:
        subgraph = nx.ego_graph(G, center_node, radius=walk_time, distance="time")
        node_points = [
            Point(data["x"], data["y"]) for node, data in subgraph.nodes(data=True)
        ]
        polys.append(gpd.GeoSeries(node_points).unary_union.convex_hull)
    info = {}
    if name:
        info["name"] = [name for t in walk_times]
    if point_index:
        info["point_index"] = [point_index for t in walk_times]
    return {**{"geometry": polys, "time": walk_times}, **info}
integration
WT = [5, 10, 15]
SCHOOLS = 5
# build geopandas data frame of isochrone polygons for each school
isochrones = pd.concat(
    [
        gpd.GeoDataFrame(
            get_isochrone(
                r["geometry"].x,
                r["geometry"].y,
                name=r["name"],
                point_index=i,
                walk_times=WT,
            ),
            crs=schools.crs,
        )
        for i, r in schools.head(SCHOOLS).iterrows()
    ]
)
visualise
warnings.filterwarnings("ignore")
gdf = isochrones.set_index(["time", "point_index"]).copy()
# remove shorter walk time from longer walk time polygon to make folium work better
for idx in range(len(WT) - 1, 0, -1):
    gdf.loc[WT[idx], "geometry"] = (
        gdf.loc[WT[idx]]
        .apply(
            lambda r: r["geometry"].symmetric_difference(
                gdf.loc[(WT[idx - 1], r.name), "geometry"]
            ),
            axis=1,
        )
        .values
    )
m = gdf.reset_index().explore(column="time", height=300, width=500, scheme="boxplot")
schools.head(SCHOOLS).explore(m=m, marker_kwds={"radius": 3, "color": "red"})
merge overlapping polygons
using this technique https://gis.stackexchange.com/questions/334459/how-to-dissolve-overlapping-polygons-using-geopandas
and visualised in a different way
import shapely.geometry  # needed for shapely.geometry.Polygon below
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium

# merge overlapping polygons
# https://gis.stackexchange.com/questions/334459/how-to-dissolve-overlapping-polygons-using-geopandas
mergedpolys = gpd.GeoDataFrame(
    geometry=isochrones.groupby("time")["geometry"]
    .agg(lambda g: g.unary_union)
    .apply(lambda g: [g] if isinstance(g, shapely.geometry.Polygon) else g.geoms)
    .explode(),
    crs=isochrones.crs,
)
# visualize merged polygons
m = None
for i, wt in enumerate(WT[::-1]):
    m = mergedpolys.loc[[wt]].explore(
        m=m,
        color=colors.to_hex(cm.get_cmap("tab20b", len(WT))(i)),
        name=wt,
        height=300,
        width=500,
    )
m = schools.head(SCHOOLS).explore(
    m=m, marker_kwds={"radius": 3, "color": "red"}, name="schools"
)
folium.LayerControl().add_to(m)
m
I'm trying to replicate Steel8's answer, but when I run that code I get errors. I think these errors are due to the fact that I am using a different version of osmnx. Could you tell me which version of each package you are using?
import osmnx as ox
import pandas as pd
import warnings
import networkx as nx
import geopandas as gpd
from shapely.geometry import Point

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=PendingDeprecationWarning)
ox.config(use_cache=True, log_console=False)

# get some cities
cities = ["Hereford"]  # ["Hereford", "Worcester", "Gloucester"]
cities = ox.geocode_to_gdf([{"city": c, "country": "UK"} for c in cities])

# get some schools
tags = {"amenity": "school"}
schools = pd.concat(
    [
        ox.geometries.geometries_from_polygon(r["geometry"], tags)
        for i, r in cities.iterrows()
    ]
)
schools = (
    schools.loc["way"].dropna(axis=1, thresh=len(schools) / 4).drop(columns=["nodes"])
)
# change polygon to point
schools["geometry"] = schools.to_crs(schools.estimate_utm_crs())[
    "geometry"
].centroid.to_crs(schools.crs)

# function to get isochrones
def get_isochrone(
    lon, lat, walk_times=[15, 30], speed=4.5, name=None, point_index=None
):
    loc = (lat, lon)
    G = ox.graph_from_point(loc, simplify=True, network_type="walk")
    gdf_nodes = ox.graph_to_gdfs(G, edges=False)
    center_node = ox.distance.nearest_nodes(G, lon, lat)
    meters_per_minute = speed * 1000 / 60  # km per hour to m per minute
    for u, v, k, data in G.edges(data=True, keys=True):
        data["time"] = data["length"] / meters_per_minute
    polys = []
    for walk_time in walk_times:
        subgraph = nx.ego_graph(G, center_node, radius=walk_time, distance="time")
        node_points = [
            Point(data["x"], data["y"]) for node, data in subgraph.nodes(data=True)
        ]
        polys.append(gpd.GeoSeries(node_points).unary_union.convex_hull)
    info = {}
    if name:
        info["name"] = [name for t in walk_times]
    if point_index:
        info["point_index"] = [point_index for t in walk_times]
    return {**{"geometry": polys, "time": walk_times}, **info}

WT = [5, 10, 15]
SCHOOLS = 5
# build geopandas data frame of isochrone polygons for each school
isochrones = pd.concat(
    [
        gpd.GeoDataFrame(
            get_isochrone(
                r["geometry"].x,
                r["geometry"].y,
                name=r["name"],
                point_index=i,
                walk_times=WT,
            ),
            crs=schools.crs,
        )
        for i, r in schools.head(SCHOOLS).iterrows()
    ]
)

warnings.filterwarnings("ignore")
gdf = isochrones.set_index(["time", "point_index"]).copy()
# remove shorter walk time from longer walk time polygon to make folium work better
for idx in range(len(WT) - 1, 0, -1):
    gdf.loc[WT[idx], "geometry"] = (
        gdf.loc[WT[idx]]
        .apply(
            lambda r: r["geometry"].symmetric_difference(
                gdf.loc[(WT[idx - 1], r.name), "geometry"]
            ),
            axis=1,
        )
        .values
    )
m = gdf.reset_index().explore(column="time", height=300, width=500, scheme="boxplot")
schools.head(SCHOOLS).explore(m=m, marker_kwds={"radius": 3, "color": "red"})
I'm getting the following error message:
PS C:\Users\IGPOZO\Desktop\INAKI\Isochrones\Python> python test4.py
Traceback (most recent call last):
  File "C:\Users\IGPOZO\Anaconda3\envs\isochrones_env\lib\site-packages\pandas\core\indexes\base.py", line 3621, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas\_libs\index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 144, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index_class_helper.pxi", line 41, in pandas._libs.index.Int64Engine._check_type
KeyError: 'way'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\IGPOZO\Desktop\INAKI\Isochrones\Python\test4.py", line 24, in <module>
    schools.loc["way"].dropna(axis=1, thresh=len(schools) / 4).drop(columns=["nodes"])
  File "C:\Users\IGPOZO\Anaconda3\envs\isochrones_env\lib\site-packages\pandas\core\indexing.py", line 967, in __getitem__
    return self._getitem_axis(maybe_callable, axis=axis)
  File "C:\Users\IGPOZO\Anaconda3\envs\isochrones_env\lib\site-packages\pandas\core\indexing.py", line 1202, in _getitem_axis
    return self._get_label(key, axis=axis)
  File "C:\Users\IGPOZO\Anaconda3\envs\isochrones_env\lib\site-packages\pandas\core\indexing.py", line 1153, in _get_label
    return self.obj.xs(label, axis=axis)
  File "C:\Users\IGPOZO\Anaconda3\envs\isochrones_env\lib\site-packages\pandas\core\generic.py", line 3876, in xs
    loc = index.get_loc(key)
  File "C:\Users\IGPOZO\Anaconda3\envs\isochrones_env\lib\site-packages\pandas\core\indexes\base.py", line 3623, in get_loc
    raise KeyError(key) from err
KeyError: 'way'
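A hedged note on the KeyError: it suggests that, in the installed OSMnx version, the schools GeoDataFrame is not indexed by ("element_type", "osmid"), so schools.loc["way"] has nothing to select. A defensive sketch (an assumption on my part, not from the original answer) that handles either index shape:

import pandas as pd

print(ox.__version__)  # the answer's code assumes an OSMnx whose geometries gdf has a MultiIndex

# only select the "way" element type if the index actually carries it
if isinstance(schools.index, pd.MultiIndex):
    schools = schools.loc["way"]
schools = schools.dropna(axis=1, thresh=len(schools) / 4).drop(
    columns=["nodes"], errors="ignore"
)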

How do I solve "ValueError: Found no graph nodes within the requested polygon" in OSMnx

I'm working on isochrones, building them for some schools just to show accessibility. Here is my code:
# importing the required libraries
import osmnx as ox
import pandas as pd
import warnings
import networkx as nx
import geopandas as gpd
from shapely.geometry import Point, Polygon
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=PendingDeprecationWarning)
ox.config(use_cache=True, log_console=False)

# reading the data
sec_data = pd.read_csv('H:/WORK/Upwork/Project 7 - Python School Data Analysis/Analysis By Country/Tanzania/CSV Files/secondary.csv')
sec_schools = gpd.GeoDataFrame(
    sec_data, geometry=gpd.points_from_xy(sec_data.longitude, sec_data.latitude)
)
sec_schools.crs = "EPSG:4326"
# we'll test it using the first 20 records
sec_schools = sec_schools.head(20)

# function to get isochrones
def get_isochrone(lon, lat, walk_times=[15, 30], speed=4.5, name=None, point_index=None):
    loc = (lat, lon)
    G = ox.graph_from_point(loc, simplify=True, network_type="walk")
    gdf_nodes = ox.graph_to_gdfs(G, edges=False)
    center_node = ox.distance.nearest_nodes(G, lon, lat)
    meters_per_minute = speed * 1000 / 60  # km per hour to m per minute
    for u, v, k, data in G.edges(data=True, keys=True):
        data["time"] = data["length"] / meters_per_minute
    polys = []
    for walk_time in walk_times:
        subgraph = nx.ego_graph(G, center_node, radius=walk_time, distance="time")
        node_points = [
            Point(data["x"], data["y"]) for node, data in subgraph.nodes(data=True)
        ]
        polys.append(gpd.GeoSeries(node_points).unary_union.convex_hull)
    info = {}
    if name:
        info["name"] = [name for t in walk_times]
    if point_index:
        info["point_index"] = [point_index for t in walk_times]
    return {**{"geometry": polys, "time": walk_times}, **info}

# walk time list of minutes
WT = [30, 45, 60]
# build geopandas data frame of isochrone polygons for each school
isochrones = pd.concat(
    [
        gpd.GeoDataFrame(
            get_isochrone(
                r["geometry"].x,
                r["geometry"].y,
                name=r["school name"],
                point_index=i,
                walk_times=WT,
            ),
            crs=sec_schools.crs,
        )
        for i, r in sec_schools.iterrows()
    ]
)

# merge overlapping polygons
# https://gis.stackexchange.com/questions/334459/how-to-dissolve-overlapping-polygons-using-geopandas
mergedpolys = gpd.GeoDataFrame(
    geometry=isochrones.groupby("time")["geometry"]
    .agg(lambda g: g.unary_union)
    .apply(lambda g: [g] if isinstance(g, Polygon) else g.geoms)
    .explode(),
    crs=isochrones.crs,
)
# visualize merged polygons
m = None
for i, wt in enumerate(WT[::-1]):
    m = mergedpolys.loc[[wt]].explore(
        m=m,
        color=colors.to_hex(cm.get_cmap("tab20b", len(WT))(i)),
        name=wt,
        height=300,
        width=500,
    )
m = sec_schools.head(SCHOOLS).explore(
    m=m, marker_kwds={"radius": 3, "color": "red"}, name="schools"
)
folium.LayerControl().add_to(m)
When I run the above code, I get the error "ValueError: Found no graph nodes within the requested polygon". I have a strong reason to believe that this error depends on the place. How can I get around it? I will appreciate any help. I was thinking of using try/except to catch the error, but I don't know where to place it in the code, or what other solution would work. Here is the GitHub link to the first 20 schools. Please note that this data is available to the public from their resource page.
In addition, how would I make travel times over 30 minutes plot on the map? In the above code there are three travel times, but only the polygons for the 30-minute travel time are returned, while the rest are not. If there is a way to get past this, I will truly appreciate the help.
I see three sub-questions:
the error "ValueError: Found no graph nodes within the requested polygon"
There is no such error when using the 20 sample points you have provided. Make sure you are using a fully up-to-date OSMnx.
Also see: OSMNX graph from point and geometry information
performance
It is expected that ox.graph_from_point() takes time; this is where the time goes. Caching has been implemented using a pickle file so the same results are not repeatedly requested.
missing map layers
There are no missing map layers.
ox.graph_from_point() has a dist parameter, which defaults to 1000 m. That is not sufficient for walking 60 minutes at 4.5 km/h, so the code below defines a distance that will be sufficient.
import osmnx as ox
import pandas as pd
import warnings
import networkx as nx
import geopandas as gpd
from shapely.geometry import Point, Polygon
import shapely
import shapely.wkt  # explicit import so shapely.wkt.loads is available
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
from pathlib import Path

# getting isochrones is expensive, cache
f = Path.cwd().joinpath("pkl_cache")
if not f.is_dir():
    f.mkdir()
f = f.joinpath("isochrones.pkl")
if f.exists():
    isochrones = pd.read_pickle(f)
else:
    isochrones = gpd.GeoDataFrame(columns=["point_index", "name"])

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=PendingDeprecationWarning)
ox.config(use_cache=True, log_console=False)

# reading the data
# sec_data = pd.read_csv('H:/WORK/Upwork/Project 7 - Python School Data Analysis/Analysis By Country/Tanzania/CSV Files/secondary.csv')
# sec_schools = gpd.GeoDataFrame(
#     sec_data, geometry=gpd.points_from_xy(sec_data.longitude, sec_data.latitude)
# )
sec_schools = gpd.GeoDataFrame(
    pd.read_csv(
        "https://raw.githubusercontent.com/Evanskip31/isochrones-polygons/master/first_20_secondary_schools.csv",
        index_col=0,
    ).assign(geometry=lambda d: d["geometry"].apply(shapely.wkt.loads)),
    crs="epsg:4326",
)
sec_schools.crs = "EPSG:4326"
# we'll test it using the first few records
SCHOOLS = 20
sec_schools = sec_schools.head(SCHOOLS)

# function to get isochrones
def get_isochrone(
    lon, lat, walk_times=[15, 30], speed=4.5, name=None, point_index=None
):
    loc = (lat, lon)
    # take distance walked into account...
    G = ox.graph_from_point(
        loc,
        simplify=True,
        network_type="walk",
        dist=(max(walk_times) / 60) * (speed + 1) * 1000,
    )
    gdf_nodes = ox.graph_to_gdfs(G, edges=False)
    center_node = ox.distance.nearest_nodes(G, lon, lat)
    meters_per_minute = speed * 1000 / 60  # km per hour to m per minute
    for u, v, k, data in G.edges(data=True, keys=True):
        data["time"] = data["length"] / meters_per_minute
    polys = []
    for walk_time in walk_times:
        subgraph = nx.ego_graph(G, center_node, radius=walk_time, distance="time")
        node_points = [
            Point(data["x"], data["y"]) for node, data in subgraph.nodes(data=True)
        ]
        polys.append(gpd.GeoSeries(node_points).unary_union.convex_hull)
    info = {}
    if name:
        info["name"] = [name for t in walk_times]
    if point_index is not None:
        info["point_index"] = [point_index for t in walk_times]
    return {**{"geometry": polys, "time": walk_times}, **info}

# walk time list of minutes
WT = [30, 45, 60]
# build geopandas data frame of isochrone polygons for each school
isochrones = pd.concat(
    [isochrones]
    + [
        gpd.GeoDataFrame(
            get_isochrone(
                r["geometry"].x,
                r["geometry"].y,
                name=r["school name"],
                point_index=i,
                walk_times=WT,
            ),
            crs=sec_schools.crs,
        )
        for i, r in sec_schools.loc[
            ~sec_schools.index.isin(isochrones["point_index"].tolist())
        ].iterrows()
    ]
)
# save to act as a cache
isochrones.to_pickle(f)

# merge overlapping polygons
# https://gis.stackexchange.com/questions/334459/how-to-dissolve-overlapping-polygons-using-geopandas
mergedpolys = gpd.GeoDataFrame(
    geometry=isochrones.groupby("time")["geometry"]
    .agg(lambda g: g.unary_union)
    .apply(lambda g: [g] if isinstance(g, Polygon) else g.geoms)
    .explode(),
    crs=isochrones.crs,
)
# visualize merged polygons
m = None
for i, wt in enumerate(WT[::-1]):
    m = mergedpolys.loc[[wt]].explore(
        m=m,
        color=colors.to_hex(cm.get_cmap("tab20b", len(WT))(i)),
        name=wt,
        height=300,
        width=500,
    )
m = sec_schools.head(SCHOOLS).explore(
    m=m, marker_kwds={"radius": 3, "color": "red"}, name="schools"
)
folium.LayerControl().add_to(m)
m

Pandas to_csv hands-on for data analysis

Question:
Create a series named heights_A with values 176.2, 158.4, 167.6, 156.2, and 161.4. These values represent heights of 5 students of class A.
Label each student as s1, s2, s3, s4, and s5.
Create another series named weights_A with values 85.1, 90.2, 76.8, 80.4, and 78.9. These values represent weights of 5 students of class A.
Label each student as s1, s2, s3, s4, and s5.
Create a dataframe named df_A, which contains the height and weight of five students namely s1, s2, s3, s4 and s5.
Label the columns as Student_height and Student_weight, respectively.
Write the contents of df_A to a CSV file named classA.csv.
Note: Use the to_csv method associated with a dataframe.
Verify that the file classA.csv exists in the present directory using the command ls -l.
You can also view the contents of the file using the command cat classA.csv.
My code:
import pandas as pd

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ["S1", "S2", "S3", "S4", "S5"]
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ["S1", "S2", "S3", "S4", "S5"]
df_A = pd.DataFrame({'Student_height': heights_A, 'Student_weight': weights_A}, index=weights_A.index)
df_A.to_csv("classA.csv")
While checking with ls -l and cat classA.csv I can see the expected contents, yet the checker does not allow me to proceed. Not sure where I am wrong.
Use lowercase letters for s1, s2, ...
import pandas as pd

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ["s1", "s2", "s3", "s4", "s5"]
print(heights_A[1])
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ["s1", "s2", "s3", "s4", "s5"]
df_A = pd.DataFrame({'Student_height': heights_A, 'Student_weight': weights_A}, index=weights_A.index)
df_A.to_csv("classA.csv")
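For reference, with the lowercase labels, cat classA.csv should then print the following (derived from the dataframe above; the leading comma is the unnamed index column):

,Student_height,Student_weight
s1,176.2,85.1
s2,158.4,90.2
s3,167.6,76.8
s4,156.2,80.4
s5,161.4,78.9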
import os
import numpy as np
import pandas as pd

# Creating the Series
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
# Creating the row axis labels
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
# Creating the Series
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
# Creating the row axis labels
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A
# Display the shape of dataframe df_A
df_A.shape

df_A = pd.DataFrame({'Student_height': heights_A, 'Student_weight': weights_A}, index=weights_A.index)
df_A.to_csv("classA.csv")
os.system("cat classA.csv")

df_A2 = pd.read_csv("classA.csv")
print(df_A2)
df_A3 = pd.read_csv("classA.csv", index_col=0)
print(df_A3)

np.random.seed(100)
x = np.random.normal(loc=170.0, scale=25.0, size=5)
np.random.seed(100)
heights_B = pd.Series(x, index=['s1', 's2', 's3', 's4', 's5'])
np.random.seed(100)
y = np.random.normal(loc=75.0, scale=12.0, size=5)
weights_B = pd.Series(y, index=['s1', 's2', 's3', 's4', 's5'])

df_B = pd.DataFrame({'Student_height': heights_B, 'Student_weight': weights_B}, index=weights_B.index)
df_B.to_csv("classB.csv", index=False)
os.system("cat classB.csv")

df_B2 = pd.read_csv("classB.csv")
print(df_B2)
df_B3 = pd.read_csv("classB.csv", header=None)
print(df_B3)
df_B4 = pd.read_csv("classB.csv", header=None, skiprows=2)
print(df_B4)