How to define a networkx subgraph only based on out_edges? - networkx

I would like to get a subgraph around a specific node of a directed graph based on the out_edges or in_edges only.
# This does not work
# Attempt: take the radius-2 ego graph, try to blank out its out-edges, then
# take the ego graph of the result again.
H_tmp = nx.ego_graph(G, node_name, 2)
# NOTE(review): presumably assigning to `out_edges` does not remove any edges
# from H_tmp, which would explain why this approach fails — confirm.
H_tmp.out_edges = []
H = nx.ego_graph(H_tmp, node_name, 2)
I tried using nx.ego_graph twice, but I don't know an efficient way to remove all the out_edges or in_edges. Is there a way to tell ego_graph to use only a specific set of edges?

Using the ego_graph function on a directed graph extracts the successors of the node (i.e. it follows the out_edges). According to the documentation, if you only want the predecessors (in_edges), you can apply ego_graph to the reverse of your graph and then reverse the resulting subgraph. If you want both successors and predecessors, you can specify undirected=True. The example below summarizes this:
import networkx as nx
import matplotlib.pyplot as plt
# Small random directed graph to experiment on.
H = nx.fast_gnp_random_graph(5, 0.3, directed=True)

# Radius-1 neighbourhoods of node 2.
H_all = nx.ego_graph(H, 2, 1, undirected=True)  # successors and predecessors
H_out = nx.ego_graph(H, 2, 1)                   # successors only (out_edges)
# For the in_edges, apply ego_graph to the reversed graph, then reverse the
# resulting subgraph so its edges point in their original direction again.
H_in = nx.ego_graph(H.reverse(), 2, 1)
H_in_r = H_in.reverse()

# One row of four panels: (subplot position, title, graph to draw).
panels = [
    (141, 'Full graph', H),
    (142, 'All neighbors around node 2', H_all),
    (143, 'Out subgraph around node 2', H_out),
    (144, 'In subgraph around node 2', H_in_r),
]
plt.figure(figsize=(15, 10))
for position, heading, graph in panels:
    plt.subplot(position)
    plt.title(heading)
    nx.draw(graph, with_labels=True)

Related

How to generate a triangle free graph in Networkx (with randomseed)?

After checking the documentation on triangles of networkx, I've wondered if there is a more efficient way of generating a triangle free graph than to randomly spawn graphs until a triangle free one happens to emerge, (in particular if one would like to use a constant random seed).
Below is code that spawns graphs until they are triangle free, yet with varying random seeds. For a graph of 10 nodes it already takes roughly 20 seconds.
def create_triangle_free_graph(show_graphs):
    """Sample random graphs until one without triangles is found.

    Graphs are drawn with ``nx.fast_gnp_random_graph``. The seed starts at 42
    and is incremented after every rejected sample, so every call walks
    through the same sequence of candidate graphs.

    Args:
        show_graphs: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The first sampled graph for which ``nx.triangles`` reports no
        triangles.
    """
    seed = 42
    nr_of_nodes = 10
    probability_of_creating_an_edge = 0.85
    while True:
        graph = nx.fast_gnp_random_graph(
            nr_of_nodes, probability_of_creating_an_edge, seed=seed
        )
        # nx.triangles counts a triangle once for each of its three corners.
        triangles = nx.triangles(graph).values()
        nr_of_triangles = sum(triangles) / 3
        print(f"nr_of_triangles={nr_of_triangles}")
        if nr_of_triangles == 0:
            return graph
        # Rejected: move on to the next seed so the next sample differs.
        seed += 1
Hence, I would like to ask:
Are there faster ways to generate triangle free graphs (using random seeds) in networkx?
A triangle exists in a graph iff two vertices connected by an edge share one or more neighbours. A triangle-free graph can be expanded by adding edges between nodes that share no neighbours. The empty graph is triangle-free, so there is a straightforward algorithm to create triangle-free graphs.
#!/usr/bin/env python
"""
Create a triangle free graph.
"""
import random
import networkx as nx
from itertools import combinations
def triangle_free_graph(total_nodes, seed=None):
    """Construct a random triangle-free graph on ``total_nodes`` nodes.

    All node pairs are visited in random order; a pair is connected only if
    the two nodes currently share no neighbour, so no triangle can be closed.

    Args:
        total_nodes: Number of nodes; nodes are labelled ``0 .. total_nodes-1``.
        seed: Optional seed for the shuffle. With a seed the same graph is
            produced on every call; with ``None`` the module-level ``random``
            state is used, as before.

    Returns:
        An ``nx.Graph`` without triangles.
    """
    rng = random if seed is None else random.Random(seed)
    nodes = range(total_nodes)
    g = nx.Graph()
    g.add_nodes_from(nodes)
    edge_candidates = list(combinations(nodes, 2))
    rng.shuffle(edge_candidates)
    for u, v in edge_candidates:
        # Adding (u, v) would close a triangle iff u and v share a neighbour.
        if set(g.neighbors(u)).isdisjoint(g.neighbors(v)):
            g.add_edge(u, v)
    return g
# Build a 10-node graph and print what nx.triangles reports for it.
g = triangle_free_graph(10)
print(nx.triangles(g))
The number of edges in the resulting graph is highly dependent on the ordering of edge_candidates. To get a graph with the desired edge density, repeat the process until a graph with equal or higher density is found (and then remove superfluous edges), or until your patience runs out.
# Keep regenerating the graph until it is dense enough, giving up with a
# warning once the iteration budget is spent.
cutoff = 0.85
max_iterations = 1e+4
iteration = 0
while nx.density(g) < cutoff:
    g = triangle_free_graph(10)
    iteration += 1
    if iteration != max_iterations:
        continue
    # Budget exhausted: warn and stop with the last graph generated.
    import warnings
    warnings.warn("Maximum number of iterations reached!")
    break
# TODO: remove edges until the desired density is achieved

Find neighboring nodes in graph

I've the following graph:
import networkx as nx
import matplotlib.pyplot as plt
# Undirected graph whose edges carry a 'weight' attribute,
# listed as (node, node, weight).
g = nx.Graph()
g.add_weighted_edges_from([
    (131, 673, 673),
    (131, 201, 201),
    (131, 303, 20),
    (673, 96, 96),
    (673, 205, 44),
    (673, 110, 7),
    (201, 96, 96),
    (201, 232, 10),
])

nx.draw(g, with_labels=True)
plt.show()

# Inspect nodes and edges together with their attribute dicts.
g.nodes(data=True)
g.edges(data=True)
I need to create a function myfunction(g, node_list) that returns a subgraph containing only those edges incident to the given nodes whose weight is < 50.
For example, if I run myfunction(g, [131, 201]), the output should be:
EdgeDataView([(131, 303, {'weight': 20}), (201, 232, {'weight': 10})])
A way to do that is by looping through all the nodes in your list and finding their neighbors with the nx.neighbors function from networkx. You can then set up an if condition to check the weight of the edge between the node of interest and its neighbors. If the condition satisfies your constraint, you can add the neighbor, the edge, and the weight to your subgraph.
See code below:
import networkx as nx
import matplotlib.pyplot as plt
# Same example graph as in the question: (node, node, weight) triples.
g = nx.Graph()
g.add_weighted_edges_from([
    (131, 673, 673),
    (131, 201, 201),
    (131, 303, 20),
    (673, 96, 96),
    (673, 205, 44),
    (673, 110, 7),
    (201, 96, 96),
    (201, 232, 10),
])

# Edge labels and node positions for the full graph.
labels_g = nx.get_edge_attributes(g, 'weight')
pos_g = nx.circular_layout(g)

# Upper panel: the full graph with its edge weights.
fig = plt.figure(figsize=(10, 10))
plt.subplot(211)
plt.title('Full graph')
nx.draw_networkx_edge_labels(g, pos_g, edge_labels=labels_g)
nx.draw(g, pos=pos_g, with_labels=True)
def check_neighbor_weights(g, nodes, max_weight=50):
    """Collect the light edges around the given nodes into a new graph.

    Args:
        g: Graph whose edges carry a ``'weight'`` attribute.
        nodes: Iterable of nodes of ``g`` to inspect.
        max_weight: Exclusive upper bound; only edges with
            ``weight < max_weight`` are kept. Defaults to 50.

    Returns:
        A new ``nx.Graph`` containing every node in ``nodes`` plus, for each
        of them, the incident edges of ``g`` (with their weight) that are
        below ``max_weight``.

    Raises:
        KeyError: If a node is not in ``g`` or an inspected edge has no
            ``'weight'`` attribute.
    """
    subg = nx.Graph()
    for n in nodes:
        # Keep the node even if none of its edges qualifies.
        subg.add_node(n)
        for neighbor, attrs in g[n].items():
            weight = attrs['weight']
            if weight < max_weight:
                subg.add_edge(n, neighbor, weight=weight)
    return subg
# Subgraph made of the light edges around nodes 131 and 201.
subg = check_neighbor_weights(g, [131, 201])

# Edge labels and node positions for the subgraph.
labels_subg = nx.get_edge_attributes(subg, 'weight')
pos_subg = nx.circular_layout(subg)

# Lower panel: the subgraph with its edge weights.
plt.subplot(212)
plt.title('subgraph')
nx.draw_networkx_edge_labels(subg, pos=pos_subg, edge_labels=labels_subg)
nx.draw(subg, pos=pos_subg, with_labels=True)
plt.show()
And the output gives:

How to create heatmap in Python using OSMnx and a list of values corresponding to the graph's nodes?

I've built a simulation model of pedestrians walking on a network using OSMnx and one of the simulation's outputs is a list "Visits" that is corresponding to the nodes in NodesList = list(Graph.nodes).
How can I create a heatmap using those lists and OSMnx?
For example:
NodesList[:5]
Output: [1214630921, 5513510924, 5513510925, 5513510926, 5243527186]
Visits[:5]
Output: [1139, 1143, 1175, 1200, 1226]
P.S.
the type of heatmap is not important (Nodes size, nodes color, etc.)
Since you specified the type of heatmap is not important, I have come up with the following solution.
import osmnx as ox
import numpy as np

address_name = 'Melbourne'

# Fetch the graph around the address and split it into node/edge GeoDataFrames.
G = ox.graph_from_address(address_name, distance=300)
nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)

# Attach a visit count to every node. Random integers stand in for real data
# here; replace them with the simulation output.
visit_counts = np.random.randint(0, 1000, size=len(nodes))
nodes['visits'] = visit_counts

# Rebuild the graph from the GeoDataFrames so the 'visits' column is carried
# over as a node attribute.
G = ox.save_load.gdfs_to_graph(nodes, edges)

# Draw the graph with node colour (5 bins) and node size both driven by visits.
nc = ox.plot.get_node_colors_by_attr(G, 'visits', num_bins=5)
ox.plot_graph(
    G,
    fig_height=8,
    fig_width=8,
    node_size=nodes['visits'],
    node_color=nc,
)

Recovering the node with maximum in-degree and retrieving its longitude and latitude

I am new to OSMnx, OpenStreetMap and Nominatim. I am trying to retrieve the node with the maximum in-degree using NetworkX's in_degree_centrality function. However, only degree_centrality and betweenness_centrality are present in the extended_stats dict. How do I locate this node? Do I have to convert the place network to a directed graph before I can use the in_degree_centrality function in NetworkX, or does OSMnx do this directly?
import osmnx as ox, networkx as nx, matplotlib.cm as cm, pandas as pd, numpy as np
# Study area: boundary, its projected area, and the street network.
place = 'City of Lethbridge, Alberta, Canada'
gdf = ox.gdf_from_place(place)
area = ox.project_gdf(gdf).unary_union.area
G = ox.graph_from_place(place, network_type='drive_service')

# Basic stats merged with the extended stats, displayed as a Series.
stats = ox.basic_stats(G, area=area)
extended_stats = ox.extended_stats(G, ecc=True, bc=True, cc=True)
stats.update(extended_stats)
pd.Series(stats)

# Start again from the basic stats and flatten the two nested
# streets-per-node dicts into individual top-level entries.
stats = ox.basic_stats(G, area=area)
per_node_counts = stats['streets_per_node_counts']
per_node_proportions = stats['streets_per_node_proportion']
stats.update(
    {'int_{}_count'.format(k): count for k, count in per_node_counts.items()}
)
stats.update(
    {'int_{}_prop'.format(k): proportion
     for k, proportion in per_node_proportions.items()}
)

# The nested dicts are no longer needed once flattened.
del stats['streets_per_node_counts']
del stats['streets_per_node_proportion']

# Display the flattened stats as a one-row DataFrame.
pd.DataFrame(pd.Series(stats)).T

G_projected = ox.project_graph(G)

# Node with the highest betweenness centrality and its score.
max_node, max_bc = max(
    extended_stats['betweenness_centrality'].items(),
    key=lambda x: x[1],
)
print(max_node, max_bc)

# Highlight that node in red and enlarge it; draw all other nodes small.
nc = ['#336699' if node != max_node else 'r' for node in G_projected.nodes()]
ns = [8 if node != max_node else 50 for node in G_projected.nodes()]
fig, ax = ox.plot_graph(
    G_projected, node_size=ns, node_color=nc, node_zorder=2
)
G_projected = ox.project_graph(G)
# In-degree centrality comes from NetworkX; it returns a dict keyed by node id.
in_degree = nx.in_degree_centrality(G_projected)
# Pick the (node id, centrality) pair with the largest centrality.
max_node_deg, max_in_deg = max(in_degree.items(), key=lambda item: item[1])
# Read the coordinates from the unprojected graph G.
# NOTE(review): assumes OSMnx stores longitude/latitude in the 'x'/'y' node
# attributes of the unprojected graph — confirm for the installed version.
max_node_lon = G.nodes[max_node_deg]['x']
max_node_lat = G.nodes[max_node_deg]['y']
print(max_node_deg, max_in_deg, max_node_lon, max_node_lat)
Node with maximum in-degree with its NodeId and longitude and latitude.
Per the OSMnx documentation, in-degree centrality is not included in its built-in network stats calculation. However, as OSMnx produces a NetworkX MultiDiGraph object, you can call NetworkX's implementation to calculate it.
To retrieve the lat-long, identify the node with the highest in-degree centrality, then inspect that node's attributes (including lat-long) in the graph object.

Distinguish classes with different colors Networkx

I have a huge dataset of 80,000 rows , I want to draw a meaningful graph in networkx using 2 dataframes (nodes and edges)
In "nodes", I have: actor1, category_id (int: a numerical value from 0 to 7 describing the type), and fatalities (float: the number of injured or killed people)
In "edges" : "actor1", "actor2", "interaction: float 64"
my aim is to draw a graph with different colors according to category_id and different sizes based on number of fatalities
I started with this code, which ran perfectly until I tried to retrieve interaction and fatalities to calculate the weights of the nodes, as follows:
# Node table (actor, class, fatalities) and edge table (actor pair, interaction).
nodes = ACLED_to_graph[['actor1', 'category_id', 'fatalities']]
edges = ACLED_to_graph[['actor1', 'actor2', 'interaction']]

# Build the graph: one attributed node per node row, one weighted edge per
# edge row.
G4 = nx.Graph()
for _, node_row in nodes.iterrows():
    G4.add_node(
        node_row['actor1'],
        category_id=node_row['category_id'],
        nodesize=node_row['fatalities'],
    )
for _, edge_row in edges.iterrows():
    G4.add_edge(
        edge_row['actor1'],
        edge_row['actor2'],
        weight=edge_row['interaction'],
    )

# Nodes ordered by decreasing degree (for inspection only).
sorted(G4.degree, key=lambda x: x[1], reverse=True)

# Drop nodes whose degree is >= 200 or < 4.
cond1 = [node for node, degree in G4.degree() if degree >= 200]
cond2 = [node for node, degree in G4.degree() if degree < 4]
remove = [*cond1, *cond2]
G4.remove_nodes_from(remove)
# NOTE(review): `remove` holds nodes, not edges — confirm this call is needed.
G4.remove_edges_from(remove)

# Spring layout:
# - k controls the distance between the nodes and varies between 0 and 1
# - iterations is the number of times simulated annealing is run
# default k=0.1 and iterations=50
pos = nx.spring_layout(G4, k=0.25, iterations=50)

# Colour assigned to each category_id.
color_map = {
    0: '#f1f0c0',
    1: '#f09494',
    2: '#eebcbc',
    3: '#72bbd0',
    4: '#91f0a1',
    5: '#629fff',
    6: '#bcc2f2',
    7: '#eebcbc',
}

plt.figure(figsize=(25, 25))
options = {
    'edge_color': '#FFDEA2',
    'width': 1,
    'with_labels': True,
    'font_weight': 'regular',
}

# One colour per node, looked up through its category_id.
# NOTE(review): nodes that entered the graph only as edge endpoints were never
# given a 'category_id'; presumably that causes the reported KeyError — confirm.
colors = [color_map[G4.node[node]['category_id']] for node in G4.node]

nx.draw(G4, node_color=colors, cmap=plt.get_cmap('jet'), **options)

# Give the first drawn collection a dark grey outline.
ax = plt.gca()
ax.collections[0].set_edgecolor("#555555")
I am also removing nodes with a degree of at least 200 or less than 4 to simplify the graph and make it more appealing.
I am getting the following error :
colors = [color_map[G4.node[node]['category_id']] for node in G4.node]
KeyError: 'category_id'
without the input data it is a bit hard to tell for sure, but it looks as if you are not constructing the graph nodes with a property 'category_id'. In the for index, row in nodes.iterrows(): you assign the data in the nodes dictionary, key 'category_id' to the property "group".
you can confirm this to be the case by checking what keys are set for an example node in your graph, e.g. print(G4.node['actor1 '].keys()).
To fix this, either
a) change the assignment
# Store each row's category under the 'category_id' node attribute.
for _, node_row in nodes.iterrows():
    G4.add_node(
        node_row['actor1'],
        category_id=node_row['category_id'],
        nodesize=node_row['interaction'],
    )
or b) change the lookup
colors = [color_map[G4.node[node]['group']] for node in G4.node]
Solving mathematical operation using nodes attributes can be summarized as follows :
1 -After the subsetting the dataframe, we initialize the graph
# Columns needed for the node table and for the edge table.
node_columns = ['actor1', 'category_id', 'interaction']
edge_columns = ['actor1', 'actor2', 'fatalities']
nodes = ACLED_to_graph[node_columns]
edges = ACLED_to_graph[edge_columns]

# Start from an empty undirected graph.
G8 = nx.Graph()
2- Add edges attributes first (I emphasize the use of from_pandas_edgelist)
# from_pandas_edgelist consumes the whole edge table in one call, so it is run
# once rather than once per row. Each edge keeps its 'fatalities' value.
G8 = nx.from_pandas_edgelist(edges, 'actor1', 'actor2', ['fatalities'])
3- Next, we add node attributes using add_node; other techniques such as set_node_attributes didn't work with pandas
# Attach category and interaction to each actor1 node.
for _, node_row in nodes.iterrows():
    G8.add_node(
        node_row['actor1'],
        category_id=node_row['category_id'],
        interaction=node_row['interaction'],
    )
4- Sort nodes by degree to select the most connected nodes (I am keeping nodes with a degree of at least 3 and less than 200)
# Nodes ordered by decreasing degree (for inspection only).
sorted(G8.degree, key=lambda x: x[1], reverse=True)

# Drop nodes whose degree is >= 200 or < 3.
degree_by_node = dict(G8.degree())
cond1 = [node for node, degree in degree_by_node.items() if degree >= 200]
cond2 = [node for node, degree in degree_by_node.items() if degree < 3]
remove = [*cond1, *cond2]
G8.remove_nodes_from(remove)
# NOTE(review): `remove` holds nodes, not edges — confirm this call is needed.
G8.remove_edges_from(remove)
5- set the color based on degree (calling node.degree)
node_color = [G8.degree(v) for v in G8]
6- set the edge size based on fatalities
edge_width = [0.15*G8[u][v]['fatalities'] for u,v in G8.edges()]
7- set the node size based on interaction
# One size per node, in G8's node order.
# NOTE(review): nodes without an 'interaction' attribute get size 0 here;
# choose another fallback if such nodes should stay visible.
interaction_by_node = nx.get_node_attributes(G8, 'interaction')
node_size = [interaction_by_node.get(v, 0) for v in G8]
I used get_node_attributes instead of pandas to access the features, which allowed me to convert the dictionary into a list of values, ready to compute.
8- Select the most important edges based on fatalities
large_edges = [x for x in G8.edges(data=True) if x[2]['fatalities']>=3.0]
9- Finally, draw the network and edges separately
# Compute the layout on G8 itself so every drawn node has a position.
pos = nx.spring_layout(G8, k=0.25, iterations=50)
# Whole network: node size/colour and edge width come from the lists built in
# the previous steps.
nx.draw_networkx(
    G8,
    pos,
    node_size=node_size,
    node_color=node_color,
    alpha=0.7,
    with_labels=False,
    width=edge_width,
    edge_color='.4',
    cmap=plt.cm.Blues,
)
# Overlay the selected large edges as thick, semi-transparent red lines.
nx.draw_networkx_edges(
    G8, pos, edgelist=large_edges, edge_color='r', alpha=0.4, width=6
)