Find neighboring nodes in graph - networkx

I have the following graph:
import networkx as nx
import matplotlib.pyplot as plt
g = nx.Graph()
g.add_edge(131,673,weight=673)
g.add_edge(131,201,weight=201)
g.add_edge(131,303,weight=20)
g.add_edge(673,96,weight=96)
g.add_edge(673,205,weight=44)
g.add_edge(673,110,weight=7)
g.add_edge(201,96,weight=96)
g.add_edge(201,232,weight=10)
nx.draw(g,with_labels=True)
plt.show()
g.nodes(data=True)
g.edges(data=True)
I need to create a function myfunction(g, node_list) that returns a subgraph keeping only the edges incident to the given nodes whose weight is < 50.
For example, if I run myfunction(g, [131, 201]), the output should be:
EdgeDataView([(131, 303, {'weight': 20}), (201, 232, {'weight': 10})])

A way to do that is by looping through all the nodes in your list and finding their neighbors with the nx.neighbors function from networkx. You can then set up an if condition to check the weight of the edge between the node of interest and each neighbor. If the weight satisfies your constraint, you can add the neighbor, the edge, and the weight to your subgraph.
See code below:
import networkx as nx
import matplotlib.pyplot as plt
g = nx.Graph()
g.add_edge(131,673,weight=673)
g.add_edge(131,201,weight=201)
g.add_edge(131,303,weight=20)
g.add_edge(673,96,weight=96)
g.add_edge(673,205,weight=44)
g.add_edge(673,110,weight=7)
g.add_edge(201,96,weight=96)
g.add_edge(201,232,weight=10)
fig=plt.figure(figsize=(10,10))
#Plot full graph
plt.subplot(211)
plt.title('Full graph')
labels_g = nx.get_edge_attributes(g,'weight')
pos_g=nx.circular_layout(g)
nx.draw_networkx_edge_labels(g,pos_g,edge_labels=labels_g)
nx.draw(g,pos=pos_g,with_labels=True)
def check_neighbor_weights(g, nodes):
    subg = nx.Graph()  # Create subgraph
    for n in nodes:
        subg.add_node(n)
        neighbors = g.neighbors(n)  # Find all neighbors of node n
        for neigh in neighbors:
            if g[n][neigh]['weight'] < 50:  # Check if the weight is below 50
                subg.add_edge(n, neigh, weight=g[n][neigh]['weight'])
    return subg
subg=check_neighbor_weights(g,[131,201]) #Returns subgraph of interest
plt.subplot(212)
plt.title('subgraph')
labels_subg = nx.get_edge_attributes(subg,'weight')
pos_subg=nx.circular_layout(subg)
nx.draw_networkx_edge_labels(subg,pos=pos_subg,edge_labels=labels_subg)
nx.draw(subg,pos=pos_subg,with_labels=True)
plt.show()
The resulting figure shows the full graph on top and the subgraph of interest below.
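If you want the edge listing from the question rather than the plot, printing the returned subgraph's edges gives the expected result:
print(subg.edges(data=True))
#EdgeDataView([(131, 303, {'weight': 20}), (201, 232, {'weight': 10})])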

Related

How to define a networkx subgraph only based on out_edges?

I would like to get a subgraph around a specific node of a directed graph based on the out_edges or in_edges only.
# This does not work
H_tmp = nx.ego_graph(G, node_name, 2)
H_tmp.out_edges = []
H = nx.ego_graph(H_tmp, node_name, 2)
I tried using nx.ego_graph twice, but I don't know an efficient way to remove all the out_edges or in_edges. Is there a way to tell ego_graph to use only a specific set of edges?
Using the ego_graph function on a directed graph extracts the successors (out_edges) of the node. According to the doc, if you only want the predecessors (in_edges), you can apply the ego_graph function to the reverse of your graph. You'll then need to reverse the resulting subgraph. If you want both successors and predecessors, you can specify undirected=True. See the example summarizing this below:
import networkx as nx
import matplotlib.pyplot as plt
H = nx.fast_gnp_random_graph(5, 0.3,directed=True)
plt.figure(figsize=(15,10))
plt.subplot(141)
plt.title('Full graph')
nx.draw(H,with_labels=True)
plt.subplot(142)
plt.title('All neighbors around node 2')
H_all=nx.ego_graph(H, 2, 1,undirected=True)
nx.draw(H_all,with_labels=True)#plot
plt.subplot(143)
plt.title('Out subgraph around node 2')
H_out=nx.ego_graph(H, 2, 1)
nx.draw(H_out,with_labels=True) #plot
plt.subplot(144)
plt.title('In subgraph around node 2')
H_in=nx.ego_graph(H.reverse(), 2, 1) #apply ego_graph to the reversed graph to get in_edges
H_in_r=H_in.reverse() #reverse subgraph
nx.draw(H_in_r,with_labels=True) #plot
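If you only need the edges that leave (or enter) the node itself rather than the full ego graph, an alternative sketch is to build an edge-induced subgraph directly from out_edges or in_edges (this is an assumption about your use case, not part of the answer above):
H_out_only = H.edge_subgraph(H.out_edges(2)).copy() #keep only edges leaving node 2
H_in_only = H.edge_subgraph(H.in_edges(2)).copy() #keep only edges entering node 2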

Networkx edge induced subgraph isomorphism

Consider the following snippet:
import networkx as nx
from networkx.algorithms import isomorphism
import matplotlib.pyplot as plt
subg = nx.Graph()
subg.add_nodes_from([0]+[i+1 for i in range(6)])
subg.add_edges_from([(0, i) for i in range(1,7)])
bigg = nx.Graph()
bigg.add_nodes_from([0]+[i+1 for i in range(6)])
bigg.add_edges_from([(0, i) for i in range(1,7)]+[(i,i+1) for i in range(1,6)]+[(1,6)])
nx.draw(subg, with_labels=True)
plt.show()
nx.draw(bigg, with_labels=True)
plt.show()
matcher = isomorphism.GraphMatcher(bigg, subg)
print([x for x in matcher.subgraph_isomorphisms_iter()])
This returns no subgraph isomorphisms because, for example, there is no edge (1,6) in subg. How does one get networkx to not care about this? I would like to get the mapping of nodes from subg to bigg which respects all the relations defined in subg, regardless of whether additional relations exist between those nodes in bigg.
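One possible direction, assuming networkx 2.4+ which provides GraphMatcher.subgraph_monomorphisms_iter: a subgraph monomorphism only requires that every edge of subg maps to an edge of bigg, so extra edges in bigg such as (1, 6) are ignored.
matcher = isomorphism.GraphMatcher(bigg, subg)
print([x for x in matcher.subgraph_monomorphisms_iter()]) #mappings that preserve subg's edges only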

Recovering the node with maximum in-degree and retrieving its longitude and latitude

I am new to osmnx, OpenStreetMap and Nominatim. I am trying to retrieve the node with the highest/maximum in-degree using the in_degree_centrality networkx function. However, only degree_centrality and betweenness_centrality are present in the extended_stats dict. How do I locate this node? Do I have to convert the place network to a directed graph before I can use the in_degree_centrality function in NetworkX, or does osmnx do it directly?
import osmnx as ox, networkx as nx, matplotlib.cm as cm, pandas as pd, numpy as np
place = 'City of Lethbridge, Alberta, Canada'
gdf = ox.gdf_from_place(place)
area = ox.project_gdf(gdf).unary_union.area
G = ox.graph_from_place(place, network_type='drive_service')
# calculate basic and extended network stats, merge them together, and display
stats = ox.basic_stats(G, area=area)
extended_stats = ox.extended_stats(G, ecc=True, bc=True, cc=True)
for key, value in extended_stats.items():
    stats[key] = value
pd.Series(stats)
# unpack dicts into individual keys:values
stats = ox.basic_stats(G, area=area)
for k, count in stats['streets_per_node_counts'].items():
    stats['int_{}_count'.format(k)] = count
for k, proportion in stats['streets_per_node_proportion'].items():
    stats['int_{}_prop'.format(k)] = proportion
# delete the no longer needed dict elements
del stats['streets_per_node_counts']
del stats['streets_per_node_proportion']
# load as a pandas dataframe
pd.DataFrame(pd.Series(stats)).T
G_projected = ox.project_graph(G)
max_node, max_bc = max(extended_stats['betweenness_centrality'].items(), key=lambda x: x[1])
print(max_node, max_bc)
nc = ['r' if node==max_node else '#336699' for node in G_projected.nodes()]
ns = [50 if node==max_node else 8 for node in G_projected.nodes()]
fig, ax = ox.plot_graph(G_projected, node_size=ns, node_color=nc, node_zorder=2)
G_projected = ox.project_graph(G)
in_degree= in_degree_centrality(G_projected) # computing the in_degree
max_node_deg, max_in_deg= max(in_degree['in_degree_centrality'])
I want the node with the maximum in-degree, along with its node ID and its longitude and latitude.
Per the OSMnx documentation, in-degree centrality is not included in its built-in network stats calculation. However, as OSMnx produces a NetworkX MultiDiGraph object, you can call NetworkX's implementation to calculate it.
To retrieve the lat-long, you just identify the node with the highest in-degree centrality, then inspect that node's attributes (including lat-long) in the graph object.
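A minimal sketch of that, assuming G is the MultiDiGraph returned by ox.graph_from_place above (OSMnx stores longitude and latitude in the 'x' and 'y' node attributes, and nx is the networkx module already imported):
in_deg = nx.in_degree_centrality(G) #dict mapping node id -> in-degree centrality
max_node = max(in_deg, key=in_deg.get) #node with the highest in-degree centrality
lon, lat = G.nodes[max_node]['x'], G.nodes[max_node]['y']
print(max_node, lat, lon, in_deg[max_node])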

how to compute accuracy of AgglomerativeClustering

Hi, I use the AgglomerativeClustering example in Python and try to estimate its performance, but it switches the original labels.
I try to compare the predicted labels y_hc with the original labels y returned by make_blobs:
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt
data,y = make_blobs(n_samples=300, n_features=2, centers=4, cluster_std=2, random_state=50)
plt.figure(2)
# create dendrogram
dendrogram = sch.dendrogram(sch.linkage(data, method='ward'))
plt.title('dendrogram')
# create clusters linkage="average", affinity=metric , linkage = 'ward' affinity = 'euclidean'
hc = AgglomerativeClustering(n_clusters=4, linkage="average", affinity='euclidean')
# save clusters for chart
y_hc = hc.fit_predict(data,y)
plt.figure(3)
# create scatter plot
plt.scatter(data[y==0,0], data[y==0,1], c='red', s=50)
plt.scatter(data[y==1, 0], data[y==1, 1], c='black', s=50)
plt.scatter(data[y==2, 0], data[y==2, 1], c='blue', s=50)
plt.scatter(data[y==3, 0], data[y==3, 1], c='cyan', s=50)
plt.xlim(-15,15)
plt.ylim(-15,15)
plt.scatter(data[y_hc ==0,0], data[y_hc == 0,1], s=10, c='red')
plt.scatter(data[y_hc==1,0], data[y_hc == 1,1], s=10, c='black')
plt.scatter(data[y_hc ==2,0], data[y_hc == 2,1], s=10, c='blue')
plt.scatter(data[y_hc ==3,0], data[y_hc == 3,1], s=10, c='cyan')
for ii in range(4):
    print(ii)
    i0 = y_hc == ii
    counts = np.bincount(y[i0])
    valCountAtorgLbl = np.argmax(counts)
    accuracy0Tp = 100 * np.max(counts) / y[y == valCountAtorgLbl].shape[0]
    accuracy0Fp = 100 * np.min(counts) / y[y == valCountAtorgLbl].shape[0]
    print([accuracy0Tp, accuracy0Fp])
plt.show()
The clustering does not and cannot reproduce the original labels, only the original partitions.
You seem to assume that cluster 1 corresponds to label 1 (in fact, one could be labeled 'iris setosa', and there obviously is no way an unsupervised algorithm will come up with this cluster name...). It usually won't - there probably isn't the same number of clusters and classes either, and there could be unlabeled noise points. You can use the Hungarian algorithm to compute the optimum mapping (or just a greedy matching) to produce a more intuitive color mapping.
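A minimal sketch of the Hungarian-matching idea, assuming y and y_hc from the snippet above (SciPy's linear_sum_assignment implements the Hungarian algorithm):
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y, y_hc) #rows: true labels, columns: predicted cluster ids
row_ind, col_ind = linear_sum_assignment(-cm) #maximize the matched counts
mapping = dict(zip(col_ind, row_ind)) #cluster id -> best-matching true label
y_mapped = np.array([mapping[c] for c in y_hc])
print('accuracy:', (y_mapped == y).mean())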

keep the scaling while drawing a weighted networkx graph

When I draw a weighted networkx graph, it does not really represent the real weights in terms of distance. I was curious whether there is a parameter I am missing or some other problem.
So, I started by making a simulated dataset as follows:
from pylab import plot,show
from numpy import vstack,array
from numpy.random import rand
from scipy.cluster.vq import kmeans,vq
from scipy.spatial.distance import euclidean
import networkx as nx
from scipy.spatial.distance import pdist, squareform, cdist
# data generation
data = vstack((rand(5,2) + array([12,12]),rand(5,2)))
a = pdist(data, 'euclidean')
def givexy(index1D, VectorLength):
    return [index1D % VectorLength, index1D // VectorLength]  # integer division so node ids stay integers
import matplotlib.pyplot as plt
plt.plot(data[:,0], data[:,1], 'o')
plt.show()
Then I calculate the Euclidean distance between all pairs and use the distance as the weight:
G = nx.empty_graph(1)
for cnt, item in enumerate(a):
    print(cnt)
    G.add_edge(givexy(cnt, 10)[0], givexy(cnt, 10)[1], weight=item, length=0)
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos)
edge_labels = dict([((u, v), "%.2f" % d['weight'])
                    for u, v, d in G.edges(data=True)])
nx.draw_networkx_edge_labels(G,pos,edge_labels=edge_labels)
#~ nx.draw(G,pos,edge_labels=edge_labels)
plt.show()
exit()
You might get a different plot - for some unknown reason it is random. My main problem is the distance between nodes. For example, the distance between nodes 4 and 8 is 0.82, but it looks longer than the distance between nodes 7 and 0.
Any hint?
Thank you.
The spring layout doesn't explicitly use the weights as distances. Higher weight edges produce shorter edges in general.
Though if you want to specify the positions explicitly, you can do that:
from numpy import vstack,array
from numpy.random import rand
from scipy.spatial.distance import euclidean, pdist
import networkx as nx
import matplotlib.pyplot as plt
# data generation
data = vstack((rand(5,2) + array([12,12]),rand(5,2)))
a = pdist(data, 'euclidean')
def givexy(index1D, VectorLength):
    return [index1D % VectorLength, index1D // VectorLength]  # integer division so node ids stay integers
plt.plot(data[:,0], data[:,1], 'o')
G = nx.Graph()
for cnt, item in enumerate(a):
    print(cnt)
    G.add_edge(givexy(cnt, 10)[0], givexy(cnt, 10)[1], weight=item, length=0)
pos={}
for node, row in enumerate(data):
    pos[node] = row
nx.draw_networkx(G, pos)
plt.savefig('drawing.png')
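Alternatively, if you want a layout that treats the edge weights as target distances, nx.kamada_kawai_layout computes positions from weighted shortest-path distances (a sketch under that assumption, reusing G and plt from above):
pos_kk = nx.kamada_kawai_layout(G, weight='weight') #larger weights -> nodes placed further apart
nx.draw_networkx(G, pos_kk)
plt.savefig('drawing_kamada_kawai.png')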