Networkx edge induced subgraph isomorphism - networkx

Consider the following snippet:
import networkx as nx
from networkx.algorithms import isomorphism
import matplotlib.pyplot as plt
subg = nx.Graph()
subg.add_nodes_from([0]+[i+1 for i in range(6)])
subg.add_edges_from([(0, i) for i in range(1,7)])
bigg = nx.Graph()
bigg.add_nodes_from([0]+[i+1 for i in range(6)])
bigg.add_edges_from([(0, i) for i in range(1,7)]+[(i,i+1) for i in range(1,6)]+[(1,6)])
nx.draw(subg, with_labels=True)
plt.show()
nx.draw(bigg, with_labels=True)
plt.show()
matcher = isomorphism.GraphMatcher(bigg, subg)
print([x for x in matcher.subgraph_isomorphisms_iter()])
This returns no subgraph isomorphisms due to the fact that there exists no edge (1,6) in subg for example. How does one get networkx to not care about this? I would like to get the mapping of nodes from subg to bigg which respect all the relations defined in subg without respect to whether or not additional relations exist between the nodes in bigg.

Related

How to define a networkx subgraph only based on out_edges?

I would like to get a subgraph around a specific node of a a directed graph based on the out_edges or in_edges only.
# This does not work
H_tmp = nx.ego_graph(G, node_name, 2)
H_tmp.out_edges = []
H = nx.ego_graph(H_tmp, node_name, 2)
I tried using nx.ego_graph twice, but I don't know an efficient way to remove all the out_edges or in_edges. Is there a way to tell ego_graph to use only a specific set of edges?
Using the eco_graph function on an undirected graph extracts the out successors of the node. According to the doc, if you only want the predecessors (in_edges), you can apply the eco_graph function on the reverse of your graph. You'll then need to reverse your subgraph. If you want both successors and predecessors, you can specify undirected=True. See example summarizing this below:
import networkx as nx
import matplotlib.pyplot as plt
H = nx.fast_gnp_random_graph(5, 0.3,directed=True)
plt.figure(figsize=(15,10))
plt.subplot(141)
plt.title('Full graph')
nx.draw(H,with_labels=True)
plt.subplot(142)
plt.title('All neighbors around node 2')
H_all=nx.ego_graph(H, 2, 1,undirected=True)
nx.draw(H_all,with_labels=True)#plot
plt.subplot(143)
plt.title('Out subgraph around node 2')
H_out=nx.ego_graph(H, 2, 1)
nx.draw(H_out,with_labels=True) #plot
plt.subplot(144)
plt.title('In subgraph around node 2')
H_in=nx.ego_graph(H.reverse(), 2, 1) #apply eco_graph to reverse graph to get in_edges
H_in_r=H_in.reverse() #reverse subgraph
nx.draw(H_in_r,with_labels=True) #plot

PyTorch Geometric directed graph shows wrong number of edges when converted to NetworkX undirected format

I loaded the PyTorch Geometric dataset OGB_MAG, converted it into a homogeneous dataset, and then checked its number of nodes as follows:
import torch
import numpy as np
from torch_geometric.datasets import OGB_MAG
import os.path as osp
import networkx as nx
from torch_geometric.utils import to_networkx, to_undirected
path = osp.join('..', 'data', 'OGB_MAG')
dataset = OGB_MAG(path)
data = dataset[0]
homogeneous_data = data.to_homogeneous()
print(homogeneous_data)
--> Data(node_type=[1939743], edge_index=[2, 21111007], edge_type=[21111007])
print(homogeneous_data.has_isolated_nodes())
--> True
Then I converted it to NetworkX format and checked its number of edges:
nx_data = to_networkx(homogeneous_data)
print(nx_data.number_of_edges())
--> 21111007
nx_data = nx_data.to_undirected(reciprocal=False)
print(nx_data.number_of_nodes(), nx_data.number_of_edges())
--> (1939743, 21091072)
print(len(list(nx.isolates(nx_data))))
--> 0
The number of edges is different (21111007 with PyG vs 21091072 with NetworkX after converting to undirected). The number of nodes is the same though. We also see that PyG says there are isolated nodes, but NetworkX says there are none.
Any insight as to why I'm seeing this discrepancy?

Find neighboring nodes in graph

I've the following graph:
import networkx as nx
import matplotlib.pyplot as plt
g = nx.Graph()
g.add_edge(131,673,weight=673)
g.add_edge(131,201,weight=201)
g.add_edge(131,303,weight=20)
g.add_edge(673,96,weight=96)
g.add_edge(673,205,weight=44)
g.add_edge(673,110,weight=7)
g.add_edge(201,96,weight=96)
g.add_edge(201,232,weight=10)
nx.draw(g,with_labels=True)
plt.show()
g.nodes(data=True)
g.edges(data=True)
I need to create a function myfunction(g, node_list) that returns a subgraph whose nodes have weight < 50.
For example, if I run myfunction(g, [131, 201]), the output should be:
EdgeDataView([(131, 303, {'weight': 20}), (201, 232, {'weight': 10})])
A way to do that is by looping through all the nodes in your list and finding their neighbors with the nx.neighbors function from networkx. You can then set up an if condition to check the weight of the edge between the node of interest and its neighbors. If the condition satisfies your constraint, you can add the neighbor, the edge, and the weight to your subgraph.
See code below:
import networkx as nx
import matplotlib.pyplot as plt
g = nx.Graph()
g.add_edge(131,673,weight=673)
g.add_edge(131,201,weight=201)
g.add_edge(131,303,weight=20)
g.add_edge(673,96,weight=96)
g.add_edge(673,205,weight=44)
g.add_edge(673,110,weight=7)
g.add_edge(201,96,weight=96)
g.add_edge(201,232,weight=10)
fig=plt.figure(figsize=(10,10))
#Plot full graph
plt.subplot(211)
plt.title('Full graph')
labels_g = nx.get_edge_attributes(g,'weight')
pos_g=nx.circular_layout(g)
nx.draw_networkx_edge_labels(g,pos_g,edge_labels=labels_g)
nx.draw(g,pos=pos_g,with_labels=True)
def check_neighbor_weights(g,nodes):
subg=nx.Graph() #Create subgraph
for n in nodes:
subg.add_node(n)
neighbors=g.neighbors(n) #Find all neighbors of node n
for neighs in neighbors:
if g[n][neighs]['weight']<50: #Check if the weigh t is below 50
subg.add_edge(n,neighs,weight=g[n][neighs]['weight'])
return subg
subg=check_neighbor_weights(g,[131,201]) #Returns subgraph of interest
plt.subplot(212)
plt.title('subgraph')
labels_subg = nx.get_edge_attributes(subg,'weight')
pos_subg=nx.circular_layout(subg)
nx.draw_networkx_edge_labels(subg,pos=pos_subg,edge_labels=labels_subg)
nx.draw(subg,pos=pos_subg,with_labels=True)
plt.show()
And the output gives:

keep the scaling while drawing a weighed networkx

when I draw a weighed networkx, it does not really represented the real weight in terms of distance. I was curious if there is any parameters that I am missing or some other problem.
so, I started by making a simulated dataset as following
from pylab import plot,show
from numpy import vstack,array
from numpy.random import rand
from scipy.cluster.vq import kmeans,vq
from scipy.spatial.distance import euclidean
import networkx as nx
from scipy.spatial.distance import pdist, squareform, cdist
# data generation
data = vstack((rand(5,2) + array([12,12]),rand(5,2)))
a = pdist(data, 'euclidean')
def givexy(index1D, VectorLength):
return [index1D%VectorLength, index1D/VectorLength]
import matplotlib.pyplot as plt
plt.plot(data[:,0], data[:,1], 'o')
plt.show()
then, I calculate the euclidean distance among all pairs and use the distance as weight
G = nx.empty_graph(1)
for cnt, item in enumerate(a):
print cnt
G.add_edge(givexy(cnt, 10)[0], givexy(cnt, 10)[1], weight=item, length=0)
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos)
edge_labels=dict([((u,v,),"%.2f" % d['weight'])
for u,v,d in G.edges(data=True)])
nx.draw_networkx_edge_labels(G,pos,edge_labels=edge_labels)
#~ nx.draw(G,pos,edge_labels=edge_labels)
plt.show()
exit()
you might a get a different plot - because of unknown reason it is random. my main problem is the distance of nodes. for example the distance between node 4 to 8 is 0.82 but it looks longer than the distance of node 7 and 0.
any hint ?
thank you,
The spring layout doesn't explicitly use the weights as distances. Higher weight edges produce shorter edges in general.
Though if you want to specify the positions explicitly you can do that:
from numpy import vstack,array
from numpy.random import rand
from scipy.spatial.distance import euclidean, pdist
import networkx as nx
import matplotlib.pyplot as plt
# data generation
data = vstack((rand(5,2) + array([12,12]),rand(5,2)))
a = pdist(data, 'euclidean')
def givexy(index1D, VectorLength):
return [index1D%VectorLength, index1D/VectorLength]
plt.plot(data[:,0], data[:,1], 'o')
G = nx.Graph()
for cnt, item in enumerate(a):
print cnt
G.add_edge(givexy(cnt, 10)[0], givexy(cnt, 10)[1], weight=item, length=0)
pos={}
for node,row in enumerate(data):
pos[node]=row
nx.draw_networkx(G, pos)
plt.savefig('drawing.png')

scipy - how to make a matrix with specific rows and columns

i have the following code:
import scipy as sc
import matplotlib.pyplot as plt
....
MeanSquareDistance1D=lambda n,m: ((m*Lastpoint1d(n)**2).sum())/m
......
data=[]
for i in range(10,110,20):
#mydata=list(sc.mat([[i],[MeanSquareDistance1D(i,2000)]]))
#data.append(mydata)
mydata=(sc.array([i,MeanSquareDistance1D(i,2000)])).tolist() I did it like this
data.append(mydata)
plt.plot(data)
plt.show()
I want the 'mydata' to be a matrix or array(preferable) (i am convering it to a list in order to do the plot) with i lines (5 lines) and 2 columns.
The first column should be the 'i' and the second the value of MeanSquareDistance1D(i,2000).
I am receiving the error 'ValueError: x and y can be no greater than 2-D'
import scipy as sc
import matplotlib.pyplot as plt
....
MeanSquareDistance1D=lambda n,m: ((m*Lastpoint1d(n)**2).sum())/m
......
data=[]
for i in range(10,110,20):
#mydata=list(sc.mat([[i],[MeanSquareDistance1D(i,2000)]]))
#data.append(mydata)
mydata=(sc.array([i,MeanSquareDistance1D(i,2000)])).tolist() I did it like this
data.append(mydata)
plt.plot(data)
plt.show()