when I draw a weighed networkx, it does not really represented the real weight in terms of distance. I was curious if there is any parameters that I am missing or some other problem.
so, I started by making a simulated dataset as following
from pylab import plot,show
from numpy import vstack,array
from numpy.random import rand
from scipy.cluster.vq import kmeans,vq
from scipy.spatial.distance import euclidean
import networkx as nx
from scipy.spatial.distance import pdist, squareform, cdist
# data generation
data = vstack((rand(5,2) + array([12,12]),rand(5,2)))
a = pdist(data, 'euclidean')
def givexy(index1D, VectorLength):
return [index1D%VectorLength, index1D/VectorLength]
import matplotlib.pyplot as plt
plt.plot(data[:,0], data[:,1], 'o')
plt.show()
then, I calculate the euclidean distance among all pairs and use the distance as weight
G = nx.empty_graph(1)
for cnt, item in enumerate(a):
print cnt
G.add_edge(givexy(cnt, 10)[0], givexy(cnt, 10)[1], weight=item, length=0)
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos)
edge_labels=dict([((u,v,),"%.2f" % d['weight'])
for u,v,d in G.edges(data=True)])
nx.draw_networkx_edge_labels(G,pos,edge_labels=edge_labels)
#~ nx.draw(G,pos,edge_labels=edge_labels)
plt.show()
exit()
you might a get a different plot - because of unknown reason it is random. my main problem is the distance of nodes. for example the distance between node 4 to 8 is 0.82 but it looks longer than the distance of node 7 and 0.
any hint ?
thank you,
The spring layout doesn't explicitly use the weights as distances. Higher weight edges produce shorter edges in general.
Though if you want to specify the positions explicitly you can do that:
from numpy import vstack,array
from numpy.random import rand
from scipy.spatial.distance import euclidean, pdist
import networkx as nx
import matplotlib.pyplot as plt
# data generation
data = vstack((rand(5,2) + array([12,12]),rand(5,2)))
a = pdist(data, 'euclidean')
def givexy(index1D, VectorLength):
return [index1D%VectorLength, index1D/VectorLength]
plt.plot(data[:,0], data[:,1], 'o')
G = nx.Graph()
for cnt, item in enumerate(a):
print cnt
G.add_edge(givexy(cnt, 10)[0], givexy(cnt, 10)[1], weight=item, length=0)
pos={}
for node,row in enumerate(data):
pos[node]=row
nx.draw_networkx(G, pos)
plt.savefig('drawing.png')
Related
I loaded the PyTorch Geometric dataset OGB_MAG, converted it into a homogeneous dataset, and then checked its number of nodes as follows:
import torch
import numpy as np
from torch_geometric.datasets import OGB_MAG
import os.path as osp
import networkx as nx
from torch_geometric.utils import to_networkx, to_undirected
path = osp.join('..', 'data', 'OGB_MAG')
dataset = OGB_MAG(path)
data = dataset[0]
homogeneous_data = data.to_homogeneous()
print(homogeneous_data)
--> Data(node_type=[1939743], edge_index=[2, 21111007], edge_type=[21111007])
print(homogeneous_data.has_isolated_nodes())
--> True
Then I converted it to NetworkX format and checked its number of edges:
nx_data = to_networkx(homogeneous_data)
print(nx_data.number_of_edges())
--> 21111007
nx_data = nx_data.to_undirected(reciprocal=False)
print(nx_data.number_of_nodes(), nx_data.number_of_edges())
--> (1939743, 21091072)
print(len(list(nx.isolates(nx_data))))
--> 0
The number of edges is different (21111007 with PyG vs 21091072 with NetworkX after converting to undirected). The number of nodes is the same though. We also see that PyG says there are isolated nodes, but NetworkX says there are none.
Any insight as to why I'm seeing this discrepancy?
I've the following graph:
import networkx as nx
import matplotlib.pyplot as plt
g = nx.Graph()
g.add_edge(131,673,weight=673)
g.add_edge(131,201,weight=201)
g.add_edge(131,303,weight=20)
g.add_edge(673,96,weight=96)
g.add_edge(673,205,weight=44)
g.add_edge(673,110,weight=7)
g.add_edge(201,96,weight=96)
g.add_edge(201,232,weight=10)
nx.draw(g,with_labels=True)
plt.show()
g.nodes(data=True)
g.edges(data=True)
I need to create a function myfunction(g, node_list) that returns a subgraph whose nodes have weight < 50.
For example, if I run myfunction(g, [131, 201]), the output should be:
EdgeDataView([(131, 303, {'weight': 20}), (201, 232, {'weight': 10})])
A way to do that is by looping through all the nodes in your list and finding their neighbors with the nx.neighbors function from networkx. You can then set up an if condition to check the weight of the edge between the node of interest and its neighbors. If the condition satisfies your constraint, you can add the neighbor, the edge, and the weight to your subgraph.
See code below:
import networkx as nx
import matplotlib.pyplot as plt
g = nx.Graph()
g.add_edge(131,673,weight=673)
g.add_edge(131,201,weight=201)
g.add_edge(131,303,weight=20)
g.add_edge(673,96,weight=96)
g.add_edge(673,205,weight=44)
g.add_edge(673,110,weight=7)
g.add_edge(201,96,weight=96)
g.add_edge(201,232,weight=10)
fig=plt.figure(figsize=(10,10))
#Plot full graph
plt.subplot(211)
plt.title('Full graph')
labels_g = nx.get_edge_attributes(g,'weight')
pos_g=nx.circular_layout(g)
nx.draw_networkx_edge_labels(g,pos_g,edge_labels=labels_g)
nx.draw(g,pos=pos_g,with_labels=True)
def check_neighbor_weights(g,nodes):
subg=nx.Graph() #Create subgraph
for n in nodes:
subg.add_node(n)
neighbors=g.neighbors(n) #Find all neighbors of node n
for neighs in neighbors:
if g[n][neighs]['weight']<50: #Check if the weigh t is below 50
subg.add_edge(n,neighs,weight=g[n][neighs]['weight'])
return subg
subg=check_neighbor_weights(g,[131,201]) #Returns subgraph of interest
plt.subplot(212)
plt.title('subgraph')
labels_subg = nx.get_edge_attributes(subg,'weight')
pos_subg=nx.circular_layout(subg)
nx.draw_networkx_edge_labels(subg,pos=pos_subg,edge_labels=labels_subg)
nx.draw(subg,pos=pos_subg,with_labels=True)
plt.show()
And the output gives:
Consider the following snippet:
import networkx as nx
from networkx.algorithms import isomorphism
import matplotlib.pyplot as plt
subg = nx.Graph()
subg.add_nodes_from([0]+[i+1 for i in range(6)])
subg.add_edges_from([(0, i) for i in range(1,7)])
bigg = nx.Graph()
bigg.add_nodes_from([0]+[i+1 for i in range(6)])
bigg.add_edges_from([(0, i) for i in range(1,7)]+[(i,i+1) for i in range(1,6)]+[(1,6)])
nx.draw(subg, with_labels=True)
plt.show()
nx.draw(bigg, with_labels=True)
plt.show()
matcher = isomorphism.GraphMatcher(bigg, subg)
print([x for x in matcher.subgraph_isomorphisms_iter()])
This returns no subgraph isomorphisms due to the fact that there exists no edge (1,6) in subg for example. How does one get networkx to not care about this? I would like to get the mapping of nodes from subg to bigg which respect all the relations defined in subg without respect to whether or not additional relations exist between the nodes in bigg.
I have doing some siganl processing and I am new to it. I am using scipy.signal to do the calculations.
I am able to find the peak height, width, but I was wondering if I can also find the rise of peak time and decay time. That will be the distance from the left width point to the tallest peak point and then tallest peak point to right width point.
So, far I have this, which is from tutorial
import matplotlib.pyplot as plt
from scipy.misc import electrocardiogram
from scipy.signal import find_peaks
x = electrocardiogram()[2000:4000]
peaks, _ = find_peaks(x, height=0)
plt.plot(x)
plt.plot(peaks, x[peaks], "x")
plt.plot(np.zeros_like(x), "--", color="gray")
plt.show()
esults_full = peak_widths(x, peaks, rel_height=1)
I think I am looking for the first moment or derivative
This is a thing that depends on the type of the signal, for this signal in particular an approach that worked is to find all peaks then filter the peaks by a prominence threshold defined by the the midpoint in the prominence ranges.
Once I have the peaks of interest I used the positions of the previous and next peaks.
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import electrocardiogram
from scipy.signal import find_peaks, peak_prominences
x = electrocardiogram()[2000:3500]
#b, a = butter(4, 0.001, 'high')
#x = lfilter(b, a, x)
peaks, _ = find_peaks(x)
prominences, _, _ = peak_prominences(x, peaks)
selected = prominences > 0.5 * (np.min(prominences) + np.max(prominences))
left = peaks[:-1][selected[1:]]
right = peaks[1:][selected[:-1]]
top = peaks[selected]
plt.figure(figsize=(14, 4))
plt.plot(x)
plt.plot(top, x[top], "x")
plt.plot(left, x[left], ".", markersize=20)
plt.plot(right, x[right], ".", markersize=20)
plt.show()
If you want to use height threshold it is interesting to remove frequencies lower than the signal frequency.
from scipy.signal import butter, lfilter
x = electrocardiogram()
plt.figure(figsize=(14, 4))
b, a = butter(4, 0.01, 'high')
plt.plot(x[2000:10000])
x = lfilter(b, a, x)
plt.plot(x[2000:10000])
plt.legend(['original', 'highpass filtered'])
About coding style preference, if you coming from MATLAB you may be used to everything in the global scope, but I always say that modules are your friends :). I would simply import scipy.signal instead of importing their member functions as global variables, you can use some alias for the modules like import matplotlib.pyplot as plt, and you can find what alias is commong to be used for each module, but this is more for programmer interoperability, not mandatory, so it is that I wrote the code in your style.
The derivatives
You can use rise = (peaks[top] - peaks[left]) / (top - left), and fall = (peaks[top] - peaks[right]) / (top - right), this is not the actual value of the derivatives, but are related featueres features.
Also if you want to find the max de
i have the following code:
import scipy as sc
import matplotlib.pyplot as plt
....
MeanSquareDistance1D=lambda n,m: ((m*Lastpoint1d(n)**2).sum())/m
......
data=[]
for i in range(10,110,20):
#mydata=list(sc.mat([[i],[MeanSquareDistance1D(i,2000)]]))
#data.append(mydata)
mydata=(sc.array([i,MeanSquareDistance1D(i,2000)])).tolist() I did it like this
data.append(mydata)
plt.plot(data)
plt.show()
I want the 'mydata' to be a matrix or array(preferable) (i am convering it to a list in order to do the plot) with i lines (5 lines) and 2 columns.
The first column should be the 'i' and the second the value of MeanSquareDistance1D(i,2000).
I am receiving the error 'ValueError: x and y can be no greater than 2-D'
import scipy as sc
import matplotlib.pyplot as plt
....
MeanSquareDistance1D=lambda n,m: ((m*Lastpoint1d(n)**2).sum())/m
......
data=[]
for i in range(10,110,20):
#mydata=list(sc.mat([[i],[MeanSquareDistance1D(i,2000)]]))
#data.append(mydata)
mydata=(sc.array([i,MeanSquareDistance1D(i,2000)])).tolist() I did it like this
data.append(mydata)
plt.plot(data)
plt.show()