By executing Networkx triadic_census Algorithm, I'm able to get the dictionary of the number of nodes falling on each type of triadic census
triad_census_social=nx.triadic_census(social_graph.to_directed())
Now, I'd like to return the list of triads, who all follow the pattern of census code "201", "120U", or any one of the 16 existing types.
How can I get those node lists under a census count?
There is no function in networkx that allow you to do it, so you should implement it manually. I modified the networkx.algorithms.triads code for you to return triads, not their count:
import networkx as nx
G = nx.DiGraph()
G.add_nodes_from([1,2,3,4,5])
G.add_edges_from([(1,2),(2,3),(2,4),(4,5)])
triad_census_social=nx.triadic_census(G)
# '003': 2,
# '012': 4,
# '021C': 3,
# '021D': 1,
# another: 0
#: The integer codes representing each type of triad.
#:
#: Triads that are the same up to symmetry have the same code.
TRICODES = (1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8, 7, 11, 2, 6, 4, 8, 5, 9,
9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5, 6, 7, 6, 9, 10, 14, 4, 9,
9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12, 14, 15, 8, 14, 13, 15,
11, 15, 15, 16)
#: The names of each type of triad. The order of the elements is
#: important: it corresponds to the tricodes given in :data:`TRICODES`.
TRIAD_NAMES = ('003', '012', '102', '021D', '021U', '021C', '111D', '111U',
'030T', '030C', '201', '120D', '120U', '120C', '210', '300')
#: A dictionary mapping triad code to triad name.
TRICODE_TO_NAME = {i: TRIAD_NAMES[code - 1] for i, code in enumerate(TRICODES)}
def _tricode(G, v, u, w):
"""Returns the integer code of the given triad.
This is some fancy magic that comes from Batagelj and Mrvar's paper. It
treats each edge joining a pair of `v`, `u`, and `w` as a bit in
the binary representation of an integer.
"""
combos = ((v, u, 1), (u, v, 2), (v, w, 4), (w, v, 8), (u, w, 16),
(w, u, 32))
return sum(x for u, v, x in combos if v in G[u])
census = {name: set([]) for name in TRIAD_NAMES}
n = len(G)
m = {v: i for i, v in enumerate(G)}
for v in G:
vnbrs = set(G.pred[v]) | set(G.succ[v])
for u in vnbrs:
if m[u] <= m[v]:
continue
neighbors = (vnbrs | set(G.succ[u]) | set(G.pred[u])) - {u, v}
# Calculate dyadic triads instead of counting them.
for w in neighbors:
if v in G[u] and u in G[v]:
census['102'].add(tuple(sorted([u, v, w])))
else:
census['012'].add(tuple(sorted([u, v, w])))
# Count connected triads.
for w in neighbors:
if m[u] < m[w] or (m[v] < m[w] < m[u] and
v not in G.pred[w] and
v not in G.succ[w]):
code = _tricode(G, v, u, w)
census[TRICODE_TO_NAME[code]].add(tuple(sorted([u, v, w])))
# null triads, I implemented them manually because the original algorithm computes
# them as _number_of_all_possible_triads_ - _number_of_all_found_triads_
for v in G:
vnbrs = set(G.pred[v]) | set(G.succ[v])
not_vnbrs = set(G.nodes()) - vnbrs
for u in not_vnbrs:
unbrs = set(G.pred[u]) | set(G.succ[u])
not_unbrs = set(G.nodes()) - unbrs
for w in not_unbrs:
wnbrs = set(G.pred[w]) | set(G.succ[w])
if v not in wnbrs and len(set([u, v, w])) == 3:
census['003'].add(tuple(sorted([u, v, w])))
# '003': {(1, 3, 4), (1, 3, 5)},
# '012': {(1, 2, 3), (1, 2, 4), (2, 3, 4), (2, 4, 5)},
# '021C': {(1, 2, 3), (1, 2, 4), (2, 4, 5)},
# '021D': {(2, 3, 4)},
# another: empty
Building on vurmux's answer (by fixing the '102' and '012' triads):
import networkx as nx
import itertools
def _tricode(G, v, u, w):
"""Returns the integer code of the given triad.
This is some fancy magic that comes from Batagelj and Mrvar's paper. It
treats each edge joining a pair of `v`, `u`, and `w` as a bit in
the binary representation of an integer.
"""
combos = ((v, u, 1), (u, v, 2), (v, w, 4), (w, v, 8), (u, w, 16),
(w, u, 32))
return sum(x for u, v, x in combos if v in G[u])
G = nx.DiGraph()
G.add_nodes_from([1, 2, 3, 4, 5])
G.add_edges_from([(1, 2), (2, 3), (2, 4), (4, 5)])
#: The integer codes representing each type of triad.
#: Triads that are the same up to symmetry have the same code.
TRICODES = (1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8, 7, 11, 2, 6, 4, 8, 5, 9,
9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5, 6, 7, 6, 9, 10, 14, 4, 9,
9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12, 14, 15, 8, 14, 13, 15,
11, 15, 15, 16)
#: The names of each type of triad. The order of the elements is
#: important: it corresponds to the tricodes given in :data:`TRICODES`.
TRIAD_NAMES = ('003', '012', '102', '021D', '021U', '021C', '111D', '111U',
'030T', '030C', '201', '120D', '120U', '120C', '210', '300')
#: A dictionary mapping triad code to triad name.
TRICODE_TO_NAME = {i: TRIAD_NAMES[code - 1] for i, code in enumerate(TRICODES)}
triad_nodes = {name: set([]) for name in TRIAD_NAMES}
m = {v: i for i, v in enumerate(G)}
for v in G:
vnbrs = set(G.pred[v]) | set(G.succ[v])
for u in vnbrs:
if m[u] > m[v]:
unbrs = set(G.pred[u]) | set(G.succ[u])
neighbors = (vnbrs | unbrs) - {u, v}
not_neighbors = set(G.nodes()) - neighbors - {u, v}
# Find dyadic triads
for w in not_neighbors:
if v in G[u] and u in G[v]:
triad_nodes['102'].add(tuple(sorted([u, v, w])))
else:
triad_nodes['012'].add(tuple(sorted([u, v, w])))
for w in neighbors:
if m[u] < m[w] or (m[v] < m[w] < m[u] and
v not in G.pred[w] and
v not in G.succ[w]):
code = _tricode(G, v, u, w)
triad_nodes[TRICODE_TO_NAME[code]].add(
tuple(sorted([u, v, w])))
# find null triads
all_tuples = set()
for s in triad_nodes.values():
all_tuples = all_tuples.union(s)
triad_nodes['003'] = set(itertools.combinations(G.nodes(), 3)).difference(all_tuples)
Result
# print(triad_nodes)
# {'003': {(1, 3, 4), (1, 3, 5)},
# '012': {(1, 2, 5), (1, 4, 5), (2, 3, 5), (3, 4, 5)},
# '102': set(),
# '021D': {(2, 3, 4)},
# '021U': set(),
# '021C': {(1, 2, 3), (1, 2, 4), (2, 4, 5)},
# '111D': set(),
# '111U': set(),
# '030T': set(),
# '030C': set(),
# '201': set(),
# '120D': set(),
# '120U': set(),
# '120C': set(),
# '210': set(),
# '300': set()}
In agreement with nx.triadic_census
# print(nx.triadic_census(G))
# {'003': 2,
# '012': 4,
# '102': 0,
# '021D': 1,
# '021U': 0,
# '021C': 3,
# '111D': 0,
# '111U': 0,
# '030T': 0,
# '030C': 0,
# '201': 0,
# '120D': 0,
# '120U': 0,
# '120C': 0,
# '210': 0,
# '300': 0}
I want to count elements value of dictionary. I try with this code:
def f_items(data, steps=0):
items = defaultdict(int)
for element in data:
if element in data:
items[element] += 1
else:
items[element] = 1
return items.items()
data = [[1, 2, 3, 'E'], [1, 2, 3, 'E'], [5, 2, 7, 112, 'A'] ]
rdd = sc.parallelize(data)
items = rdd.flatMap(lambda data: [y for y in f_items(data)], True)
print (items.collect())
The output of this code is shown below:
[(1, 1), (2, 1), (3, 1), ('E', 1), (1, 1), (2, 1), (3, 1), ('E', 1), (5, 1), (2, 1), (7, 1), (112, 1), ('A', 1)]
But, it should show the result following:
[(1, 2), (2, 3), (3, 3), ('E', 2), (5, 1), (7, 1), (112, 1), ('A', 1)]
How to achieve this?
Your last step should be a reduceByKey function call on the items rdd.
final_items = items.reduceByKey(lambda x,y: x+y)
print (final_items.collect())
You can look into this link to see some examples of reduceByKey in scala, java and python.
Imagine a function combineSequences: (seqs: Set[Seq[Int]])Set[Seq[Int]] that combines sequences when the last item of first sequence matches the first item of the second sequence. For example, if you have the following sequences:
(1, 2)
(2, 3)
(5, 6, 7, 8)
(8, 9, 10)
(3, 4, 10)
The result of combineSequences would be:
(5, 6, 7, 8, 8, 9, 10)
(1, 2, 2, 3, 3, 4, 10)
Because sequences 1, 2, and 5 combine together. If multiple sequences could combine to create a different result, the decisions is arbitrary. For example, if we have the sequences:
(1, 2)
(2, 3)
(2, 4)
There are two correct answers. Either:
(1, 2, 2, 3)
(2, 4)
Or:
(1, 2, 2, 4)
(2, 3)
I can only think of a very imperative and fairly opaque implementation. I'm wondering if anyone has a solution that would be more idiomatic scala. I've run into related problems a few times now.
Certainly not the most optimized solution but I've gone for readability.
def combineSequences[T]( seqs: Set[Seq[T]] ): Set[Seq[T]] = {
if ( seqs.isEmpty ) seqs
else {
val (seq1, otherSeqs) = (seqs.head, seqs.tail)
otherSeqs.find(_.headOption == seq1.lastOption) match {
case Some( seq2 ) => combineSequences( otherSeqs - seq2 + (seq1 ++ seq2) )
case None =>
otherSeqs.find(_.lastOption == seq1.headOption) match {
case Some( seq2 ) => combineSequences( otherSeqs - seq2 + (seq2 ++ seq1) )
case None => combineSequences( otherSeqs ) + seq1
}
}
}
}
REPL test:
scala> val seqs = Set(Seq(1, 2), Seq(2, 3), Seq(5, 6, 7, 8), Seq(8, 9, 10), Seq(3, 4, 10))
seqs: scala.collection.immutable.Set[Seq[Int]] = Set(List(1, 2), List(2, 3), List(8, 9, 10), List(5, 6, 7, 8), List(3, 4, 10))
scala> combineSequences( seqs )
res10: Set[Seq[Int]] = Set(List(1, 2, 2, 3, 3, 4, 10), List(5, 6, 7, 8, 8, 9, 10))
scala> val seqs = Set(Seq(1, 2), Seq(2, 3, 100), Seq(5, 6, 7, 8), Seq(8, 9, 10), Seq(100, 4, 10))
seqs: scala.collection.immutable.Set[Seq[Int]] = Set(List(100, 4, 10), List(1, 2), List(8, 9, 10), List(2, 3, 100), List(5, 6, 7, 8))
scala> combineSequences( seqs )
res11: Set[Seq[Int]] = Set(List(5, 6, 7, 8, 8, 9, 10), List(1, 2, 2, 3, 100, 100, 4, 10))