Get the list of Triad nodes , who fall under the category of individual Triadic Census - networkx

By executing Networkx triadic_census Algorithm, I'm able to get the dictionary of the number of nodes falling on each type of triadic census
triad_census_social=nx.triadic_census(social_graph.to_directed())
Now, I'd like to return the list of triads, who all follow the pattern of census code "201", "120U", or any one of the 16 existing types.
How can I get those node lists under a census count?

There is no function in networkx that allow you to do it, so you should implement it manually. I modified the networkx.algorithms.triads code for you to return triads, not their count:
import networkx as nx
G = nx.DiGraph()
G.add_nodes_from([1,2,3,4,5])
G.add_edges_from([(1,2),(2,3),(2,4),(4,5)])
triad_census_social=nx.triadic_census(G)
# '003': 2,
# '012': 4,
# '021C': 3,
# '021D': 1,
# another: 0
#: The integer codes representing each type of triad.
#:
#: Triads that are the same up to symmetry have the same code.
TRICODES = (1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8, 7, 11, 2, 6, 4, 8, 5, 9,
9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5, 6, 7, 6, 9, 10, 14, 4, 9,
9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12, 14, 15, 8, 14, 13, 15,
11, 15, 15, 16)
#: The names of each type of triad. The order of the elements is
#: important: it corresponds to the tricodes given in :data:`TRICODES`.
TRIAD_NAMES = ('003', '012', '102', '021D', '021U', '021C', '111D', '111U',
'030T', '030C', '201', '120D', '120U', '120C', '210', '300')
#: A dictionary mapping triad code to triad name.
TRICODE_TO_NAME = {i: TRIAD_NAMES[code - 1] for i, code in enumerate(TRICODES)}
def _tricode(G, v, u, w):
"""Returns the integer code of the given triad.
This is some fancy magic that comes from Batagelj and Mrvar's paper. It
treats each edge joining a pair of `v`, `u`, and `w` as a bit in
the binary representation of an integer.
"""
combos = ((v, u, 1), (u, v, 2), (v, w, 4), (w, v, 8), (u, w, 16),
(w, u, 32))
return sum(x for u, v, x in combos if v in G[u])
census = {name: set([]) for name in TRIAD_NAMES}
n = len(G)
m = {v: i for i, v in enumerate(G)}
for v in G:
vnbrs = set(G.pred[v]) | set(G.succ[v])
for u in vnbrs:
if m[u] <= m[v]:
continue
neighbors = (vnbrs | set(G.succ[u]) | set(G.pred[u])) - {u, v}
# Calculate dyadic triads instead of counting them.
for w in neighbors:
if v in G[u] and u in G[v]:
census['102'].add(tuple(sorted([u, v, w])))
else:
census['012'].add(tuple(sorted([u, v, w])))
# Count connected triads.
for w in neighbors:
if m[u] < m[w] or (m[v] < m[w] < m[u] and
v not in G.pred[w] and
v not in G.succ[w]):
code = _tricode(G, v, u, w)
census[TRICODE_TO_NAME[code]].add(tuple(sorted([u, v, w])))
# null triads, I implemented them manually because the original algorithm computes
# them as _number_of_all_possible_triads_ - _number_of_all_found_triads_
for v in G:
vnbrs = set(G.pred[v]) | set(G.succ[v])
not_vnbrs = set(G.nodes()) - vnbrs
for u in not_vnbrs:
unbrs = set(G.pred[u]) | set(G.succ[u])
not_unbrs = set(G.nodes()) - unbrs
for w in not_unbrs:
wnbrs = set(G.pred[w]) | set(G.succ[w])
if v not in wnbrs and len(set([u, v, w])) == 3:
census['003'].add(tuple(sorted([u, v, w])))
# '003': {(1, 3, 4), (1, 3, 5)},
# '012': {(1, 2, 3), (1, 2, 4), (2, 3, 4), (2, 4, 5)},
# '021C': {(1, 2, 3), (1, 2, 4), (2, 4, 5)},
# '021D': {(2, 3, 4)},
# another: empty

Building on vurmux's answer (by fixing the '102' and '012' triads):
import networkx as nx
import itertools
def _tricode(G, v, u, w):
"""Returns the integer code of the given triad.
This is some fancy magic that comes from Batagelj and Mrvar's paper. It
treats each edge joining a pair of `v`, `u`, and `w` as a bit in
the binary representation of an integer.
"""
combos = ((v, u, 1), (u, v, 2), (v, w, 4), (w, v, 8), (u, w, 16),
(w, u, 32))
return sum(x for u, v, x in combos if v in G[u])
G = nx.DiGraph()
G.add_nodes_from([1, 2, 3, 4, 5])
G.add_edges_from([(1, 2), (2, 3), (2, 4), (4, 5)])
#: The integer codes representing each type of triad.
#: Triads that are the same up to symmetry have the same code.
TRICODES = (1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8, 7, 11, 2, 6, 4, 8, 5, 9,
9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5, 6, 7, 6, 9, 10, 14, 4, 9,
9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12, 14, 15, 8, 14, 13, 15,
11, 15, 15, 16)
#: The names of each type of triad. The order of the elements is
#: important: it corresponds to the tricodes given in :data:`TRICODES`.
TRIAD_NAMES = ('003', '012', '102', '021D', '021U', '021C', '111D', '111U',
'030T', '030C', '201', '120D', '120U', '120C', '210', '300')
#: A dictionary mapping triad code to triad name.
TRICODE_TO_NAME = {i: TRIAD_NAMES[code - 1] for i, code in enumerate(TRICODES)}
triad_nodes = {name: set([]) for name in TRIAD_NAMES}
m = {v: i for i, v in enumerate(G)}
for v in G:
vnbrs = set(G.pred[v]) | set(G.succ[v])
for u in vnbrs:
if m[u] > m[v]:
unbrs = set(G.pred[u]) | set(G.succ[u])
neighbors = (vnbrs | unbrs) - {u, v}
not_neighbors = set(G.nodes()) - neighbors - {u, v}
# Find dyadic triads
for w in not_neighbors:
if v in G[u] and u in G[v]:
triad_nodes['102'].add(tuple(sorted([u, v, w])))
else:
triad_nodes['012'].add(tuple(sorted([u, v, w])))
for w in neighbors:
if m[u] < m[w] or (m[v] < m[w] < m[u] and
v not in G.pred[w] and
v not in G.succ[w]):
code = _tricode(G, v, u, w)
triad_nodes[TRICODE_TO_NAME[code]].add(
tuple(sorted([u, v, w])))
# find null triads
all_tuples = set()
for s in triad_nodes.values():
all_tuples = all_tuples.union(s)
triad_nodes['003'] = set(itertools.combinations(G.nodes(), 3)).difference(all_tuples)
Result
# print(triad_nodes)
# {'003': {(1, 3, 4), (1, 3, 5)},
# '012': {(1, 2, 5), (1, 4, 5), (2, 3, 5), (3, 4, 5)},
# '102': set(),
# '021D': {(2, 3, 4)},
# '021U': set(),
# '021C': {(1, 2, 3), (1, 2, 4), (2, 4, 5)},
# '111D': set(),
# '111U': set(),
# '030T': set(),
# '030C': set(),
# '201': set(),
# '120D': set(),
# '120U': set(),
# '120C': set(),
# '210': set(),
# '300': set()}
In agreement with nx.triadic_census
# print(nx.triadic_census(G))
# {'003': 2,
# '012': 4,
# '102': 0,
# '021D': 1,
# '021U': 0,
# '021C': 3,
# '111D': 0,
# '111U': 0,
# '030T': 0,
# '030C': 0,
# '201': 0,
# '120D': 0,
# '120U': 0,
# '120C': 0,
# '210': 0,
# '300': 0}

Related

How to convert a List[List[Long]] to a List[List[Int]]?

What's the best way to convert a List[List[Long]] to a List[List[Int]] in Scala?
For example, given the following list of type List[List[Long]]
val l: List[List[Long]] = List(List(11, 10, 11, 10, 11), List(8, 19, 24, 0, 2))
how can it be converted to List[List[Int]]?
You can also use cats lib for that and compose List functors
import cats.Functor
import cats.implicits._
import cats.data._
val l: List[List[Long]] = List(List(11, 10, 11, 10, 11), List(8, 19, 24, 0, 2))
Functor[List].compose[List].map(l)(_.toInt)
//or
Nested(l).map(_.toInt).value
and one more pure scala approach (not very safe, though)
val res:List[List[Int]] = l.asInstanceOf[List[List[Int]]]
Try l.map(_.map(_.toInt)) like so
val l: List[List[Long]] = List(List(11, 10, 11, 10, 11), List(8, 19, 24, 0, 2))
l.map(_.map(_.toInt))
which should give
res2: List[List[Int]] = List(List(11, 10, 11, 10, 11), List(8, 19, 24, 0, 2))
Only if you are completely sure that you won't overflow the Int.
val l1: List[List[Long]] = List(List(11, 10, 11, 10, 11), List(8, 19, 24, 0, 2))
val l2: List[List[Int]] = l1.map(list => list.map(long => long.toInt))
(Basically, every time you want to transform a List into another List, use map).
can be achieved with simple transformation on collection using map function.
map works by applying a function to each element in the list. in your case nested lists are there. so you need to apply map function 2 times like below example...
val x : List[List[Long]] = List(List(11, 10, 11, 10, 11), List(8, 19, 24, 0, 2))
println(x)
val y :List[List[Int]]= x.map(a => a.map(_.toInt))
println(y)
Output :
List(List(11, 10, 11, 10, 11), List(8, 19, 24, 0, 2))
List(List(11, 10, 11, 10, 11), List(8, 19, 24, 0, 2))

Finding all slices delimited by two elements in Scala `List`

In Scala, what would be the right way of selecting elements of a list based on the position of two elements? Suppose I have the list below and I would like to select all the elements between 2 and 7, including them (note: not greater than/smaller than, but the elements that come after 2 and before 7 in the list):
scala> val l = List(1, 14, 2, 17, 35, 9, 12, 7, 9, 40)
l: List[Int] = List(1, 14, 2, 17, 35, 9, 12, 7, 9, 40)
scala> def someMethod(l: List[Int], from: Int, to: Int) : List[Int] = {
| // some code here
| }
someMethod: (l: List[Int], from: Int, to: Int)List[Int]
scala> someMethod(l, 2, 7)
res0: List[Int] = List(2, 17, 35, 9, 12, 7)
Expected output:
For lists that don't contain 2 and/or 7: an empty list
Input: (1, 2, 2, 2, 3, 4, 7, 8); Output: (2, 2, 2, 3, 4, 7)
Input: (1, 2, 3, 4, 7, 7, 7, 8); Output: (2, 3, 4, 7)
Input: (1, 2, 3, 4, 7, 1, 2, 3, 5, 7, 8); Output: ((2, 3, 4, 7), (2, 3, 5, 7))
Too bad that the regex-engines work only with strings, not with general lists - would be really nice if you could find all matches for something like L.*?R with two arbitrary delimiters L and R. Since it doesn't work with regex, you have to build a little automaton yourself. Here is one way to do it:
#annotation.tailrec
def findDelimitedSlices[A](
xs: List[A],
l: A,
r: A,
revAcc: List[List[A]] = Nil
): List[List[A]] = {
xs match {
case h :: t => if (h == l) {
val idx = xs.indexOf(r)
if (idx >= 0) {
val (s, rest) = xs.splitAt(idx + 1)
findDelimitedSlices(rest, l, r, s :: revAcc)
} else {
revAcc.reverse
}
} else {
findDelimitedSlices(t, l, r, revAcc)
}
case Nil => revAcc.reverse
}
}
Input:
for (example <- List(
List(1, 2, 2, 2, 3, 4, 7, 8),
List(1, 2, 3, 4, 7, 7, 7, 8),
List(1, 2, 3, 4, 7, 1, 2, 3, 5, 7, 8)
)) {
println(findDelimitedSlices(example, 2, 7))
}
Output:
List(List(2, 2, 2, 3, 4, 7))
List(List(2, 3, 4, 7))
List(List(2, 3, 4, 7), List(2, 3, 5, 7))
You're looking for slice:
# l.slice(2, 7)
res1: List[Int] = List(2, 17, 35, 9, 12)
# l.slice(2, 8)
res2: List[Int] = List(2, 17, 35, 9, 12, 7)

Spark - Remove intersecting elements between two array type columns

I have dataframe like this
+---------+--------------------+----------------------------+
| Name| rem1| quota |
+---------+--------------------+----------------------------+
|Customer_3|[258, 259, 260, 2...|[1, 2, 3, 4, 5, 6, 7,..500]|
|Customer_4|[18, 19, 20, 27, ...|[1, 2, 3, 4, 5, 6, 7,..500]|
|Customer_5|[16, 17, 51, 52, ...|[1, 2, 3, 4, 5, 6, 7,..500]|
|Customer_6|[6, 7, 8, 9, 10, ...|[1, 2, 3, 4, 5, 6, 7,..500]|
|Customer_7|[0, 30, 31, 32, 3...|[1, 2, 3, 4, 5, 6, 7,..500]|
I would like to remove list value in rem1 from quota and create as one new column. I have tried.
val dfleft = dfpci_remove2.withColumn("left",$"quota".filter($"rem1"))
<console>:123: error: value filter is not a member of org.apache.spark.sql.ColumnName
Please advise.
You can use a filter in a column in such way, you can write an udf as below
val filterList = udf((a: Seq[Int], b: Seq[Int]) => a diff b)
df.withColumn("left", filterList($"rem1", $"quota") )
This should give you the expected result.
Hope this helps!

Idiomatic scala solution to combining sequences

Imagine a function combineSequences: (seqs: Set[Seq[Int]])Set[Seq[Int]] that combines sequences when the last item of first sequence matches the first item of the second sequence. For example, if you have the following sequences:
(1, 2)
(2, 3)
(5, 6, 7, 8)
(8, 9, 10)
(3, 4, 10)
The result of combineSequences would be:
(5, 6, 7, 8, 8, 9, 10)
(1, 2, 2, 3, 3, 4, 10)
Because sequences 1, 2, and 5 combine together. If multiple sequences could combine to create a different result, the decisions is arbitrary. For example, if we have the sequences:
(1, 2)
(2, 3)
(2, 4)
There are two correct answers. Either:
(1, 2, 2, 3)
(2, 4)
Or:
(1, 2, 2, 4)
(2, 3)
I can only think of a very imperative and fairly opaque implementation. I'm wondering if anyone has a solution that would be more idiomatic scala. I've run into related problems a few times now.
Certainly not the most optimized solution but I've gone for readability.
def combineSequences[T]( seqs: Set[Seq[T]] ): Set[Seq[T]] = {
if ( seqs.isEmpty ) seqs
else {
val (seq1, otherSeqs) = (seqs.head, seqs.tail)
otherSeqs.find(_.headOption == seq1.lastOption) match {
case Some( seq2 ) => combineSequences( otherSeqs - seq2 + (seq1 ++ seq2) )
case None =>
otherSeqs.find(_.lastOption == seq1.headOption) match {
case Some( seq2 ) => combineSequences( otherSeqs - seq2 + (seq2 ++ seq1) )
case None => combineSequences( otherSeqs ) + seq1
}
}
}
}
REPL test:
scala> val seqs = Set(Seq(1, 2), Seq(2, 3), Seq(5, 6, 7, 8), Seq(8, 9, 10), Seq(3, 4, 10))
seqs: scala.collection.immutable.Set[Seq[Int]] = Set(List(1, 2), List(2, 3), List(8, 9, 10), List(5, 6, 7, 8), List(3, 4, 10))
scala> combineSequences( seqs )
res10: Set[Seq[Int]] = Set(List(1, 2, 2, 3, 3, 4, 10), List(5, 6, 7, 8, 8, 9, 10))
scala> val seqs = Set(Seq(1, 2), Seq(2, 3, 100), Seq(5, 6, 7, 8), Seq(8, 9, 10), Seq(100, 4, 10))
seqs: scala.collection.immutable.Set[Seq[Int]] = Set(List(100, 4, 10), List(1, 2), List(8, 9, 10), List(2, 3, 100), List(5, 6, 7, 8))
scala> combineSequences( seqs )
res11: Set[Seq[Int]] = Set(List(5, 6, 7, 8, 8, 9, 10), List(1, 2, 2, 3, 100, 100, 4, 10))

FoldRight function scala

I have this function that uses foldright to append the two lists
def append[T](l1: List[T], l2: List[T]): List[T] = (l1 :\ l2) ((a,b) => a::b)
The scala returns:
val l1 = List(1,2,3,4,5)
val l2 = List(6,7,8,9,10)
println(append(l1,l2))
Result: List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
As starting from right to left, the result should not return opposite? Why returns in this way?
foldRight is execute from right to left, so the iteration is,
1: a is 5, b is 6, 7, 8, 9, 10, result is 5, 6, 7, 8, 9, 10
2: a is 4, b is 5, 6, 7, 8, 9, 10, result is 4, 5, 6, 7, 8, 9, 10
...
final result is 1, 2, 3, ..., 8, 9, 10