extract trees from DAG - perl

I have a DAG expressed as nodes and their successor edges. Reifying it as a nested data structure is possible with a simple recursive function.
#tree1.pl
#!/usr/bin/env perl
use 5.028; use strictures; use Moops; use Kavorka qw(fun); use List::AllUtils qw(first);
class Node :ro {
has label => isa => Str;
has children => isa => ArrayRef[Str];
}
fun N($label, $children) {
return Node->new(label => $label, children => $children);
}
# list is really flat, but
# indentation outlines desired tree structure
our #dag = (
N(N0 => ['N1']),
N(N1 => ['N2']),
N(N2 => ['N3']),
N(N3 => ['N4', 'N5']),
N(N4 => []),
N(N5 => []),
);
fun tree(Node $n) {
return bless [
map {
my $c = $_;
tree(first {
$_->label eq $c
} #dag)
} $n->children->#*
] => $n->label;
}
tree($dag[0]);
# bless([ #N0
# bless([ #N1
# bless([ #N2
# bless([ #N3
# bless([] => 'N4'),
# bless([] => 'N5'),
# ] => 'N3')
# ] => 'N2')
# ] => 'N1')
# ] => 'N0')
That was the trivial case.
In my application, I encounter the complication that the DAG contains multiple nodes with the same label.
our #dag = (
N(N0 => ['N1']),
N(N1 => ['N2']),
︙
N(N1 => ['N6', 'N5']),
︙
Note that this does not mean that there is a multiedge in the proper sense.
This is wrong because now N1 appears to have three equal children.
The N1 nodes must not be collapsed into one node for graph traversal purpose, only for labelling the output tree; so in other words these nodes must be of distinct identity. Let's visualise this with colours.
our #dag = (
N(N0 => ['N1']),
N([N1 => 'red'] => ['N2']),
︙
N([N1 => 'blue'] => ['N6', 'N5']),
︙
The goal is to reify this DAG as two trees. Follow each of the dotted successor edges in separate passes. I achieve this by remembering the index number of one colour on the node when I pass it, and during next tree construction I pick the next colour in order.
#tree2.pl
#!/usr/bin/env perl
use 5.028; use strictures; use Moops; use Kavorka qw(fun); use List::AllUtils qw(first);
class Node :ro {
has label => isa => Str;
has col => isa => Maybe[Str];
has children => isa => ArrayRef[Str];
has col_seen => is => 'rw', isa => Int;
}
fun N($c_l, $children) {
return ref $c_l
? Node->new(label => $c_l->[0], col => $c_l->[1], children => $children)
: Node->new(label => $c_l, children => $children);
}
# indentation outlines desired tree structure
our #dag = (
### start 1st tree
N(N0 => ['N1']),
N([N1 => 'red'] => ['N2']),
N(N2 => ['N3']),
N(N3 => ['N4', 'N5']),
N(N4 => []),
N(N5 => []),
### end 1st tree
### start 2nd tree
# N0
N([N1 => 'blue'] => ['N6', 'N5']),
N(N6 => ['N7']),
N(N7 => ['N4']),
# N4
# N5
### end 2nd tree
);
fun tree(Node $n) {
return bless [
map {
my $c = $_;
my #col = map { $_->col } grep { $_->label eq $c } #dag;
if (#col > 1) {
$n->col_seen($n->col_seen + 1);
die 'exhausted' if $n->col_seen > #col;
tree(first {
$_->label eq $c && $_->col eq $col[$n->col_seen - 1]
} #dag);
} else {
tree(first { $_->label eq $c } #dag);
}
} $n->children->#*
] => $n->label;
}
tree($dag[0]);
# bless([ #N0
# bless([ #N1
# bless([ #N2
# bless([ #N3
# bless([] => 'N4'),
# bless([] => 'N5')
# ] => 'N3')
# ] => 'N2')
# ] => 'N1')
# ] => 'N0')
tree($dag[0]);
# bless([ #N0
# bless([ #N1
# bless([ #N6
# bless([ #N7
# bless([] => 'N4')
# ] => 'N7')
# ] => 'N6'),
# bless([] => 'N5')
# ] => 'N1')
# ] => 'N0')
tree($dag[0]);
# exhausted
That code works, I get two trees.
However, there is a problem with my code when I have several of those nodes with coloured successors. Same code as above, just the input is different:
#tree3.pl
︙
our #dag = (
N(N0 => ['N1']),
N([N1 => 'red'] => ['N2']),
N(N2 => ['N3']),
N(N3 => ['N4', 'N5']),
N(N4 => []),
N(N5 => []),
# N0
N([N1 => 'blue'] => ['N6', 'N5']),
N(N6 => ['N7']),
N(N7 => ['N8', 'N4']),
N([N8 => 'purple'] => ['N5']),
# N5
N([N8 => 'orange'] => []),
N([N8 => 'cyan'] => ['N5', 'N5']),
# N5
# N5
# N4
# N5
);
︙
tree($dag[0]);
# bless([ #N0
# bless([ #N1
# bless([ #N2
# bless([ #N3
# bless([] => 'N4'),
# bless([] => 'N5')
# ] => 'N3')
# ] => 'N2')
# ] => 'N1')
# ] => 'N0')
tree($dag[0]);
# bless([ #N0
# bless([ #N1
# bless([ #N6
# bless([ #N7
# bless([ #N8
# bless([] => 'N5')
# ] => 'N8'),
# bless([] => 'N4')
# ] => 'N7')
# ] => 'N6'),
# bless([] => 'N5')
# ] => 'N1')
# ] => 'N0')
tree($dag[0]);
# exhausted
The problem is that the search exhausts after only two trees, although I should get four:
path through red
path through blue, then purple
path through blue, then orange
path through blue, then cyan
You can answer in any programming language.

Is the following what you were aiming to accomplish? (python 3)
from collections import defaultdict
from itertools import product
class bless:
def __init__(self, label, children):
self.label = label
self.children = children
def __repr__(self):
return self.__str__()
# Just pretty-print stuff
def __str__(self):
formatter = "\n{}\n" if self.children else "{}"
formatted_children = formatter.format(",\n".join(map(str, self.children)))
return "bless([{}] => '{}')".format(formatted_children, self.label)
class Node:
def __init__(self, label, children):
self.label = label
self.children = children
class DAG:
def __init__(self, nodes):
self.nodes = nodes
# Add the root nodes to a singular, generated root node (for simplicity)
# This is not necessary to implement the color-separation logic,
# it simply lessens the number of edge cases I must handle to demonstate
# the logic. Your existing code will work fine without this "hack"
non_root = {child for node in self.nodes for child in node.children}
root_nodes = [node.label for node in self.nodes if node.label not in non_root]
self.root = Node("", root_nodes)
# Make a list of all the trees
self.tree_list = self.make_trees(self.root)
def tree(self):
if self.tree_list:
return self.tree_list.pop(0)
return list()
# This is the meat of the program, and is really the logic you are after
# Its a recursive function that parses the tree top-down from our "made-up"
# root, and makes <bless>s from the nodes. It returns a list of all separately
# colored trees, and if prior (recusive) calls already made multiple trees, it
# will take the cartesian product of each tree per label
def make_trees(self, parent):
# A defaultdict is just a hashtable that's empty values
# default to some data type (list here)
trees = defaultdict(list)
# This is some nasty, inefficient means of fetching the children
# your code already does this more efficiently in perl, and since it
# contributes nothing to the answer, I'm not wasting time refactoring it
for node in (node for node in self.nodes if node.label in parent.children):
# I append the tree(s) found in the child to the list of <label>s trees
trees[node.label] += self.make_trees(node)
# This line serves to re-order the trees since the dictionary doesn't preserve
# ordering, and also restores any duplicated that would be lost
values = [trees[label] for label in parent.children]
# I take the cartesian product of all the lists of trees each label
# is associated with in the dictionary. So if I have
# [N1-subtree] [red-N2-subtree, blue-N2-subtree] [N3-subtree]
# as children of N0, then I'll return:
# [bless(N0, [N1-st, red-N2-st, N3-st]), bless(N0, [N1-st, blue-N2-st, N3-st])]
return [bless(parent.label, prod) for prod in product(*values)]
if __name__ == "__main__":
N0 = Node('N0', ['N1'])
N1a = Node('N1', ['N2'])
N2 = Node('N2', ['N3'])
N3 = Node('N3', ['N4', 'N5'])
N4 = Node('N4', [])
N5 = Node('N5', [])
N1b = Node('N1', ['N6', 'N5'])
N6 = Node('N6', ['N7'])
N7 = Node('N7', ['N8', 'N4'])
N8a = Node('N8', ['N5'])
N8b = Node('N8', [])
N8c = Node('N8', ['N5', 'N5'])
dag = DAG([N0, N1a, N2, N3, N4, N5, N1b, N6, N7, N8a, N8b, N8c])
print(dag.tree())
print(dag.tree())
print(dag.tree())
print(dag.tree())
print(dag.tree())
print(dag.tree())
I explained the logic fairly thoroughly through comments, but just to clarify- I generate all the possible trees at once using a recursive DFS from the root. To ensure there's only one root, I make a "fictional" root that contains all other nodes that do not have a parent, and then start the search on that one node. This is not necessary for the algorithm to work, I just wanted to simplify the logic that didn't directly pertain to your question.
In this DFS, I create a hash table / dictionary of lists per label, and store all the distinct subtrees that can be made from each child in these lists. For most nodes this list will be of length 1 since most nodes will generate a single tree unless their label or a (sub)child has duplicate labels. Regardless, I take the cartesian product of all these lists, and form new bless objects (from each product). I return this list, and the process repeats up the call stack until we finally have our complete list of trees.
All of the printing logic is unnecessary (obviously), but I wanted to make it easier for you to verify if this is indeed the behavior you want. I could not (easily) get it to indent nested blesss, but that should be trivial to manually adjust. The only real part of interest is the make_trees() function, the rest is just setting up things for validation or making the code as easily comparable to your perl code as I could manage.
Formatted output:
bless([
bless([
bless([
bless([
bless([
bless([] => 'N4'),
bless([] => 'N5')
] => 'N3')
] => 'N2')
] => 'N1')
] => 'N0')
] => '')
bless([
bless([
bless([
bless([
bless([
bless([
bless([] => 'N5')
] => 'N8'),
bless([] => 'N4')
] => 'N7')
] => 'N6'),
bless([] => 'N5')
] => 'N1')
] => 'N0')
] => '')
bless([
bless([
bless([
bless([
bless([
bless([] => 'N8'),
bless([] => 'N4')
] => 'N7')
] => 'N6'),
bless([] => 'N5')
] => 'N1')
] => 'N0')
] => '')
bless([
bless([
bless([
bless([
bless([
bless([
bless([] => 'N5'),
bless([] => 'N5')
] => 'N8'),
bless([] => 'N4')
] => 'N7')
] => 'N6'),
bless([] => 'N5')
] => 'N1')
] => 'N0')
] => '')
[]
[]

Related

Why are some hashes initialized using curly braces, and some with parentheses?

I'm looking at the following code demonstrating nested hashes:
my %HoH = (
flintstones => {
husband => "fred",
pal => "barney",
},
jetsons => {
husband => "george",
wife => "jane",
"his boy" => "elroy", # Key quotes needed.
},
simpsons => {
husband => "homer",
wife => "marge",
kid => "bart",
},
);
Why is it that the upper-most hash (starting line 1) is initialized using parentheses, whereas the sub-hashes are initialized using curly braces?
Coming from a python background I must say Perl is quite odd :).
Coming from a Perl background I find Perl quite odd, too.
Use parentheses to initialize a hash (or an array). A hash is a map between a set of strings and a set of scalar values.
%foo = ( "key1", "value1", "key2", "value2", ... ); # % means hash
%foo = ( key1 => "value1", key2 => "value2", ... ); # same thing
Braces are used to define a hash reference. All references are scalar values.
$foo = { key1 => "value1", key2 => "value2", ... }; # $ means scalar
Hashes are not scalar values. Since the values in a hash must be scalars, it is therefore not possible to use a hash as a value of another hash.
%bar = ( key3 => %foo ); # doesn't mean what you think it means
But we can use hash references as values of another hash, because hash references are scalars.
$foo = { key1 => "value1", key2 => "value2" };
%bar = ( key3 => $foo );
%baz = ( key4 => { key5 => "value5", key6 => "value6" } );
And that is why you see parentheses surrounding a list of lists with braces.
The essential difference (....) is used to create a hash. {....} is used to create a hash reference
my %hash = ( a => 1 , b => 2 ) ;
my $hash_ref = { a => 1 , b => 2 } ;
In a bit more detail - {....} makes an anonymous hash and returns a reference to it wich is asigned to the scalar $hash_ref
edited to give a bit more detail
First, the parens do nothing but change precedence here. They never have nothing to do with list creation, hash creation or hash initialisation.
For example, the following two lines are 100% equivalent:
{ a => 1, b => 2 }
{ ( a => 1, b => 2 ) }
For example, the following two lines are 100% equivalent:
sub f { return ( a => 1, b => 2 ) } my %hash = f();
sub f { return a => 1, b => 2 } my %hash = f();
Second, one doesn't initialise a hash using { }; one creates a hash using it. { } is equivalent to my %hash;, except that the hash is anonymous. In other words,
{ LIST }
is basically the same as
do { my %anon = LIST; \%anon }
(but doesn't create a lexical scope).
Anonymous hashes allows one to write
my %HoH = (
flintstones => {
husband => "fred",
pal => "barney",
},
jetsons => {
husband => "george",
wife => "jane",
"his boy" => "elroy",
},
simpsons => {
husband => "homer",
wife => "marge",
kid => "bart",
},
);
instead of
my %flintstones = (
husband => "fred",
pal => "barney",
);
my %jetsons = (
husband => "george",
wife => "jane",
"his boy" => "elroy",
);
my %simpsons = (
husband => "homer",
wife => "marge",
kid => "bart",
);
my %HoH = (
flintstones => \%flinstones,
jetsons => \%jetsons,
simpsons => \%simpsons,
);

Perl hash negation

Can you help me correct the code snippet.
I want to list the server which is type eq xyz but not with namedservers.
our %SERVERS = (
"rajesh1" => {type => 'xyz', sha => 'ram'},
"rajesh2" => {type => 'xyz', sha => 'sita'},
"rajesh3" => {type => 'xyz', named => ["raa"]},
"rajesh4" => {type => 'xxx', named => ["rajjaj"]},
);
while ( my $mServer = each(%SERVERS) )
{
if ("$SERVERS{$mServer}{type}" eq "xyz" && !"$SERVERS{$mServer}{named}" )
{
print "Name of the server is $mServer\n";
}
}
Expected result:
rajesh1
rajesh2
You're missing a semicolon after the definition of %SERVERS.
You start calling it $mServer, then later say $gServer. Pick one!
Get rid of the quotes around $SERVERS{$mServer}{type} and $SERVERS{$mServer}{named} (once you've changed gServer to mServer—you don't need them.
You expect to see "rajesh1 rajesh2", but none of them have type "prod". How is that possible? Assuming you change their type to "prod" …
You expect to see "rajesh1 rajesh2", but you print "Name of the server is $mServer\n" (once you change gServer to mServer). Changing that to just "$mServer\n", and …
… it should work.
Hence:
our %SERVERS = (
"rajesh1" => {type => 'prod', sha => 'ram'},
"rajesh2" => {type => 'prod', sha => 'sita'},
"rajesh3" => {type => 'xyz', named => ["raa"]},
"rajesh4" => {type => 'xxx', named => ["rajjaj"]},
);
while (my $mServer = each %SERVERS) {
if ($SERVERS{$mServer}{type} eq "prod" && !$SERVERS{$mServer}{named}) {
print "$mServer\n";
}
}
Then:
$ perl test.pl
rajesh1
rajesh2
$
Complete sample, catching both of each's return values, which reduces visual clutter:
use strict;
use warnings;
our %SERVERS = (
"rajesh1" => {type => 'xyz', sha => 'ram'},
"rajesh2" => {type => 'xyz', sha => 'sita'},
"rajesh3" => {type => 'xyz', named => ["raa"]},
"rajesh4" => {type => 'xxx', named => ["rajjaj"]},
"rajesh5" => {type => 'prod', sha => 'ram'},
"rajesh6" => {type => 'prod', named => ["jajaja"]},
);
while ( my( $mServer, $mData ) = each %SERVERS ) {
if ($mData->{type} eq "prod" && !$mData->{named}) {
print "Name of the server is $mServer\n";
}
}
You are looking for defined.
if ($SERVERS{$mServer}->{type} eq "xyz" &&
! defined $SERVERS{$mServer}->{named} )
...
You were using an undefined variable $gServer where apparently you meant to use the loop variable $mServer. You should use strict; use warnings; in all your scripts; that makes it easy to catch this mistake (and a slew of others).
I use the indirection operator -> to access the contents of hash references as a matter of preference. I also removed some gratuitous quoting as a stylistic change.

Lazy Attribute Coercion

With Moose, you can have lazy builders on attributes, where the builder is called when the attribute is first accessed if the attribute was not already populated. You can have type coercion of an attribute with coerce, but this is applied whenever the attribute is set, so even on object initialization.
I'm looking for a way to implement lazy coercion, where an attribute may be initially populated, but is only coerced when it is first accessed. This is important when coercion is expensive.
In the following example, I use a union type and method modifiers to do this:
package My::Foo;
use Moose;
has x => (
is => 'rw',
isa => 'ArrayRef | Int',
required => 1
);
around "x" => sub {
my $orig = shift;
my $self = shift;
my $val = $self->$orig(#_);
unless(ref($val)) {
# Do the cocerion
$val = [ map { 1 } 1..$val ];
sleep(1); # in my case this is expensive
}
return $val;
};
1;
my $foo = My::Foo->new( x => 4 );
is_deeply $foo->x, [ 1, 1, 1, 1 ], "x converted from int to array at call time";
However there are a few problems with this:
I dislike the union type + method modifier approach. It goes against the "Best Practices" suggestion to use coercion instead of unions. It isn't declarative.
I need to do this with many attributes across many classes. Therefore some form of DRY is needed. This could be meta-attribute roles, type-coercion, what have you.
Update:
I followed ikegami's suggestion to encapsulate the expensive type coercion inside an object and provide an outer coercion to this object:
package My::ArrayFromInt;
use Moose;
use Moose::Util::TypeConstraints;
subtype 'My::ArrayFromInt::Inner',
as 'ArrayRef[Int]';
coerce 'My::ArrayFromInt::Inner',
from 'Int',
via { return [ (1) x $_ ] };
has uncoerced => (is => 'rw', isa => 'Any', required => 1);
has value => (
is => 'rw',
isa => 'My::ArrayFromInt::Inner',
builder => '_buildValue',
lazy => 1,
coerce => 1
);
sub _buildValue {
my ($self) = #_;
return $self->uncoerced;
}
1;
package My::Foo;
use Moose;
use Moose::Util::TypeConstraints;
subtype 'My::ArrayFromInt::Lazy' => as class_type('My::ArrayFromInt');
coerce 'My::ArrayFromInt::Lazy',
from 'Int',
via { My::ArrayFromInt->new( uncoerced => $_ ) };
has x => (
is => 'rw',
isa => 'My::ArrayFromInt::Lazy',
required => 1,
coerce => 1
);
1;
This works if $foo->x->value is called. However this doesn't solve point #2, as I would need to create My::ArrayFromInt and the ::Lazy subtype for each attribute I would like to transform. And I'd like to avoid calling $foo->x->value if possible.
How about having the typedef along the lines described, then doing
has _x => (
is => 'ro',
isa => 'Int|MyArrayOfInts',
init_arg => 'x',
required => 1,
);
has x => (
is => 'ro',
lazy => 1,
isa => 'MyArrayOfInts',
coerce => 1,
default => sub { $_[0]->_x },
);
It'd make sense to wrap that up into some kind of helper method to create the pair of objects along the lines of
has_lazily_coerced x => (
is => 'ro',
isa => 'TargetType',
);
which would introspect on TargetType to get a list of legal types for the uncoerced shadow attribute and generate the pair of attributes for you.

Moose - coercing from Num to ArrayRef[Num]?

Ok, what am I doing wrong - Moose is ignoring my coercion:
package moo;
use Moose;
use Moose::Util::TypeConstraints;
subtype Bar => as 'ArrayRef[Num]';
coerce 'Bar' =>
from 'Num' => via { [ 10 ] }; # this doesn't seem to be getting called
has x => (
is => 'rw',
isa => 'Bar',
);
package main;
my $m1 = moo->new(x => [ 3 ]); # works
my $m2 = moo->new(x => 5); # doesn't work
Maybe you forgot coerce => 1 while defining x attribute.
has x => ( is => 'rw', isa => 'Bar', coerce => 1 );
`

How do I sort by value from a second level hash, in Perl?

my $hash_ref = {
one => { val => 1, name => 'one' },
three => { val => 3, name => 'three'},
two => { val => 2, name => 'two' },
};
I would like to sort $hash_ref such that a foreach would order them by
$hash_ref->{$key}->{'val'}
one
two
three
Any suggestions?
#sorted_list is an array of references to the sorted hash elements:
#sorted_list = sort { $a->{'val'} <=> $b->{'val'} } values %{$unsorted_hash_ref};
You can use it like so:
#!/usr/bin/perl
my $hash_ref = {
one => { val => 1, name => 'one' },
three => { val => 3, name => 'three' },
two => { val => 2, name => 'two' },
};
foreach $elem ( sort { $a->{'val'} <=> $b->{'val'} } values %{$hash_ref} ) {
print "$elem->{'val'} : $elem->{'name'}\n";
}
Output:
1 : one
2 : two
3 : three
Hash tables don't have any specific order. However, you can sort the keys in an array and use that to iterate through the hash:
my $hash_ref = {
one => { val => 1, name => 'one'},
three => { val => 3, name => 'three'},
two => { val => 2, name => 'two'},
};
use strict;
use warnings;
use Lingua::EN::Words2Nums;
foreach my $key (sort { words2nums($a) <=> words2nums($b) } keys %$hash_ref)
{
# do something with $hash_ref->{$key}
print "processing key $key.\n";
}
You can define anything you like as a sort method; see perldoc -f sort for more details. Conversion from ordinal numerical text to arithmetic values is done with Lingua::EN::Words2Nums (it does cardinal numbers too).
use strict;
use warnings;
my %hash_ref = (
one => { val => 1, name => 'one' },
three => { val => 3, name => 'three'},
two => { val => 2, name => 'two' },
);
foreach my $key(sort {$hash_ref{$a}{val} <=> $hash_ref{$b}{val}} keys %hash_ref) {
my $value = $hash_ref{$key}{val};
my $name = $hash_ref{$key}{name};
print "$name -> $value\n";
}
output:
one -> 1
two -> 2
three -> 3
#!/usr/bin/perl
my $hash_ref = (
one => {val => 1, name => "one"},
three => {val => 3, name => "three"},
two => {val => 2, name => 'two'},
);
foreach $elem( sort {$$hash_ref{$a}{val} <=> $$hash_ref{$b}{val}} keys %$hash_ref){
my $value = $hash_ref->{$elem}{val};
my $name = $hash_ref->{$elem}{name};
print "$name -> $value\n";
}
OutPut:
one -> 1
two -> 2
three -> 3