Create directory tree in Perl that would comply with Fancytree expected JSON format - perl

How to create directory tree in Perl to comply with Fancytree expected JSON format?
This is the Perl part I came up with, that traverses through given path:
sub get_tree
{
my ($gpath) = #_;
my %r;
use File::Find;
my $c = sub {
my $dir = $File::Find::dir;
my $r = \%r;
my $tdir = $dir;
$tdir =~ s|^\Q$gpath\E/?||;
$r = $r->{$_} ||= {} for split m|/|, $tdir;
};
find($c, $gpath);
return \%r;
}
It returns the following result after JSON encode:
{
"dir3":{
},
"dir1":{
"sub-dir2":{
},
"sub-dir1":{
}
},
"dir2":{
"sub-dir1":{
"sub-sub-dir1":{
"sub-sub-sub-dir1":{
}
}
}
}
}
The expected result for Fancytree to comply with its JSON format is:
[
{"parent": "dir3"},
{"parent": "dir2", "child": [
{"parent": "sub-dir1", "child": [
{"parent": "sub-sub-dir1", "child": [
{"parent": "sub-sub-sub-dir1"}
]}
]}
]},
{"parent": "dir1", "child": [
{"parent": "sub-dir1"},
{"parent": "sub-dir1"}
]}
]
The point is to do it in a single run, without post processing, which would be ideal.
Any help of how to achieve that?

You can try,
use strict;
use warnings;
use Data::Dumper;
sub get_tree {
my ($gpath) = #_;
my %r;
my #root;
use File::Find;
my $cb = sub {
my $tdir = $File::Find::dir;
$tdir =~ s|^\Q$gpath\E/?||;
return if $r{$tdir} or !$tdir;
my ($pdir, $cdir) = $tdir =~ m|^ (.+) / ([^/]+) \z|x;
my $c = $r{$tdir} = { parent => $cdir // $tdir };
if (defined $pdir) { push #{ $r{$pdir}{child} }, $c }
else { push #root, $c }
};
find($cb, $gpath);
return \#root;
}
It uses hash for fast lookup of nodes, and complete directory structure is built atop of #root.

Using recursion instead of File::Find, using Path::Tiny to handle paths:
#!/usr/bin/perl
use warnings;
use strict;
use Path::Tiny;
sub get_tree {
my ($struct, $root, #path) = #_;
for my $child (path($root, #path)->children) {
if (-d $child) {
my $base = $child->basename;
push #$struct, { parent => $base };
my $recurse = get_tree($struct->[-1]{child} = [],
$root, #path, $base);
delete $struct->[-1]{child} unless #$recurse;
}
}
return $struct
}
use Test::More tests => 1;
use Test::Deep;
my $expected = bag({parent => 'dir1',
child => bag(
{parent => 'sub-dir1'},
{parent => 'sub-dir2'})},
{parent => 'dir2',
child => bag(
{parent => 'sub-dir1',
child => bag({
parent => 'sub-sub-dir1',
child => bag({
parent => 'sub-sub-sub-dir1'
})})})},
{parent => 'dir3'});
my $tree = get_tree([], 'paths');
cmp_deeply $tree, $expected, 'same';

I guess the following would produce the structure you wanted.
test.pl
use strict;
use warnings;
use JSON;
sub get_json
{
return JSON->new->latin1->pretty->encode(#_);
}
sub get_tree
{
my ($gpath) = #_;
my (%r,#rr);
use File::Find;
my $c = sub {
my $dir = $File::Find::name;
my $r = \%r;
my $rr = \#rr;
my $tdir = $dir;
$tdir =~ s|^\Q$gpath\E/?||;
my $previtem;
for my $item(split m|/|, $tdir) {
if ($previtem) {
$rr=$r->{$previtem}[1]{child}//=[];
$r= $r->{$previtem}[0]{child}//={};
}
$r->{$item} //= [ { }, $rr->[#$rr]= { parent=>$item } ];
$previtem = $item;
}
};
find($c, $gpath);
return \%r,\#rr;
}
my ($r,$rr) = get_tree($ARGV[0]);
print get_json($rr);
output
[
{
"parent" : "test.pl"
},
{
"parent" : "dir1",
"child" : [
{
"parent" : "sub-dir1"
},
{
"parent" : "sub-dir2"
}
]
},
{
"parent" : "dir2",
"child" : [
{
"parent" : "sub-dir1",
"child" : [
{
"parent" : "sub-sub-dir1"
}
]
}
]
},
{
"parent" : "dir3"
}
]
I've run it: perl test.pl .. So you see 'test.pl' in the output
In case you want to traverse only directories, change the find call to:
find({wanted=>$c, preprocess=> sub { grep { -d $_ } #_; } }, $gpath);

Summarizing, here is the final code, that will produce valid JSON object expected by Fancytree out of the box. Thanks to everyone, who was generous to spend time and provide help.
Perl:
#!/usr/bin/perl
use warnings;
use strict;
=head2 get_tree(path, [depth])
Build sorted directory tree in format expected by Fancytree
=item path - The path from which to start searching.
=item depth - The optional parameter to limit the depth.
=cut
use File::Find;
use JSON;
sub get_tree {
my ( $p, $d ) = #_;
my $df = int($d);
my %r;
my #r;
my $wanted = sub {
my $td = $File::Find::name;
if ( -d $td ) {
$td =~ s|^\Q$p\E/?||;
if ( $r{$td} || !$td ) {
return;
}
my ( $pd, $cd ) = $td =~ m|^ (.+) / ([^/]+) \z|x;
my $pp = $p ne '/' ? $p : undef;
my $c = $r{$td} = {
key => "$pp/$td",
title => ( defined($cd) ? $cd : $td )
};
defined $pd ? ( push #{ $r{$pd}{children} }, $c ) : ( push #r, $c );
}
};
my $preprocess = sub {
my $dd = ( $df > 0 ? ( $df + 1 ) : 0 );
if ($dd) {
my $d = $File::Find::dir =~ tr[/][];
if ( $d < $dd ) {
return sort #_;
}
return;
}
sort #_;
};
find(
{
wanted => $wanted,
preprocess => $preprocess
},
$p
);
return \#r;
}
# Retrieve JSON tree of `/home` with depth of `5`
JSON->new->encode(get_tree('/home', 5));
JavaScript:
$('.container').fancytree({
source: $.ajax({
url: tree.cgi,
dataType: "json"
})
});
I'm using it in Authentic Theme for Webmin/Usermin for File Manager.
Try it on the best server management panel of the 21st Century ♥️

Related

Virtual Filesystem in Perl with Fuse

Anybody help me make a virtual file system in Perl.
Very simple, 2 depth level, as
/subdir
subdir-l2
file2.txt
/file1.txt
I try use Fuse.pm, but not understand how create subdir level. I create %files hash, and if go to subdir, recreate it with new records. It's for test only.
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use Fuse;
use POSIX qw(ENOENT EISDIR EINVAL);
my (%files) = (
'.' => {
type => 0040,
mode => 0755,
ctime => 1490603721
},
subdir => {
type => 0040,
mode => 0755,
ctime => 1490603721
},
"file1.txt" => {
type => 0100,
mode => 0755,
ctime => 1490603721
}
);
sub filename_fixup {
my ($file) = shift;
$file =~ s,^/,,;
$file = '.' unless length($file);
return $file;
}
sub getdir {
my $tmp = shift;
if ($tmp eq '/') {
return (keys %files),0;
} else {
(%files) = (
'.' => {
type => 0040,
mode => 0755,
ctime => 1490603721
},
# /subdir/subdir-l2
"subdir-l2" => {
type => 0040,
mode => 0755,
ctime => 1490603721
} ,
# /subdir/a-l2.file
"file2.txt" => {
cont => "File 'al2'.\n",
type => 0100,
mode => 0755,
ctime => 1490603721
}
);
return (keys %files),0;
}
}
sub getattr {
my ($file) = filename_fixup(shift);
$file =~ s,^/,,;
$file = '.' unless length($file);
return -ENOENT() unless exists($files{$file});
my ($size) = exists($files{$file}{cont}) ? length($files{$file}{cont}) : 0;
$size = $files{$file}{size} if exists $files{$file}{size};
my ($modes) = ($files{$file}{type}<<9) + $files{$file}{mode};
my ($dev, $ino, $rdev, $blocks, $gid, $uid, $nlink, $blksize) = (0,0,0,1,0,0,1,1024);
my ($atime, $ctime, $mtime);
$atime = $ctime = $mtime = $files{$file}{ctime};
return ($dev,$ino,$modes,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks);
}
Fuse::main(
mountpoint => "/tmp/123",
getdir => \&getdir,
getattr => \&getattr,
);
one level mount fine, but if go to deeper i get
?????????? ? ? ? ? ? file2.txt
?????????? ? ? ? ? ? subdir-l2
I'm really not a regular user of the Fuse module, neither of FUSE system. Tinkered with this issue out of pure curiosity. Thus, although I can't explain in very much details how to use the plain Fuse module to achieve your goal, I have a working code that does create the wanted filesystem (at least on my system, and seems that it is capable of creating any arbitrary filesystem tree), and I can explain how I got this code working.
So first of all I discovered the Fuse::Simple module on CPAN.
Its SYNOPSIS shows that it provides a really simple API to the Fuse module for creating arbitrary filesystems from a hash structure. Its source code isn't that huge, so I just created 'listing.pl' script file and copied there most of the functions (except fserr that caused a Modification of a read-only value exception), put the main sub contents out, so they will be the main script's flow, hardcoded the filesystem structure ($fs var), and made some little adjustments here and there (like declare vars with my to prevent exceptions), and finally got the filesystem mounted, with all directories listed and files readable. So this is the code I got at last:
#!/usr/bin/env perl
use strict;
use warnings;
use diagnostics;
use Carp;
use Fuse;
use Errno qw(:POSIX); # ENOENT EISDIR etc
use Fcntl qw(:DEFAULT :mode); # S_IFREG S_IFDIR, O_SYNC O_LARGEFILE etc.
use Switch;
my $debug = 0;
my %codecache = ();
my $ctime = time();
my $uid = $>;
my $gid = $) + 0;
my $fs = {
"file1.txt" => "File 1 contents",
"subdir" => {
"subdir-l2" => {},
"file2.txt" => "File 2 contents"
}
};
# some default args
my %args = (
"mountpoint" => "listing",
"debug" => $debug,
"fuse_debug" => 0,
"threaded" => 0,
"/" => $fs
);
# the default subs
my %fs_subs = (
"chmod" => \&fs_not_imp,
"chown" => \&fs_not_imp,
"flush" => \&fs_flush,
"fsync" => \&fs_not_imp,
"getattr" => \&fs_getattr,
"getdir" => \&fs_getdir,
"getxattr" => \&fs_not_imp,
"link" => \&fs_not_imp,
"listxattr" => \&fs_not_imp,
"mkdir" => \&fs_not_imp,
"mknod" => \&fs_not_imp,
"open" => \&fs_open,
"read" => \&fs_read,
"readlink" => \&fs_readlink,
"release" => \&fs_release,
"removexattr" => \&fs_not_imp,
"rmdir" => \&fs_not_imp,
"rename" => \&fs_not_imp,
"setxattr" => \&fs_not_imp,
"statfs" => \&fs_statfs,
"symlink" => \&fs_not_imp,
"truncate" => \&fs_truncate,
"unlink" => \&fs_not_imp,
"utime" => sub{return 0},
"write" => \&fs_write,
);
# except extract these ones back out.
$debug = delete $args{"debug"};
$args{"debug"} = delete( $args{"fuse_debug"} ) || 0;
delete $args{"/"};
# add the functions, if not already defined.
# wrap in debugger if debug is set.
for my $name (keys %fs_subs) {
my $sub = $fs_subs{$name};
# $sub = wrap($sub, $name) if $debug;
$args{$name} ||= $sub;
}
Fuse::main(%args);
sub fetch {
my ($path, #args) = #_;
my $obj = $fs;
for my $elem (split '/', $path) {
next if $elem eq ""; # skip empty // and before first /
$obj = runcode($obj); # if there's anything to run
# the dir we're changing into must be a hash (dir)
return ENOTDIR() unless ref($obj) eq "HASH";
# note that ENOENT and undef are NOT the same thing!
return ENOENT() unless exists $obj->{$elem};
$obj = $obj->{$elem};
}
return runcode($obj, #args);
}
sub runcode {
my ($obj, #args) = #_;
while (ref($obj) eq "CODE") {
my $old = $obj;
if (#args) { # run with these args. don't cache
delete $codecache{$old};
print "running $obj(",quoted(#args),") NO CACHE\n" if $debug;
$obj = saferun($obj, #args);
} elsif (exists $codecache{$obj}) { # found in cache
print "got cached $obj\n" if $debug;
$obj = $codecache{$obj}; # could be undef, or an error, BTW
} else {
print "running $obj() to cache\n" if $debug;
$obj = $codecache{$old} = saferun($obj);
}
if (ref($obj) eq "NOCACHE") {
print "returned a nocache() value - flushing\n" if $debug;
delete $codecache{$old};
$obj = $$obj;
}
print "returning ",ref($obj)," ",
defined($obj) ? $obj : "undef",
"\n" if $debug;
}
return $obj;
}
sub saferun {
my ($sub, #args) = #_;
my $ret = eval { &$sub(#args) };
my $died = $#;
if (ref($died)) {
print "+++ Error $$died\n" if ref($died) eq "ERROR";
return $died;
} elsif ($died) {
print "+++ $died\n";
# stale file handle? moreorless?
return ESTALE();
}
return $ret;
}
sub nocache {
return bless(\ shift, "NOCACHE"); # yup, utter abuse of bless :-)
}
sub dump_open_flags {
my $flags = shift;
printf " flags: 0%o = (", $flags;
for my $bits (
[ O_ACCMODE(), O_RDONLY(), "O_RDONLY" ],
[ O_ACCMODE(), O_WRONLY(), "O_WRONLY" ],
[ O_ACCMODE(), O_RDWR(), "O_RDWR" ],
[ O_APPEND(), O_APPEND(), "|O_APPEND" ],
[ O_NONBLOCK(), O_NONBLOCK(), "|O_NONBLOCK" ],
[ O_SYNC(), O_SYNC(), "|O_SYNC" ],
[ O_DIRECT(), O_DIRECT(), "|O_DIRECT" ],
[ O_LARGEFILE(), O_LARGEFILE(), "|O_LARGEFILE" ],
[ O_NOFOLLOW(), O_NOFOLLOW(), "|O_NOFOLLOW" ],
) {
my ($mask, $flag, $name) = #$bits;
if (($flags & $mask) == $flag) {
$flags -= $flag;
print $name;
}
}
printf "| 0%o !!!", $flags if $flags;
print ")\n";
}
sub accessor {
my $var_ref = shift;
croak "accessor() requires a reference to a scalar var\n"
unless defined($var_ref) && ref($var_ref) eq "SCALAR";
return sub {
my $new = shift;
$$var_ref = $new if defined($new);
return $$var_ref;
}
}
sub fs_not_imp { return -ENOSYS() }
sub fs_flush {
# we're passed a path, but finding my coderef stuff from a path
# is a bit of a 'mare. flush the lot, won't hurt TOO much.
print "Flushing\n" if $debug;
%codecache = ();
return 0;
}
sub easy_getattr {
my ($mode, $size) = #_;
return (
0, 0, # $dev, $ino,
$mode,
1, # $nlink, see fuse.sourceforge.net/wiki/index.php/FAQ
$uid, $gid, # $uid, $gid,
0, # $rdev,
$size, # $size,
$ctime, $ctime, $ctime, # actually $atime, $mtime, $ctime,
1024, 1, # $blksize, $blocks,
);
}
sub fs_getattr {
my $path = shift;
my $obj = fetch($path);
# undef doesn't actually mean "file not found", it could be a coderef
# file-sub which has returned undef.
return easy_getattr(S_IFREG | 0200, 0) unless defined($obj);
switch (ref($obj)) {
case "ERROR" { # this is an error to be returned.
return -$$obj;
}
case "" { # this isn't a ref, it's a real string "file"
return easy_getattr(S_IFREG | 0644, length($obj));
}
# case "CODE" should never happen - already been run by fetch()
case "HASH" { # this is a directory hash
return easy_getattr(S_IFDIR | 0755, 1);
}
case "SCALAR" { # this is a scalar ref. we use these for symlinks.
return easy_getattr(S_IFLNK | 0777, 1);
}
else { # what the hell is this file?!?
print "+++ What on earth is ",ref($obj)," $path ?\n";
return easy_getattr(S_IFREG | 0000, 0);
}
}
}
sub fs_getdir {
my $obj = fetch(shift);
return -$$obj if ref($obj) eq "ERROR"; # THINK this is a good idea.
return -ENOENT() unless ref($obj) eq "HASH";
return (".", "..", sort(keys %$obj), 0);
}
sub fs_open {
# doesn't really need to open, just needs to check.
my $obj = fetch(shift);
my $flags = shift;
dump_open_flags($flags) if $debug;
# if it's undefined, and we're not writing to it, return an error
return -EBADF() unless defined($obj) or ($flags & O_ACCMODE());
switch (ref($obj)) {
case "ERROR" { return -$$obj; }
case "" { return 0 } # this is a real string "file"
case "HASH" { return -EISDIR(); } # this is a directory hash
else { return -ENOSYS(); } # what the hell is this file?!?
}
}
sub fs_read {
my $obj = fetch(shift);
my $size = shift;
my $off = shift;
return -ENOENT() unless defined($obj);
return -$$obj if ref($obj) eq "ERROR";
# any other types of refs are probably bad
return -ENOENT() if ref($obj);
if ($off > length($obj)) {
return -EINVAL();
} elsif ($off == length($obj)) {
return 0; # EOF
}
return substr($obj, $off, $size);
}
sub fs_readlink {
my $obj = fetch(shift);
return -$$obj if ref($obj) eq "ERROR";
return -EINVAL() unless ref($obj) eq "SCALAR";
return $$obj;
}
sub fs_release {
my ($path, $flags) = #_;
dump_open_flags($flags) if $debug;
return 0;
}
sub fs_statfs {
return (
255, # $namelen,
1,1, # $files, $files_free,
1,1, # $blocks, $blocks_avail, # 0,0 seems to hide it from df?
2, # $blocksize,
);
}
sub fs_truncate {
my $obj = fetch(shift, ""); # run anything to set it to ""
return -$$obj if ref($obj) eq "ERROR";
return 0;
}
sub fs_write {
my ($path, $buf, $off) = #_;
my $obj = fetch($path, $buf, $off); # this runs the coderefs!
return -$$obj if ref($obj) eq "ERROR";
return length($buf);
}
Final word: I didn't try to use the module itself (it's not listed in my distro package repository, and I was too lazy (sorry) to install it by cpanm or other way). But I think that if I'll have to just use FUSE with Perl, I'll probably just use Fuse::Simple instead of Fuse, maybe forking it. I'd use plain Fuse only for my academic research, I think...
Hope this helps.

Converting hash to CSV with Text::CSV::Slurp

I have a Perl hash that I need to write to JSON and to CSV formats.
A print Dumper( \%formdata ) of my hash looks like this.
$VAR1 = {
'SPRequest' => {
'xrelease' => '13038',
'macaddr' => '47:00:11:22:00:30',
'name' => 'localhost',
'description' => 'demo'
},
'.submit' => bless( do{\(my $o = 1)}, 'JSON::PP::Boolean' ),
'class' => 'SPRequest',
'22406' => {
'win.profile' => 'production',
'win.os_version' => 'standard',
'win.os_part_size' => '1'
}
};
Here is the snipplet of code I use to produce my json file.
my $form_data_file = "/tmp/${hostname}_${macaddr}.json";
open FH, ">$form_data_file" or die "Could not open $form_data_file. :$!\n";
print FH to_json( \%formdata, {pretty=>1} );
close FH;
I am able to output my JSON to a file which looks like this:
[red#tools-dev1 psong]$ cat /tmp/localhost_47-00-11-22-00-30.json
{
"SPRequest" : {
"xrelease" : "13038",
"macaddr" : "47:00:11:22:00:30",
"name" : "localhost",
"description" : "demo"
},
".submit" : true,
"class" : "SPRequest",
"22406" : {
"win.profile" : "production",
"win.os_version" : "standard",
"win.os_part_size" : "1"
}
}
Here is the code I am using to create my CSV file:
my $form_data_file_csv = "/tmp/${hostname}_${macaddr}.csv";
# Text::CSV::Slurp wants arrayref of hashref
my $ARoHR = [ \%formdata ];
my $csv = Text::CSV::Slurp->create( input => $ARoHR);
open FH, ">$form_data_file_csv" or die "Could not open $form_data_file_csv. :$!\n";
print FH $csv;
close FH;
But the problem is my CSV file which ends up looking like this:
[red#tools-dev1 psong]$ cat /tmp/localhost_47-00-11-22-00-30.csv ; echo
.submit,22406,SPRequest,class
true,HASH(0x8d81918),HASH(0x8d67980),SPRequest
What am I doing wrong here?
UPDATE: Looks like what I was doing wrong was expecting Text::CSV::Slurp to work with a Hash-of-Hash. So I rolled my own:
my #cols;
my #row;
sub hash2cvs {
my $h = shift;
my $p = shift || 'top';
foreach my $k ( keys %{$h} ) {
if (ref $h->{$k} eq ref {}) {
hash2cvs( $h->{$k}, $k );
} else {
if ( $p eq 'top' ) {
push #cols, $k;
} else {
push #cols, "$p.$k";
}
push #row, $h->{$k};
}
}
}
Looks like what I was doing wrong was, expecting Text::CSV::Slurp to work with a Hash-of-Hash. So I rolled my own:
my #cols;
my #row;
sub hash2cvs {
my $h = shift;
my $p = shift || 'top';
foreach my $k ( keys %{$h} ) {
if (ref $h->{$k} eq ref {}) {
hash2cvs( $h->{$k}, ($p eq 'top') ? $k : "$p.$k" );
} else {
if ( $p eq 'top' ) {
push #cols, $k;
} else {
push #cols, "$p.$k";
}
push #row, $h->{$k};
}
}
}

How to get Data::Diver to produce arrays?

The below script will output
$VAR1 = {
'tank' => {
'fs' => {
'fs2b' => undef,
'fs2a' => undef,
'fs2c' => undef
}
}
};
where I really wanted a hash of hash of array like this
$VAR1 = {
'tank' => {
'fs' => [
'fs2a',
'fs2b',
'fs2c'
]
}
};
Question
How would that be done with Data::Diver?
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Data::Diver 'DiveRef';
my #array = ("tank", "tank/fs", "tank/fs/fs2a", "tank/fs/fs2b", "tank/fs/fs2c");
my %hash = ();
foreach my $element (#array) {
DiveRef( \%hash, \( split /\//, $element ) );
}
print Dumper \%hash;
(Code provided by ysth in this answer to another question.)
Update
The array in the code is just an example. The real array have ~100 elements, so the solution can't be hard coded.
DiveVal(\%data, 'tank', 'fs', 0) = 'fs2a';
DiveVal(\%data, 'tank', 'fs', 1) = 'fs2b';
DiveVal(\%data, 'tank', 'fs', 2) = 'fs2c';
or
push #{ DiveVal(\%data, 'tank', 'fs') }, 'fs2a';
push #{ DiveVal(\%data, 'tank', 'fs') }, 'fs2b';
push #{ DiveVal(\%data, 'tank', 'fs') }, 'fs2c';
To get the desired data structure from "tank", "tank/fs", "tank/fs/fs2a", "tank/fs/fs2b", "tank/fs/fs2c", extra information is needed. For you example, you could have the understanding that the data structure is always going to be a HoHoA.
my #data = ("tank", "tank/fs", "tank/fs/fs2a", "tank/fs/fs2b", "tank/fs/fs2c");
my %data;
for (#data) {
my #parts = split qr{/};
if (#parts < 3) {
DiveVal(\%data, map \$_, #parts);
} else {
my $val = pop(#parts);
push #{ DiveVal(\%data, map \$_, #parts) }, $val;
}
}
But which such a limited structure, there's no reason to use Data::Diver. It would be far faster to avoid it.
my #data = ("tank", "tank/fs", "tank/fs/fs2a", "tank/fs/fs2b", "tank/fs/fs2c");
my %data;
for (#data) {
my #parts = split qr{/};
if (#parts == 1) { \( $data{$parts[0]} ); }
elsif (#parts == 2) { \( $data{$parts[0]}{$parts[1]} ); }
else { push #{ $data{$parts[0]}{$parts[1]} }, $parts[2]; }
}
You might even be able to use
my #data = ("tank", "tank/fs", "tank/fs/fs2a", "tank/fs/fs2b", "tank/fs/fs2c");
my %data;
for (#data) {
my #parts = split qr{/};
push #{ $data{$parts[0]}{$parts[1]} }, $parts[2] if #parts == 3;
}

Pass hash to subroutine inside a subroutine already passed that hash

I am working with passing hashes to various subroutines, and I was wondering how to pass a hash to a subroutine and then pass the same hash inside that subroutine to a different subroutine and so on.
For example, the following code works fine.
use strict;
use warnings;
my %hash = (
key1 => 'value1',
key2 => 'value2',
key3 => 'value3',
key4 => '',
);
print %hash, "\n";
check_it(\%hash);
sub check_it {
my $params = shift;
foreach(keys %{$params}){
if($params->{$_}) {
print "'$_' defined as '$params->{$_}'\n";
}
else {
print "'$_' not defined as '$params->{$_}'. Deleting it.\n";
#delete $params->{$_};
$params->{$_} = 'null';
}
}
for ( my $i = 0 ; $i < 7 ; $i++ ) {
print "looping\n";
&check_tags_again(\%hash);
}
}
sub check_tags_again {
my $hash_now = shift;
#check again...
foreach(keys %{$hash_now}){
print "An element of hash: ", $hash_now->{$_}, "\n";
#if(defined $hash_now->{$_}){ delete $hash_now->{$_};}
}
&check_tags_thrice(\%hash);
}
sub check_tags_thrice {
my $hash_now = shift;
#check again...
foreach(keys %{$hash_now}){
print "An element of hash: ", $hash_now->{$_}, "\n";
#if(defined $hash_now->{$_}){ delete $hash_now->{$_};}
}
}
print "final hash:", %hash, "\n";
BUT, when I run the code that follows:
use strict;
use warnings;
use Data::Dumper;
sub process_data {
my $group_size = 10;
my %HoA = (
flintstones => [ "fred", "barney" ],
jetsons => [ "george", "jane", "elroy" ],
simpsons => [ "homer", "marge", "bart" ],
);
&delete_stuff( \%HoA, $group_size );
print "New group:\n";
print Dumper( \%HoA );
undef %HoA;
}
sub delete_stuff {
my $HoARef = shift;
my $group_size = shift;
print "group size in sub $group_size\n";
for ( my $j = 0 ; $j < $group_size ; $j++ ) {
my $dlted = &delete_other_stuff( \%HoA, $j );
print "deleted? '$dlted'\n";
if ( $dlted == 0 ) {
&presence_check( \%HoA, $j );
}
for ( my $i = 0 ; $i < $group_size ; $i++ ) {
}
}
}
sub delete_other_stuff {
my $HoAref = shift;
my $Dex = shift;
return $deleted;
}
sub presence_check {
my $HoAreF = shift;
my $DeX = shift;
}
I get:
Global symbol "%HoA" requires explicit package name at x.pl line 32.
Global symbol "%HoA" requires explicit package name at x.pl line 35.
Execution of x.pl aborted due to compilation errors.
I'm confused because I think it's doing the same thing as the first, but now it claims that %HoA was never initialized.
In delete_stuff, you don't have %HoA, you have $HoARef. If all the subs are expecting a reference to a hash, then you can just use it:
for ( my $j = 0 ; $j < $group_size ; $j++ ) {
my $dlted = &delete_other_stuff( $HoARef, $j );
print "deleted? '$dlted'\n";
if ( $dlted == 0 ) {
&presence_check( $HoARef, $j );
}
...
}
By the way, we're closing on 20 years of Perl 5. There is no reason to call a sub with explicitly passed parameters with an &, which is a Perl 4 holdover.

Prune empty and singleton arrayrefs from complex Perl data structure

I'm trying to neaten up a large data structure in Perl which was read in from JSON. Two stereotypical elements look like this (in JSON):
[
[ [ {'payload':'test'} ], [ [ {'payload':'reply'} ], [] ] ],
[ [ {'payload':'another thread'} ]
]
I want to completely remove that empty arrayref at the bottom of that element, and replace each arrayref containing only a single hashref by the contained hashref. In other words, the result should be this:
[
[ {'payload':'test'}, [ {'payload':'reply'} ] ],
[ {'payload':'another thread'} ]
]
Currently my code is as follows:
use v5.12;
use strict;
use warnings;
use JSON::XS;
use Data::Walk;
sub cleanup {
if (ref $_ eq 'ARRAY') {
if (scalar(#{$_}) == 0) {
die 'mysteriously I never reach this branch!';
while (my ($key,$value) = each #{$Data::Walk::container}) {
if ($value == $_) {
delete ${$Data::Walk::container}[$key]
}
}
} elsif (scalar(#{$_}) == 1 and ref #{$_}[0]) {
$_ = #{$_}[0];
} else {
my $tail = ${$_}[scalar(#{$_})-1];
if (ref $tail eq 'ARRAY' and scalar(#{$tail}) == 0) {
$#{$_}--;
}
}
}
}
sub get {
my $begin = shift;
$begin = 0 unless $begin;
my $end = shift();
$end = $begin + 25 unless $end;
my $threads;
{
local $/;
open(my $f, '<emails.json');
$threads = decode_json <$f>;
close($f);
}
$threads = [ #{$threads}[$begin .. $end] ];
walkdepth(\&eliminate_singleton, $threads);
return $threads;
}
print JSON::XS->new->ascii->pretty->encode(&get('subject:joke'));
and though it succeeds in removing the empty arrayref, it fails to collapse the singletons. How can this code be corrected such that it can collapse the singletons?
I see that you want to remove empty arrays that are elements of arrays, but I don't understand replace each singleton arrayref by a reference to its element. Do you perhaps mean to replace each hash value that is a single-element array with its contents?
So
[
"data1",
[],
"data3",
]
is converted to
[
"data1",
"data3",
]
and
{
"key1" : ["val1", "val2"],
"key2" : ["val3"],
"key3" : ["val4", "val5"],
}
is converted to
{
"key1" : ["val1", "val2"],
"key2" : "val3",
"key3" : ["val4", "val5"],
}
In your program the latter corresponds to "tags" : ["inbox"] becoming "tags" : "inbox".
If that is the case then this version of eliminate_singleton does what you want.
It takes a view from the container node and check whether anything inside needs to be modified. Doing it from the point of view of the node itself can result in nodes being modified while they are being scanned over which will break the program. As it is, looping from the end of an array backwards is safe as it doesn't remove any unvisited nodes.
use Scalar::Util 'reftype';
sub eliminate_singleton {
my $node = $_;
my $type = reftype $node // '';
if ($type eq 'ARRAY') {
for (my $i = $#$node; $i >= 0; $i--) {
my $subnode = $node->[$i];
my $subtype = reftype($subnode) // '';
delete $node->[$i] if $subtype eq 'ARRAY' and #$subnode == 0;
}
}
elsif ($type eq 'HASH') {
for my $k (keys %$node) {
my $subnode = $node->{$k};
my $subtype = reftype($subnode) // '';
if ($subtype eq 'ARRAY' and #$subnode == 1) {
$node->{$k} = $node->{$k}[0];
};
}
}
}