perl printing hash of arrays with out Data::Dumper - perl

Here is the code, I know it is not perfect perl. If you have insight on how I an do better let me know. My main question is how would I print out the arrays without using Data::Dumper?
#!/usr/bin/perl
use Data::Dumper qw(Dumper);
use strict;
use warnings;
open(MYFILE, "<", "move_headers.txt") or die "ERROR: $!";
#First split the list of files and the headers apart
my #files;
my #headers;
my #file_list = <MYFILE>;
foreach my $source_parts (#file_list) {
chomp($source_parts);
my #parts = split(/:/, $source_parts);
unshift(#files, $parts[0]);
unshift(#headers, $parts[1]);
}
# Next get a list of unique headers
my #unique_files;
foreach my $item (#files) {
my $found = 0;
foreach my $i (#unique_files) {
if ($i eq $item) {
$found = 1;
last;
}
}
if (!$found) {
unshift #unique_files, $item;
}
}
#unique_files = sort(#unique_files);
# Now collect the headers is a list per file
my %hash_table;
for (my $i = 0; $i < #files; $i++) {
unshift #{ $hash_table{"$files[$i]"} }, "$headers[$i]";
}
# Process the list with regex
while ((my $key, my $value) = each %hash_table) {
if (ref($value) eq "ARRAY") {
print "$value", "\n";
}
}

The Perl documentation has a tutorial on "Printing of a HASH OF ARRAYS" (without using Data::Dumper)
perldoc perldsc

You're doing a couple things the hard way. First, a hash will already uniqify its keys, so you don't need the loop that does that. It appears that you're building a hash of files, with the values meant to be the headers found in those files. The input data is "filename:header", one per line. (You could use a hash of hashes, since the headers may need uniquifying, but let's let that go for now.)
use strict;
use warnings;
open my $files_and_headers, "<", "move_headers.txt" or die "Can't open move_headers: $!\n";
my %headers_for_file;
while (defined(my $line = <$files_and_headers> )) {
chomp $line;
my($file, $header) = split /:/, $line, 2;
push #{ $headers_for_file{$file} }, $header;
}
# Print the arrays for each file:
foreach my $file (keys %headers_for_file) {
print "$file: #{ $headers_for_file{$file}}\n";
}
We're letting Perl do a chunk of the work here:
If we add keys to a hash, they're always unique.
If we interpolate an array into a print statement, Perl adds spaces between them.
If we push onto an empty hash element, Perl automatically puts an empty anonymous array in the element and then pushes onto that.

An alternative to using Data::Dumper is to use Data::Printer:
use Data::Printer;
p $value;
You can also use this to customise the format of the output. E.g. you can have it all in a single line without the indexes (see the documentation for more options):
use Data::Printer {
index => 0,
multiline => 0,
};
p $value;
Also, as a suggestion for getting unique files, put the elements into a a hash:
my %unique;
#unique{ #files } = #files;
my #unique_files = sort keys %unique;
Actually, you could even skip that step and put everything into %hash_table in one pass:
my %hash_table;
foreach my $source_parts (#file_list) {
chomp($source_parts);
my #parts = split(/:/, $source_parts);
unshift #{ $hash_table{$parts[0]} }, $parts[1];
}

Related

How to check whether one file's value contains in another text file? (perl script)

I would like to check one of the file's values contains on another file. if one of the value contains it will show there is existing bin for that specific, if no, it will show there is no existing bin limit. the problem is I am not sure how to check all values at once.
first DID1 text file value contain :
L84A:D:O:M:
L84C:B:E:D:
second DID text file value contain :
L84A:B:E:Q:X:F:i:M:Y:
L84C:B:E:Q:X:F:i:M:Y:
L83A:B:E:Q:X:F:i:M:Y:
if first 4words value are match, need to check all value for that line.
for example L84A in first text file & second text file value has M . it should print out there is an existing M bin
below is my code :
use strict;
use warnings;
my $filename = 'DID.txt';
my $filename1 = 'DID1.txt';
my $count = 0;
open( FILE2, "<$filename1" )
or die("Could not open log file. $!\n");
while (<FILE2>) {
my ($number) = $_;
chomp($number);
my #values1 = split( ':', $number );
open( FILE, "<$filename" )
or die("Could not open log file. $!\n");
while (<FILE>) {
my ($line) = $_;
chomp($line);
my #values = split( ':', $line );
foreach my $val (#values) {
if ( $val =~ /$values1[0]/ ) {
$count++;
if ( $values[$count] =~ /$values1[$count]/ ) {
print
"Yes ,There is an existing bin & DID\n #values1\n";
}
else {
print "No, There is an existing bin & DID\n";
}
}
}
}
}
I cannot check all value. please help to give any advice on it since this is my first time learning for perl language. Thanks a lot :)
Based on my understanding I write this code:
use strict;
use warnings;
#use ReadWrite;
use Array::Utils qw(:all);
use vars qw($my1file $myfile1cnt $my2file $myfile2cnt #output);
$my1file = "did1.txt"; $my2file = "did2.txt";
We are going to read both first and second files (DID1 and DID2).
readFileinString($my1file, \$myfile1cnt); readFileinString($my2file, \$myfile2cnt);
In first file, as per the OP's request the first four characters should be matched with second file and then if they matched we need to check rest of the characters in the first file with the second one.
while($myfile1cnt=~m/^((\w){4})\:([^\n]+)$/mig)
{
print "<LineStart>";
my $lineChk = $1; my $full_Line = $3; #print ": $full_Line\n";
my #First_values = split /\:/, $full_Line; #print join "\n", #First_values;
If the first four digit matched then,
if($myfile2cnt=~m/^$lineChk\:([^\n]+)$/m)
{
Storing the rest of the content in the same and to be split with colon and getting the characters to be matched with first file contents.
my $FullLine = $1; my #second_values = split /:/, $FullLine;
Then search each letter first and second content which matched line...
foreach my $sngletter(#First_values)
{
If the letters are matched with first and second file its going to be printed.
if( grep {$_ eq "$sngletter"} #second_values)
{
print "Matched: $sngletter\t";
}
}
}
else { print "Not Matched..."; }
This is just information that the line end.
print "<LineEnd>\n"
}
#------------------>Reading a file
sub readFileinString
#------------------>
{
my $File = shift;
my $string = shift;
use File::Basename;
my $filenames = basename($File);
open(FILE1, "<$File") or die "\nFailed Reading File: [$File]\n\tReason: $!";
read(FILE1, $$string, -s $File, 0);
close(FILE1);
}
Read search pattern and data into hash (first field is a key), then go through data and select only field included into pattern for this key.
use strict;
use warnings;
use feature 'say';
my $input1 = 'DID1.txt'; # look for key,pattern(array)
my $input2 = 'DID.txt'; # data - key,elements(array)
my $pattern;
my $data;
my %result;
$pattern = file2hash($input1); # read pattern into hash
$data = file2hash($input2); # read data into hash
while( my($k,$v) = each %{$data} ) { # walk through data
next unless defined $pattern->{$k}; # skip those which is not in pattern hash
my $find = join '|', #{ $pattern->{$k} }; # form search pattern for grep
my #found = grep {/$find/} #{ $v }; # extract only those of interest
$result{$k} = \#found; # store in result hash
}
while( my($k,$v) = each %result ) { # walk through result hash
say "$k has " . join ':', #{ $v }; # output final result
}
sub file2hash {
my $filename = shift;
my %hash;
my $fh;
open $fh, '<', $filename
or die "Couldn't open $filename";
while(<$fh>) {
chomp;
next if /^\s*$/; # skip empty lines
my($key,#data) = split ':';
$hash{$key} = \#data;
}
close $fh;
return \%hash;
}
Output
L84C has B:E
L84A has M

Parsing string in multiline data with positive lookbehind

I am trying to parse data like:
header1
-------
var1 0
var2 5
var3 9
var6 1
header2
-------
var1 -3
var3 5
var5 0
Now I want to get e.g. var3 for header2. Whats the best way to do this?
So far I was parsing my files line-by-line via
open(FILE,"< $file");
while (my $line = <FILE>){
# do stuff
}
but I guess it's not possible to handle multiline parsing properly.
Now I am thinking to parse the file at once but wasn't successful so far...
my #Input;
open(FILE,"< $file");
while (<FILE>){ #Input = <FILE>; }
if (#Input =~ /header2/){
#...
}
The easier way to handle this is "paragraph mode".
local $/ = "";
while (<>) {
my ($header, $body) =~ /^([^\n]*)\n-+\n(.*)/s
or die("Bad data");
my #data = map [ split ], split /\n/, $body;
# ... Do something with $header and #data ...
}
The same can be achieved without messing with $/ as follows:
my #buf;
while (1) {
my $line = <>;
$line =~ s/\s+\z// if !defined($line);
if (!length($line)) {
if (#buf) {
my $header = shift(#buf);
shift(#buf);
my #data = map [ split ], splice(#buf);
# ... Do something with $header and #data ...
}
last if !defined($line);
next;
}
push #buf, $line;
}
(In fact, the second snippet includes a couple of small improvements over the first.)
Quick comments on your attempt:
The while loop is useless because #Input = <FILE> places the remaining lines of the file in #Input.
#Input =~ /header2/ matches header2 against the stringification of the array, which is the stringification of the number of elements in #Input. If you want to check of an element of #Input contains header2, will you will need to loop over the elements of #Inputs and check them individually.
while (<FILE>){ #Input = <FILE>; }
This doesn't make much sense. "While you can read a record from FILE, read all of the data on FILE into #Input". I think what you actually want is just:
my #Input = <FILE>;
if (#Input =~ /header2/){
This is quite strange too. The binding operator (=~) expects scalar operands, so it evaluates both operands in scalar context. That means #Input will be evaluated as the number of elements in #Input. That's an integer and will never match "header2".
A couple of approaches. Firstly a regex approach.
#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';
my $file = 'file';
open my $fh, '<', $file or die $!;
my $data = join '', <$fh>;
if ($data =~ /header2.+var3 (.+?)\n/s) {
say $1;
} else {
say 'Not found';
}
The key to this is the /s on the m// operator. Without it, the two dots in the regex won't match newlines.
The other approach is more of a line by line parser.
#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';
my $file = 'file';
open my $fh, '<', $file or die $!;
my $section = '';
while (<$fh>) {
chomp;
# if the line all word characters,
# then we've got a section header.
if ($_ !~ /\W/) {
$section = $_;
next;
}
my ($key, $val) = split;
if ($section eq 'header2' and $key eq 'var3') {
say $val;
last;
}
}
We read the file a line at a time and make a note of the section headers. For data lines, we split on whitespace and check to see if we're in the right section and have the right key.
In both cases, I've switched to using a more standard approach (lexical filehandles, 3-arg open(), or die $!) for opening the file.

Perl - need to store the column values into hash

I want to create a hash with column header as hash key and column values as hash values in Perl.
For example, if my csv file has the following data:
A,B,C,D,E
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15 ...
I want to create a hash as follows:
A=> 1,6,11
B=>2,7,12
c=>3,8,13 ...
So that just by using the header name I can use that column values.
Is there a way in PERL to do this? Please help me.
I was able to store required column values as array using the following script
use strict;
use warnings;
open( IN, "sample.csv" ) or die("Unable to open file");
my $wanted_column = "A";
my #cells;
my #colvalues;
my $header = <IN>;
my #column_names = split( ",", $header );
my $extract_col = 0;
for my $header_line (#column_names) {
last if $header_line =~ m/$wanted_column/;
$extract_col++;
}
while ( my $row = <IN> ) {
last unless $row =~ /\S/;
chomp $row;
#cells = split( ",", $row );
push( #colvalues, $cells[$extract_col] );
}
my $sizeofarray = scalar #colvalues;
print "Size of the coulmn= $sizeofarray";
But I want to do this to all my column.I guess Hash of arrays will be the best solution but I dont know how to implement it.
Text::CSV is a useful helper module for this sort of thing.
use strict;
use warnings;
use Text::CSV;
use Data::Dumper;
my %combined;
open( my $input, "<", "sample.csv" ) or die("Unable to open file");
my $csv = Text::CSV->new( { binary => 1 } );
my #headers = #{ $csv->getline($input) };
while ( my $row = $csv->getline($input) ) {
for my $header (#headers) {
push( #{ $combined{$header} }, shift(#$row) );
}
}
print Dumper \%combined;
Since you requested without a module - you can use split but you need to bear in mind the limitations. CSV format allows for things like commas nested in quotes. split won't handle that case very well.
use strict;
use warnings;
use Data::Dumper;
my %combined;
open( my $input, "<", "sample.csv" ) or die("Unable to open file");
my $line = <$input>;
chomp ( $line );
my #headers = split( ',', $line );
while (<$input>) {
chomp;
my #row = split(',');
for my $header (#headers) {
push( #{ $combined{$header} }, shift(#row) );
}
}
print Dumper \%combined;
Note: Both of these will effectively ignore any extra columns that don't have headers. (And will get confused by duplicate column names).
Another solution by using for loop :
use strict;
use warnings;
my %data;
my #columns;
open (my $fh, "<", "file.csv") or die "Can't open the file : ";
while (<$fh>)
{
chomp;
my #list=split(',', $_);
for (my $i=0; $i<=$#list; $i++)
{
if ($.==1) # collect the columns, if its first line.
{
$columns[$i]=$list[$i];
}
else #collect the data, if its not the first line.
{
push #{$data{$columns[$i]}}, $list[$i];
}
}
}
foreach (#columns)
{
local $"="\,";
print "$_=>#{$data{$_}}\n";
}
Output will be like this :
A=>1,6,11
B=>2,7,12
C=>3,8,13
D=>4,9,14
E=>5,10,15

how to compare the the array values with different file in different directory?

#!/usr/bin/perl
use strict;
use warnings;
use warnings;
use 5.010;
my #names = ("RD", "HD", "MP");
my $flag = 0;
my $filename = 'Sample.txt';
if (open(my $fh, '<', $filename))
{
while (my $row = <$fh>)
{
foreach my $i (0 .. $#names)
{
if( scalar $row =~ / \G (.*?) ($names[$i]) /xg )
{
$flag=1;
}
}
}
if( $flag ==1)
{
say $filename;
}
$flag=0;
}
here i read the content from one file and compare with array values, if file contant matches with array value i just display the file. in the same way how can i access different file from different direcory and compare the array values with same?
Q: How can I access a different file?
A: By specifying a different filename.
By the way: If you are using flags for loop control in Perl, you are doing something wrong. You can specify that this was the last iteration of the loop (in C: break), or that you want to start the next iteration. You can label the loops so that you can break out of as many loops as you like at once:
#!/usr/bin/perl
use 5.010; use warnings;
my #names = qw(RD HD MP);
# unpack command line arguments
my ($filename) = #ARGV;
open my $fh, "<", $filename or die "Oh noes, $filename is bad: $!";
LINE:
while (my $line = <$fh>) {
NAME:
foreach my $name (#names) {
if ($line =~ /\Q$name\E/) { # \QUOT\E the $name to escape everything
say "$filename contains $name";
last LINE;
}
}
}
Other highlights:
using a foreach loop as intended and
removing the (in this context) senseless \G assertion
You can then execute the script as perl script.pl Sample.txt or perl script.pl ../another.dir/foo.bar or whatever.
You can use the ~~ operator in Perl 5.10.
Don't forget to chomp the trailing whitespace.
#!/usr/bin/perl
use 5.010;
use strict;
use warnings;
my #names = ('RD', 'HD', 'MP');
my $other_dir = '/tmp';
my $filename = 'Sample.txt';
if ( open( my $fh, '<', "$other_dir/$filename" ) ) {
ROW:
while ( my $row = <$fh> ) {
chomp $row; # remove trailing \n
if ( $row ~~ #names ) {
say $filename;
last ROW;
}
}
}
close $fh;

How can I get to my anonymous arrays in Perl?

The following code generates a list of the average number of clients connected by subnet. Currently I have to pipe it through sort | uniq | grep -v HASH.
Trying to keep it all in Perl, this doesn't work:
foreach $subnet (keys %{keys %{keys %days}}) {
print "$subnet\n";
}
The source is this:
foreach $file (#ARGV) {
open(FH, $file) or warn("Can't open file $file\n");
if ($file =~ /(2009\d{4})/) {
$dt = $+;
}
%hash = {};
while(<FH>) {
#fields = split(/~/);
$subnet = $fields[0];
$client = $fields[2];
$hash{$subnet}{$client}++;
}
close(FH);
$file = "$dt.csv";
open(FH, ">$file") or die("Can't open $file for output");
foreach $subnet (sort keys %hash) {
$tot = keys(%{$hash{$subnet}});
$days{$dt}{$subnet} = $tot;
print FH "$subnet, $tot\n";
push #{$subnet}, $tot;
}
close(FH);
}
foreach $day (sort keys %days) {
foreach $subnet (sort keys %{$days{$day}}) {
$tot = $i = 0;
foreach $amt (#{$subnet}) {
$i++;
$tot += $amt;
}
print "$subnet," . int($tot/$i) . "\n";
}
}
How can I eliminate the need for the sort | uniq process outside of Perl? The last foreach gets me the subnet ids which are the 'anonymous' names for the arrays. It generates these multiple times (one for each day that subnet was used).
but this seemed easier than combining
spreadsheets in excel.
Actually, modules like Spreadsheet::ParseExcel make that really easy, in most cases. You still have to deal with rows as if from CSV or the "A1" type addressing, but you don't have to do the export step. And then you can output with Spreadsheet::WriteExcel!
I've used these modules to read a spreadsheet of a few hundred checks, sort and arrange and mung the contents, and write to a new one for delivery to an accountant.
In this part:
foreach $subnet (sort keys %hash) {
$tot = keys(%{$hash{$subnet}});
$days{$dt}{$subnet} = $tot;
print FH "$subnet,$tot\n";
push #{$subnet}, $tot;
}
$subnet is a string, but you use it in the last statement as an array reference. Since you don't have strictures on, it treats it as a soft reference to a variable with the name the same as the content of $subnet. Which is okay if you really want to, but it's confusing. As for clarifying the last part...
Update I'm guessing this is what you're looking for, where the subnet value is only saved if it hasn't appeared before, even from another day (?):
use List::Util qw(sum); # List::Util was first released with perl 5.007003 (5.7.3, I think)
my %buckets;
foreach my $day (sort keys %days) {
foreach my $subnet (sort keys %{$days{$day}}) {
next if exists $buckets{$subnet}; # only gives you this value once, regardless of what day it came in
my $total = sum #{$subnet}; # no need to reuse a variable
$buckets{$subnet} = int($total/#{$subnet}; # array in scalar context is number of elements
}
}
use Data::Dumper qw(Dumper);
print Dumper \%buckets;
Building on Anonymous's suggestions, I built a hash of the subnet names to access the arrays:
..
push #{$subnet}, $tot;
$subnets{$subnet}++;
}
close(FH);
}
use List::Util qw(sum); # List::Util was first released with perl 5.007003
foreach my $subnet (sort keys %subnets) {
my $total = sum #{$subnet}; # no need to reuse a variable
print "$subnet," . int($total/#{$subnet}) . "\n"; # array in scalar context is number of elements
}
I am not sure if this is the best solution, but I don't have the duplicates any more.