Retrieve first row from CSV as headers using Text::CSV - perl

I feel like I'm missing something rather obvious, but can't find any answers in the documentation. Still new to OOP with Perl, but I'm using Text::CSV to parse a CSV for later use.
How would I go about extracting the first row and pushing the values to array #headers?
Here's what I have so far:
#!/usr/bin/perl
use warnings;
use diagnostics;
use strict;
use Fcntl ':flock';
use Text::CSV;
my $csv = Text::CSV->new({ sep_char => ',' });
my $file = "sample.csv";
my #headers; # Column names
open(my $data, '<:encoding(utf8)', $file) or die "Could not open '$file' $!\n";
while (my $line = <$data>) {
chomp $line;
if ($csv->parse($line)) {
my $r = 0; # Increment row counter
my $field_count = $csv->fields(); # Number of fields in row
# While data exists...
while (my $fields = $csv->getline( $data )) {
# Parse row into columns
print "Row ".$r.": \n";
# If row zero, process headers
if($r==0) {
# Add value to #columns array
push(#headers,$fields->[$c]);
} else {
# do stuff with records...
}
}
$r++
}
close $data;
You'd think that there would be a way to reference the existing fields in the first row.

Pretty much straight from the documentation, for example.
#!/usr/bin/perl
use strict;
use warnings;
use Text::CSV_XS;
my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
my $file = 'o33.txt';
open my $io, "<", $file or die "$file: $!";
my $header = $csv->getline ($io);
print join("-", #$header), "\n\n";
while (my $row = $csv->getline ($io)) {
print join("-", #$row), "\n";
}
__END__
***contents of o33.txt
lastname,firstname,age,gender,phone
mcgee,bobby,27,M,555-555-5555
kincaid,marl,67,M,555-666-6666
hofhazards,duke,22,M,555-696-6969
Prints:
lastname-firstname-age-gender-phone
mcgee-bobby-27-M-555-555-5555
kincaid-marl-67-M-555-666-6666
hofhazards-duke-22-M-555-696-6969
Update: Thinking about your problem, it may be that you want to address the data by its column name. For that, you might be able to use something (also from the docs), like this:
$csv->column_names ($csv->getline ($io));
while (my $href = $csv->getline_hr ($io)) {
print "lastname is: ", $href->{lastname},
" and gender is: ", $href->{gender}, "\n"
}
Note: You can use Text::CSV instead of Text::CSV_XS, as the former is a wrapper around the latter.

Thought I'd post my results for others.
#!/usr/bin/perl
use warnings;
use diagnostics;
use strict;
use Text::CSV;
sub read_csv {
my $csv = Text::CSV->new({ sep_char => ',' });
my $file = shift;
open(my $data, '<:encoding(utf8)', $file) or die "Could not open '$file' $!\n";
# Process Row Zero
my $header = $csv->getline ($data);
my $field_count = $csv->fields();
# Read the rest of the file
while (my $line = <$data>) {
chomp $line;
# Read line if possible
if ($csv->parse($line)) {
my $r = 0;
# While data exists...
while (my $fields = $csv->getline( $data )) {
# Parse row into columns
print Display->H2;
print "Row ".$r.": ".#$fields." columns. \n";
# Print column values
for(my $c=0; $c<#$fields; $c++) {
print #$header[$c]." : ".#$fields[$c]."\n";
}
$r++
}
}
close $data;
}
}
Cheers

Related

How to filter columns from CSV file based on names of columns

I am using the CSV data like below. I don't want to use user and timestamp from csv file. I may add few columns or delete columns.
I didnt find the any suitable method in Text CSV.
Please let me know if any method or module is available
UniqueId, Name, description, user,timestamp
1,jana,testing,janardar,12-10-2018:00:
sub _filter_common_columns_from_csv{
my $csvfile = shift;
my $CSV = Text::CSV_XS->new(
{
binary => 1,
auto_diag => 3,
allow_quotes => 0,
eol => $/
});
my $_columns ||= do {
open(my $fh, '<', $csvfile) or die $!;
my #cols = #{ $CSV->getline($fh) };
close $fh or die $!;
for (#cols) { s/^\s+//; s/\s+$//; }
\#cols;
};
my #columns = #{ $_columns };
my %deleted;
my #regexes = qw(user timestamp);
foreach my $regex (#regexes) {
foreach my $i (0 .. ($#columns - 1)) {
my $col = $columns[$i];
$deleted{$i} = $col if $col =~ /$regex/;
}
}
my #wanted_columns = grep { !$deleted{$_} } 0 .. $#columns - 1;
my $input_temp = "$ENV{HOME}/output/temp_test.csv";
open my $tem, ">",$input_temp or die "$input_temp: $!";
open(my $fh, '<', $csvfile) or die $!;
while (my $row = $CSV->getline($fh)) {
my #fields = #$row;
$CSV->print($tem, [ #fields[#wanted_columns] ]) or $CSV->error_diag;
}
close $fh or die $!;
close $tem or die $!;
return $input_temp;
}
See getline_hr
use warnings;
use strict;
use feature 'say';
use List::MoreUtils qw(any);
use Text::CSV;
my $file = shift #ARGV || die "Usage: $0 filename\n";
my #exclude_cols = qw(user timestamp);
my $csv = Text::CSV->new ( { binary => 1 } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $fh, '<', $file or die "Can't open $file: $!";
my #cols = #{ $csv->getline($fh) };
my #wanted_cols = grep {
my $name = $_;
not any { $name eq $_ } #exclude_cols;
} #cols;
my $row = {};
$csv->bind_columns (\#{$row}{#cols});
while ($csv->getline($fh)) {
my #wanted_fields = #$row{ #wanted_cols };
say "#wanted_fields";
}
The syntax #$row{#wanted_cols} is for a hash slice, which returns a list of values for the keys in #wanted_cols from the hashref $row.
Actual example using Text::AutoCSV to remove given named columns from arbitrary CSV files like in your posted code (More complicated than the examples in the documentation for only writing specific columns):
#!/usr/bin/perl
use warnings;
use strict;
use Text::AutoCSV qw/remove_accents/;
sub remove_columns {
my ($infile, $outfile, $drop) = #_;
my $csv = Text::AutoCSV->new(in_file => $infile, out_file => $outfile);
# Normalize column names the same way that Text::AutoCSV does
my %drops = map { my $h = remove_accents $_;
$h =~ s/[^[:alnum:]_]//gi;
$h = uc $h;
$h => 1 } #$drop;
my #cols = grep { not exists $drops{$_} } $csv->get_fields_names;
# Hack to avoid reading the file twice.
$csv->{out_fields} = \#cols;
$csv->write();
}
remove_columns "in.csv", "out.csv", [ "user", "timestamp" ];
If you want to modify your CSV in other ways, too, and if SQL would be convenient for those modifications, then consider using DBD::CSV.
You can then open a database handle on your CSV file, select the desired columns with a SELECT query, and write the results with Text::CSV or Text::CSV_XS.
For more details, see the DBD::CSV documentation or e.g. this simple wrapper script for querying CSV files.

Perl - need to store the column values into hash

I want to create a hash with column header as hash key and column values as hash values in Perl.
For example, if my csv file has the following data:
A,B,C,D,E
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15 ...
I want to create a hash as follows:
A=> 1,6,11
B=>2,7,12
c=>3,8,13 ...
So that just by using the header name I can use that column values.
Is there a way in PERL to do this? Please help me.
I was able to store required column values as array using the following script
use strict;
use warnings;
open( IN, "sample.csv" ) or die("Unable to open file");
my $wanted_column = "A";
my #cells;
my #colvalues;
my $header = <IN>;
my #column_names = split( ",", $header );
my $extract_col = 0;
for my $header_line (#column_names) {
last if $header_line =~ m/$wanted_column/;
$extract_col++;
}
while ( my $row = <IN> ) {
last unless $row =~ /\S/;
chomp $row;
#cells = split( ",", $row );
push( #colvalues, $cells[$extract_col] );
}
my $sizeofarray = scalar #colvalues;
print "Size of the coulmn= $sizeofarray";
But I want to do this to all my column.I guess Hash of arrays will be the best solution but I dont know how to implement it.
Text::CSV is a useful helper module for this sort of thing.
use strict;
use warnings;
use Text::CSV;
use Data::Dumper;
my %combined;
open( my $input, "<", "sample.csv" ) or die("Unable to open file");
my $csv = Text::CSV->new( { binary => 1 } );
my #headers = #{ $csv->getline($input) };
while ( my $row = $csv->getline($input) ) {
for my $header (#headers) {
push( #{ $combined{$header} }, shift(#$row) );
}
}
print Dumper \%combined;
Since you requested without a module - you can use split but you need to bear in mind the limitations. CSV format allows for things like commas nested in quotes. split won't handle that case very well.
use strict;
use warnings;
use Data::Dumper;
my %combined;
open( my $input, "<", "sample.csv" ) or die("Unable to open file");
my $line = <$input>;
chomp ( $line );
my #headers = split( ',', $line );
while (<$input>) {
chomp;
my #row = split(',');
for my $header (#headers) {
push( #{ $combined{$header} }, shift(#row) );
}
}
print Dumper \%combined;
Note: Both of these will effectively ignore any extra columns that don't have headers. (And will get confused by duplicate column names).
Another solution by using for loop :
use strict;
use warnings;
my %data;
my #columns;
open (my $fh, "<", "file.csv") or die "Can't open the file : ";
while (<$fh>)
{
chomp;
my #list=split(',', $_);
for (my $i=0; $i<=$#list; $i++)
{
if ($.==1) # collect the columns, if its first line.
{
$columns[$i]=$list[$i];
}
else #collect the data, if its not the first line.
{
push #{$data{$columns[$i]}}, $list[$i];
}
}
}
foreach (#columns)
{
local $"="\,";
print "$_=>#{$data{$_}}\n";
}
Output will be like this :
A=>1,6,11
B=>2,7,12
C=>3,8,13
D=>4,9,14
E=>5,10,15

Perl - have a comma separated output , want to write that in a CSV

Here is the code:
my #col= sort keys %colnames;
print "mRNA,".join(",",#col)."\n";
foreach my $row(keys %rownames){
print "$row";
foreach my $col(#col){
my $num=$mat{$col}->{$row};
$num=~s/(\.\d\d)\d+/$1/;
print ",$num";
}
print "\n";
}
Output:
mRNA,Benzopyrene12h_replica1,Benzopyrene12h_replica2
E2F1,5.01,4.72
REV1,2.76,2.67
POLK,1.21,1.87
POLH,1.49,1.56
POLI,1.94,2.45
Please help me write this output to .csv file.
Something like this might work... Combining with Miller's answer. I didn't test it, just giving you an idea. And it's defiantly could be written more cleanly and less redundant.
use strict;
use warnings;
use autodie;
my $csvFile = Text::CSV->new ( { binary => 1, eol => "\n" } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
my #col= sort keys %colnames;
my #csv;
$csv[0][0] = "mRNA,";
my #joinCol = join(",",#col);
my $i =1;
foreach (#joinCol) {
$csv[0][$i] = $_;
$i++;
}
my $k = 1;
foreach my $row(keys %rownames){
my $j = 0;
print "$row";
$csv[$k][$j] = $row;
foreach my $col(#col){
my $num=$mat{$col}->{$row};
$num=~s/(\.\d\d)\d+/$1/;
print ",$num";
$csv[$k][$j] = $num;
$j++;
}
print "\n";
$k++;
}
open $fh, '>', "new.csv" or die "Couldn't open csv file: $! \n";
for (#csv) {
$csvFile->print($fh, $_);
}
close $fh;
To write to a CSV file, use Text::CSV
use strict;
use warnings;
use autodie;
# Your Data Initialization
my %colnames; # = Something
my %rownames; # = Something else
my %mat; # = a hash of hash
# Prepare CSV
my $csv = Text::CSV->new ( { binary => 1, eol => "\n" } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open $fh, '>', "new.csv";
my #col = sort keys %colnames;
# Output Header
$csv->print($fh, ['mRNA', #col]);
# Output Rows
for my $row (keys %rownames){
my #data = ($row);
for my $col (#col){
my $num = $mat{$col}{$row};
$num =~ s/(\.\d\d)\d+/$1/;
push #data, $num;
}
$csv->print($fh, \#data);
}
close $fh;

Dynamic array of hashes in Perl

I have a CSV file like this:
name,email,salary
a,b#b.com,1000
d,e#e.com,2000
Now, I need to transform this to an array of hash-maps in Perl, so when I do something like:
table[1]{"email"}
it returns e#e.com.
The code I wrote is :
open(DATA, "<$file") or die "Cannot open the file\n";
my #table;
#fetch header line
$line = <DATA>;
my #header = split(',',$line);
#fetch data tuples
while($line = <DATA>)
{
my %map;
my #row = split(',',$line);
for($index = 0; $index <= $#header; $index++)
{
$map{"$header[$index]"} = $row[$index];
}
push(#table, %map);
}
close(DATA);
But I am not getting desired results.. Can u help?? Thanks in advance...
This line
push(#table, %map)
should be
push(#table, \%map)
You want table to be a list of hash references; your code adds each key and value in %map to the list as a separate element.
There is no need to reinvent the wheel here. You can do this with the Text::CSV module.
#!/usr/bin/perl
use strict;
use warnings;
use v5.16;
use Text::CSV;
my $csv = Text::CSV->new;
open my $fh, "<:encoding(utf8)", "data.csv" or die "data.csv: $!";
$csv->column_names( $csv->getline ($fh) );
while (my $row = $csv->getline_hr ($fh)) {
say $row->{email};
}
Something like this perhaps:
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
my #table;
chomp(my $header = <DATA>);
my #cols = split /,/, $header; # Should really use a real CSV parser here
while (<DATA>) {
chomp;
my %rec;
#rec{#cols} = split /,/;
push #table, \%rec;
}
say $table[1]{email};
__END__
name,email,salary
a,b#b.com,1000
d,e#e.com,2000

Split string into variables and use output as element

Well.. I'm stuck again. I've read up quite a few topic with similar problems but not finding a solution for mine. I have a ; delimited csv file and the strings at the 8th column ($elements[7]) is as following: "aaaa;bb;cccc;ddddd;eeee;fffff;gg;". What i'm trying is to split the string based on ; and capture the outputs to variables. Then use those variables in the main csv file in their own column.
So now the file is like:
3d;2f;7j;8k;4s;2b;5g;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";4g;1a;5g;2g;7h;3d;2f;7j
3c;9k;5l;4g;1a;5g;3d;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";4g;1a;5g;2g;7h;3d;2f;7j
4g;1a;5g;2g;7h;3d;8k;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";3d;2f;7j;8k;4s;2b;4g;1a
And i want it like:
3d;2f;7j;8k;4s;2b;5g;4g;1a;5g;2g;7h;3d;2f;7j;aaaa;bb;cccc;ddddd;eeee;fffff;gg
3c;9k;5l;4g;1a;5g;3d;4g;1a;5g;2g;7h;3d;2f;7j;aaaa;bb;cccc;ddddd;eeee;fffff;gg;
4g;1a;5g;2g;7h;3d;8k;3d;2f;7j;8k;4s;2b;4g;1a;aaaa;bb;cccc;ddddd;eeee;fffff;gg;
This is my code i've been trying it with. I know.. it's terrible! But i'm hoping someone can help me?
use strict;
use warnings;
my $inputfile = shift || die "Give files\n";
my $outputfile = shift || die "Give output\n";
open my $INFILE, '<', $inputfile or die "In use / Not found :$!\n";
open my $OUTFILE, '>', $outputfile or die "In use :$!\n";
while (<$INFILE>) {
s/"//g;
my #elements = split /;/, $_;
my ($varA, $varB, $varC, $varD, $varE, $varF, $varG, $varH) split (';', $elements[10]);
$elements[16] = $varA;
$elements[17] = $varB;
$elements[18] = $varC;
$elements[19] = $varD;
$elements[20] = $varE;
$elements[21] = $varF;
$elements[22] = $varG;
$elements[23] = $varH;
my $output_line = join(";", #elements);
print $OUTFILE $output_line;
}
close $INFILE;
close $OUTFILE;
exit 0;
I'm confused about the my statement as well, it shouldn't be possible right? I mean the $vars are in a closed part so it shouldn't be possible to write them to $elements?
EDIT
This is how i adjusted the code with TLP's suggestions:
use strict;
use warnings;
use Text::CSV;
my $inputfile = shift || die "Give files\n";
my $outputfile = shift || die "Give output\n";
open my $INFILE, '<', $inputfile or die "In use / Not found :$!\n";
open my $OUTFILE, '>', $outputfile or die "In use :$!\n";
my $csv = Text::CSV->new({ # create a csv object
sep_char => ";", # delimiter
eol => "\n", # adds newline to print
});
while (my $row = $csv->getline($INFILE)) { # $row is an array ref
my $line = splice(#$row, 10, 1); # remove 8th line
$csv->parse($line); # parse the line
push #$row, $csv->fields(); # push newly parsed fields onto main array
$csv->print($OUTFILE, $row);
}
close $INFILE;
close $OUTFILE;
exit 0;
You should use a CSV module, e.g. Text::CSV to parse your data. Here's a brief example on how it can be done. You can replace the file handles I used below with your own.
use strict;
use warnings;
use Text::CSV;
my $csv = Text::CSV->new({ # create a csv object
sep_char => ";", # delimiter
eol => "\n", # adds newline to print
});
while (my $row = $csv->getline(*DATA)) { # $row is an array ref
my $line = splice(#$row, 7, 1); # remove 8th line
$csv->parse($line); # parse the line
push #$row, $csv->fields(); # push newly parsed fields onto main array
$csv->print(*STDOUT, $row);
}
__DATA__
3d;2f;7j;8k;4s;2b;5g;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";4g;1a;5g;2g;7h;3d;2f;7j
3c;9k;5l;4g;1a;5g;3d;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";4g;1a;5g;2g;7h;3d;2f;7j
4g;1a;5g;2g;7h;3d;8k;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";3d;2f;7j;8k;4s;2b;4g;1a
Output:
3d;2f;7j;8k;4s;2b;5g;4g;1a;5g;2g;7h;3d;2f;7j;aaaa;bb;cccc;ddddd;eeee;fffff;gg;
3c;9k;5l;4g;1a;5g;3d;4g;1a;5g;2g;7h;3d;2f;7j;aaaa;bb;cccc;ddddd;eeee;fffff;gg;
4g;1a;5g;2g;7h;3d;8k;3d;2f;7j;8k;4s;2b;4g;1a;aaaa;bb;cccc;ddddd;eeee;fffff;gg;