Read a Web Service output and write it to CSV in Perl - perl

I am trying to read the output of webservice and write to a csv file. The source has many fields and it is quoted with comma delimiter.
A few columns have values on a new line too.
213,"Dan","This is kool","dblee"
214,"Taylot","This is,
not mine","typelee"
Output
213,"Dan","dblee"
214,"Taylot","typelee"
Here I am trying to use get to capture of the output of WS. But facing a problem while trying to read individual fields.This ($csv->getline($fh1)) is not returning an array.
How to make it to return an array. Any inputs.
use Getopt::Long;
use File::Copy;
use Text::CSV;
my $filename = '/mydir/file.txt';
my $filename2 = '/mydir/file_formatted.csv';
unlink $filename;
for ( my $i = 1 ; $i <= 2 ; $i++ )
{
$WS_URL = "https://adesfhr.com/wpi.php?cmd=dsd&PageNumber=$i&pageSize=10&responseType=csv"; `
my $resp = $SSO->get($WS_URL); `
my $data = $resp->content(); `
last if ! $data; `
if ($data) {
my $csv = Text::CSV->new(
{
binary => 1, # Allow special character. Always set this
auto_diag => 1, # Report irregularities immediately
sep_char => ',', #Separatrp
}
);
open( my $fh1, "<:encoding(utf8)", \$data ) or die "$!";
open( my $fh2, '>>', $filename )
or die "Could not open file '$filename' $!";
<$fh1>;
while ( my $row = $csv->getline($fh1) ) {
my $newrow = splice( #$row, 0, 3 );
print "#$newrow\n";
$csv->print( $fh2, $newrow );
}
close $fh2;
close $fh1;
}
}

Related

Compare two files and write matching data from first file using perl

First file
FirstName:LastName:Location:Country:ID
FirstName1:LastName1:Location1:Country1:ID1
FirstName2:LastName2:Location2:Country2:ID2
FirstName3:LastName3:Location3:Country3:ID3
FirstName4:LastName4:Location4:Country4:ID4
Second file
FirstName:LastName:Location:Country:Old_ID
FirstName2:LastName2:Location2:Country2:Old_ID2
FirstName4:LastName4:Location4:Country4:Old_ID4
Have to compare first and second file and print matching rows with data from first file which is have new ID's.
Below script fetches me Old_ID's from second file and not the new ones from first file
use warnings;
use strict;
my $details = 'file2.txt';
my $old_details = 'file1.txt';
my %names;
open my $data, '<', $details or die $!;
while (<$data>)
{
my ($name, #ids) = split;
push #{ $names{$_} }, $name for #ids;
}
open my $old_data, '<', $old_details or die $!;
while (<$old_data>)
{
chomp;
print #{ $names{$_} // [$_] }, "\n";
}
Output:
FirstName:LastName:Location:Country:Old_ID
FirstName2:LastName2:Location2:Country2:Old_ID2
FirstName4:LastName4:Location4:Country4:Old_ID4
Expected output:
FirstName:LastName:Location:Country:ID
FirstName2:LastName2:Location2:Country2:ID2
FirstName4:LastName4:Location4:Country4:ID4
Just try this way:
use strict; # Use strict Pragma
use warnings;
my ($file1, $filecnt1, $file2, $filecnt2) = ""; #Declaring variables
$file1 = "a1.txt"; $file2 = "b1.txt"; #Sample files
readFileinString($file1, \$filecnt1); # Reading first file
readFileinString($file2, \$filecnt2); # Reading second file
$filecnt2=~s/\:Old\_ID/\:ID/g; # Replacing that difference content
my #firstfle = split "\n", $filecnt1; # Move content to array variable to compare
my #secndfle = split "\n", $filecnt2;
my %firstfle = map { $_ => 1 } #firstfle; #Mapping the array into hash variable
my #scdcmp = grep { $firstfle{$_} } #secndfle;
print join "\n", #scdcmp;
#---------------> File reading
sub readFileinString
#--------------->
{
my $File = shift;
my $string = shift;
open(FILE1, "<$File") or die "\nFailed Reading File: [$File]\n\tReason: $!";
read(FILE1, $$string, -s $File, 0);
close(FILE1);
}
#---------------> File Writing
sub writeFileinString
#--------------->
{
my $File = shift;
my $string = shift;
my #cDir = split(/\\/, $File);
my $tmp = "";
for(my $i = 0; $i < $#cDir; $i++)
{
$tmp = $tmp . "$cDir[$i]\\";
mkdir "$tmp";
}
if(-f $File){
unlink($File);
}
open(FILE, ">$File") or die "\n\nFailed File Open for Writing: [$File]\n\nReason: $!\n";
print FILE $$string;
close(FILE);
}

How to filter columns from CSV file based on names of columns

I am using the CSV data like below. I don't want to use user and timestamp from csv file. I may add few columns or delete columns.
I didnt find the any suitable method in Text CSV.
Please let me know if any method or module is available
UniqueId, Name, description, user,timestamp
1,jana,testing,janardar,12-10-2018:00:
sub _filter_common_columns_from_csv{
my $csvfile = shift;
my $CSV = Text::CSV_XS->new(
{
binary => 1,
auto_diag => 3,
allow_quotes => 0,
eol => $/
});
my $_columns ||= do {
open(my $fh, '<', $csvfile) or die $!;
my #cols = #{ $CSV->getline($fh) };
close $fh or die $!;
for (#cols) { s/^\s+//; s/\s+$//; }
\#cols;
};
my #columns = #{ $_columns };
my %deleted;
my #regexes = qw(user timestamp);
foreach my $regex (#regexes) {
foreach my $i (0 .. ($#columns - 1)) {
my $col = $columns[$i];
$deleted{$i} = $col if $col =~ /$regex/;
}
}
my #wanted_columns = grep { !$deleted{$_} } 0 .. $#columns - 1;
my $input_temp = "$ENV{HOME}/output/temp_test.csv";
open my $tem, ">",$input_temp or die "$input_temp: $!";
open(my $fh, '<', $csvfile) or die $!;
while (my $row = $CSV->getline($fh)) {
my #fields = #$row;
$CSV->print($tem, [ #fields[#wanted_columns] ]) or $CSV->error_diag;
}
close $fh or die $!;
close $tem or die $!;
return $input_temp;
}
See getline_hr
use warnings;
use strict;
use feature 'say';
use List::MoreUtils qw(any);
use Text::CSV;
my $file = shift #ARGV || die "Usage: $0 filename\n";
my #exclude_cols = qw(user timestamp);
my $csv = Text::CSV->new ( { binary => 1 } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $fh, '<', $file or die "Can't open $file: $!";
my #cols = #{ $csv->getline($fh) };
my #wanted_cols = grep {
my $name = $_;
not any { $name eq $_ } #exclude_cols;
} #cols;
my $row = {};
$csv->bind_columns (\#{$row}{#cols});
while ($csv->getline($fh)) {
my #wanted_fields = #$row{ #wanted_cols };
say "#wanted_fields";
}
The syntax #$row{#wanted_cols} is for a hash slice, which returns a list of values for the keys in #wanted_cols from the hashref $row.
Actual example using Text::AutoCSV to remove given named columns from arbitrary CSV files like in your posted code (More complicated than the examples in the documentation for only writing specific columns):
#!/usr/bin/perl
use warnings;
use strict;
use Text::AutoCSV qw/remove_accents/;
sub remove_columns {
my ($infile, $outfile, $drop) = #_;
my $csv = Text::AutoCSV->new(in_file => $infile, out_file => $outfile);
# Normalize column names the same way that Text::AutoCSV does
my %drops = map { my $h = remove_accents $_;
$h =~ s/[^[:alnum:]_]//gi;
$h = uc $h;
$h => 1 } #$drop;
my #cols = grep { not exists $drops{$_} } $csv->get_fields_names;
# Hack to avoid reading the file twice.
$csv->{out_fields} = \#cols;
$csv->write();
}
remove_columns "in.csv", "out.csv", [ "user", "timestamp" ];
If you want to modify your CSV in other ways, too, and if SQL would be convenient for those modifications, then consider using DBD::CSV.
You can then open a database handle on your CSV file, select the desired columns with a SELECT query, and write the results with Text::CSV or Text::CSV_XS.
For more details, see the DBD::CSV documentation or e.g. this simple wrapper script for querying CSV files.

extracting regions from a range file in a formatted output perl

I have a input and list file like this:
input.txt file:
>gi|NP_415931.4
MTEQQKLTFTALQQRLDSLMLRDRLRFSRRLHGVKKVKNPDAQQAIFQEMAKEIDQAAGKVLLREAARPEITYPD
>gi|NP_418770.2
MMNKSNFEFLKGVNDFTYAIACAAENNYPDDPNTTLIKMRMFGEATAKHLGLL
>gi|YP_026226.4
MRKFTLNIFTLSLGLAVMPMVEAAPTAQQQLLEQVRLGEATHREDLVQQSLYRLELIDPNNPDVVAARFRSLLRQGDIDGAQKQ
list.txt file:
NP_415931.4: 1-5, 6-8
YP_026226.4: 3-7, 9-9, 10, 12-15
Now, for this time, I want a csv formatted output.csv (with certain added header) as (for the above inputs):
ID,Regions,Length,Sequences
NP_415931.4,1-5,5,MTEQQ
,6-8,3,KLT
YP_026226.4,3-7,5,KFTLN
,9-9,1,F
,10,1,T
,12-15,4,SLGL
that is, it first match the list file headers with those of input files and the matched once's sequences are taken and then it gives the output arranging in the above format.
the excel view of the output.csv would be:
How can I generate the above output.csv file from those inputs?
Thanks
Here is an approach. To summarize: We have a master database file input.txt with all defined sequences. Our job is to extract certain information from this database and write it to a CSV file. The information about what to extract is given in file list.txt.
use feature qw(say);
use strict;
use warnings;
my $input_fn = 'input.txt';
open ( my $fh1, '<', $input_fn ) or die "Could not open file '$input_fn': $!";
my %seqs;
while( my $line = <$fh1> ) {
my ($id ) = $line =~ /gi\|(.*)$/;
chomp( my $seq = <$fh1> );
$seqs{$id} = $seq;
}
close $fh1;
say join ',', qw(ID Regions Length Sequences);
my $list_fn = 'list.txt';
open ( my $fh2, '<', $list_fn ) or die "Could not open file '$list_fn': $!";
while( my $line = <$fh2> ) {
chomp $line;
my ( $id, #regions ) = split /[:,]\s?/, $line;
for my $i (0..$#regions) {
my $region = $regions[$i];
my $start = my $end = $region;
if ( $region =~ /(\d+)-(\d+)/ ) {
$start = $1;
$end = $2;
}
my $name = ($i == 0) ? $id : "";
my $seq = substr( $seqs{$id}, $start - 1, $end - $start + 1);
say join ',', $name, $region, length( $seq ), $seq;
}
}
close $fh2;
Output:
ID,Regions,Length,Sequences
NP_415931.4,1-5,5,MTEQQ
,6-8,3,KLT
YP_026226.4,3-7,5,KFTLN
,9-9,1,F
,10,1,T
,12-15,4,SLGL

Perl - have a comma separated output , want to write that in a CSV

Here is the code:
my #col= sort keys %colnames;
print "mRNA,".join(",",#col)."\n";
foreach my $row(keys %rownames){
print "$row";
foreach my $col(#col){
my $num=$mat{$col}->{$row};
$num=~s/(\.\d\d)\d+/$1/;
print ",$num";
}
print "\n";
}
Output:
mRNA,Benzopyrene12h_replica1,Benzopyrene12h_replica2
E2F1,5.01,4.72
REV1,2.76,2.67
POLK,1.21,1.87
POLH,1.49,1.56
POLI,1.94,2.45
Please help me write this output to .csv file.
Something like this might work... Combining with Miller's answer. I didn't test it, just giving you an idea. And it's defiantly could be written more cleanly and less redundant.
use strict;
use warnings;
use autodie;
my $csvFile = Text::CSV->new ( { binary => 1, eol => "\n" } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
my #col= sort keys %colnames;
my #csv;
$csv[0][0] = "mRNA,";
my #joinCol = join(",",#col);
my $i =1;
foreach (#joinCol) {
$csv[0][$i] = $_;
$i++;
}
my $k = 1;
foreach my $row(keys %rownames){
my $j = 0;
print "$row";
$csv[$k][$j] = $row;
foreach my $col(#col){
my $num=$mat{$col}->{$row};
$num=~s/(\.\d\d)\d+/$1/;
print ",$num";
$csv[$k][$j] = $num;
$j++;
}
print "\n";
$k++;
}
open $fh, '>', "new.csv" or die "Couldn't open csv file: $! \n";
for (#csv) {
$csvFile->print($fh, $_);
}
close $fh;
To write to a CSV file, use Text::CSV
use strict;
use warnings;
use autodie;
# Your Data Initialization
my %colnames; # = Something
my %rownames; # = Something else
my %mat; # = a hash of hash
# Prepare CSV
my $csv = Text::CSV->new ( { binary => 1, eol => "\n" } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open $fh, '>', "new.csv";
my #col = sort keys %colnames;
# Output Header
$csv->print($fh, ['mRNA', #col]);
# Output Rows
for my $row (keys %rownames){
my #data = ($row);
for my $col (#col){
my $num = $mat{$col}{$row};
$num =~ s/(\.\d\d)\d+/$1/;
push #data, $num;
}
$csv->print($fh, \#data);
}
close $fh;

Split string into variables and use output as element

Well.. I'm stuck again. I've read up quite a few topic with similar problems but not finding a solution for mine. I have a ; delimited csv file and the strings at the 8th column ($elements[7]) is as following: "aaaa;bb;cccc;ddddd;eeee;fffff;gg;". What i'm trying is to split the string based on ; and capture the outputs to variables. Then use those variables in the main csv file in their own column.
So now the file is like:
3d;2f;7j;8k;4s;2b;5g;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";4g;1a;5g;2g;7h;3d;2f;7j
3c;9k;5l;4g;1a;5g;3d;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";4g;1a;5g;2g;7h;3d;2f;7j
4g;1a;5g;2g;7h;3d;8k;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";3d;2f;7j;8k;4s;2b;4g;1a
And i want it like:
3d;2f;7j;8k;4s;2b;5g;4g;1a;5g;2g;7h;3d;2f;7j;aaaa;bb;cccc;ddddd;eeee;fffff;gg
3c;9k;5l;4g;1a;5g;3d;4g;1a;5g;2g;7h;3d;2f;7j;aaaa;bb;cccc;ddddd;eeee;fffff;gg;
4g;1a;5g;2g;7h;3d;8k;3d;2f;7j;8k;4s;2b;4g;1a;aaaa;bb;cccc;ddddd;eeee;fffff;gg;
This is my code i've been trying it with. I know.. it's terrible! But i'm hoping someone can help me?
use strict;
use warnings;
my $inputfile = shift || die "Give files\n";
my $outputfile = shift || die "Give output\n";
open my $INFILE, '<', $inputfile or die "In use / Not found :$!\n";
open my $OUTFILE, '>', $outputfile or die "In use :$!\n";
while (<$INFILE>) {
s/"//g;
my #elements = split /;/, $_;
my ($varA, $varB, $varC, $varD, $varE, $varF, $varG, $varH) split (';', $elements[10]);
$elements[16] = $varA;
$elements[17] = $varB;
$elements[18] = $varC;
$elements[19] = $varD;
$elements[20] = $varE;
$elements[21] = $varF;
$elements[22] = $varG;
$elements[23] = $varH;
my $output_line = join(";", #elements);
print $OUTFILE $output_line;
}
close $INFILE;
close $OUTFILE;
exit 0;
I'm confused about the my statement as well, it shouldn't be possible right? I mean the $vars are in a closed part so it shouldn't be possible to write them to $elements?
EDIT
This is how i adjusted the code with TLP's suggestions:
use strict;
use warnings;
use Text::CSV;
my $inputfile = shift || die "Give files\n";
my $outputfile = shift || die "Give output\n";
open my $INFILE, '<', $inputfile or die "In use / Not found :$!\n";
open my $OUTFILE, '>', $outputfile or die "In use :$!\n";
my $csv = Text::CSV->new({ # create a csv object
sep_char => ";", # delimiter
eol => "\n", # adds newline to print
});
while (my $row = $csv->getline($INFILE)) { # $row is an array ref
my $line = splice(#$row, 10, 1); # remove 8th line
$csv->parse($line); # parse the line
push #$row, $csv->fields(); # push newly parsed fields onto main array
$csv->print($OUTFILE, $row);
}
close $INFILE;
close $OUTFILE;
exit 0;
You should use a CSV module, e.g. Text::CSV to parse your data. Here's a brief example on how it can be done. You can replace the file handles I used below with your own.
use strict;
use warnings;
use Text::CSV;
my $csv = Text::CSV->new({ # create a csv object
sep_char => ";", # delimiter
eol => "\n", # adds newline to print
});
while (my $row = $csv->getline(*DATA)) { # $row is an array ref
my $line = splice(#$row, 7, 1); # remove 8th line
$csv->parse($line); # parse the line
push #$row, $csv->fields(); # push newly parsed fields onto main array
$csv->print(*STDOUT, $row);
}
__DATA__
3d;2f;7j;8k;4s;2b;5g;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";4g;1a;5g;2g;7h;3d;2f;7j
3c;9k;5l;4g;1a;5g;3d;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";4g;1a;5g;2g;7h;3d;2f;7j
4g;1a;5g;2g;7h;3d;8k;"aaaa;bb;cccc;ddddd;eeee;fffff;gg;";3d;2f;7j;8k;4s;2b;4g;1a
Output:
3d;2f;7j;8k;4s;2b;5g;4g;1a;5g;2g;7h;3d;2f;7j;aaaa;bb;cccc;ddddd;eeee;fffff;gg;
3c;9k;5l;4g;1a;5g;3d;4g;1a;5g;2g;7h;3d;2f;7j;aaaa;bb;cccc;ddddd;eeee;fffff;gg;
4g;1a;5g;2g;7h;3d;8k;3d;2f;7j;8k;4s;2b;4g;1a;aaaa;bb;cccc;ddddd;eeee;fffff;gg;