I need the output in the following format in Perl

# File-
# a,b,c,d,e,f
# 1,2,3,4,3,2
# 9,8,7,6,5,0
# 2,3,4,6,7,8
# i need output like this:-
# a=1,d=4,c=3,a=9,d=6,c=7,a=2,d=6,c=4
# but my program gives this:-
# a=1,d=4,c=3a=9,d=6,c=7a=2,d=6,c=4 (there is no , after c and a)
My script is:
# Question's attempt: read parse.txt, skip the header line and print three
# fields per record. The output problem being asked about is left in place.
open ($fh, 'parse.txt');
my @arr;
my $dummy=<$fh>;        # discard the header line
while (<$fh>) {
    chomp;
    # Fixed character offsets: only correct while every field is one character wide.
    $a = substr $_, 0,1;
    $b = substr $_, 6,1;
    $c = substr $_, 4,1;
    # Nothing is printed between records, hence "c=3a=9" in the output.
    print "a=$a,d=$b,c=$c";
}
close ($fh);            # close the handle that was actually opened

# Print all records on one line, comma-separated, by emitting the separator
# *before* every record except the first.
my $prefix = '';                # empty for the first record, ',' afterwards
while (<$fh>) {
    chomp;
    my @fields = split /,/;     # split $_ on commas
    print $prefix."a=$fields[0],d=$fields[3],c=$fields[2]";
    $prefix = ',';
}
print("\n");
or
# Collect one formatted string per record, then join them with commas so
# no leading or trailing separator is produced.
my @recs;
while (<$fh>) {
    chomp;
    my @fields = split /,/;     # split $_ on commas
    push @recs, "a=$fields[0],d=$fields[3],c=$fields[2]";
}
print(join(',', @recs), "\n");

Instead of printing out the values you could append them to a string and include a comma after the "c" value. Then at the end of the loop, erase the final comma from the string and print it out. There are some scalability problems if your input file is too large. But if it's a reasonable size there shouldn't be any substantial issue.
# Build the whole line in memory, appending "," after every record, then
# remove the single trailing comma before printing.
my $output = '';                # defined from the start so an empty file never chops undef
my $dummy=<$fh>;                # discard the header line
while (<$fh>) {
    chomp;
    # Split on commas rather than using fixed substr offsets, so fields wider
    # than one character are still picked up. Lexicals avoid clobbering the
    # global $a/$b that sort uses.
    my ($first, $fourth, $third) = (split /,/)[0, 3, 2];
    $output .= "a=$first,d=$fourth,c=$third,";
}
chop $output;                   # drop the final comma; a no-op on an empty string
print $output;

If your fields are delimited by a separator, split each line on it and collect the elements you need:
use warnings;
use strict;
use feature 'say';

# Print a=..,d=..,c=.. for every data row of parse.txt, all on one
# comma-separated line.
my $file = 'parse.txt';
open my $fh, '<', $file or die "Can't open $file: $!";
my $dummy = <$fh>;              # skip the header row
my @res;
while (<$fh>)
{
    chomp;                      # keep the line ending out of the last column
    # List slice: take columns 0, 3 and 2 of the split, in that order.
    my ($a, $d, $c) = (split /,/)[0,3,2];
    push @res, "a=$a,d=$d,c=$c";
}
close $fh;
say join ',', @res;
or pick the order in the assignment
# Same slice with the indices in ascending order; the target variables are reordered to match.
my ($a, $c, $d) = (split /,/)[0,2,3];

Related

how to display the hash value from my sample data

I'm learning Perl at the moment, and I'd like help with this exercise.
My objective is to display the hash values for PartID 1, 2 and 3.
The desired output displays only the lot, wafer, program, version, testnames, testnumbers, hilimit, lolimit and PartID values.
sample data
lot=lot123
wafer=1
program=prgtest
version=1
Testnames,T1,T2,T3
Testnumbers,1,2,3
Hilimit,5,6,7
Lolimit,1,2,3
PartID,,,,
1,3,0,5
2,4,3,2
3,5,6,3
This is my code:
#!/usr/bin/perl
use strict;
use Getopt::Long;
# Question's script: prints the header values of the file given with
# --infile. Lines after the PartID line are not handled by any branch.
my $file = "";
GetOptions ("infile=s" => \$file ) or die("Error in command line arguments\n");
my $lotid = "";
open(DATA, $file) or die "Couldn't open file $file";
while(my $line = <DATA>) {
    #print "$line";
    if ( $line =~ /^lot=/ ) {
        #print "$line \n";
        # This $lotid is a new lexical; the file-level $lotid above stays "".
        my ($dump, $lotid) = split /=/, $line;
        print "$lotid\n";
    }
    elsif ($line =~ /^program=/ ) {
        my ($dump, $progid) = split /=/, $line;
        print "$progid \n";
    }
    elsif ($line =~ /^wafer=/ ) {
        my ($dump, $waferid) = split /=/, $line;
        print "$waferid \n";
    }
    elsif ($line =~ /^version=/ ) {
        my ($dump, $verid) = split /=/, $line;
        print "$verid \n";
    }
    # The list-valued lines: the first field is the label, the rest are values.
    elsif ($line =~ /^testnames/i) {
        my ($dump, @arr) = split /\,/, $line;
        foreach my $e (@arr) {
            print $e, "\n";
        }
    }
    elsif ($line =~ /^testnumbers/i) {
        my ($dump, @arr1) = split /\,/, $line;
        foreach my $e1 (@arr1) {
            print $e1, "\n";
        }
    }
    elsif ($line =~ /^hilimit/i) {
        my ($dump, @arr2) = split /\,/, $line;
        foreach my $e2 (@arr2) {
            print $e2, "\n";
        }
    }
    elsif ($line =~ /^lolimit/i) {
        my ($dump, @arr3) = split /\,/, $line;
        foreach my $e3 (@arr3) {
            print $e3, "\n";
        }
    }
}
Kindly help me extend my code to display the hash values for PartID 1, 2 and 3.
So I've rewritten your code a little to use a few more modern Perl idioms (along with some comments to explain what I've done). The bit I've added is near the bottom.
#!/usr/bin/perl
use strict;
# Added 'warnings' which you should always use
use warnings;
# Use say() instead of print()
use feature 'say';
use Getopt::Long;
my $file = "";
GetOptions ("infile=s" => \$file)
    or die ("Error in command line arguments\n");
# Use a lexical variable for a filehandle.
# Use the (safer) 3-argument version of open().
# Add $! to the error message.
open(my $fh, '<', $file) or die "Couldn't open file $file: $!";
# Read each record into $_ - which makes the following code simpler
while (<$fh>) {
    # Remove the line ending; say() adds its own newline, so without this
    # every last value on a line would be followed by a blank line.
    chomp;
    # Match on $_
    if ( /^lot=/ ) {
        # Use "undef" instead of a $dump variable.
        # split() works on $_ by default.
        my (undef, $lotid) = split /=/;
        # Use say() instead of print() - less punctuation :-)
        say $lotid;
    }
    elsif ( /^program=/ ) {
        my (undef, $progid) = split /=/;
        say $progid;
    }
    elsif ( /^wafer=/ ) {
        my (undef, $waferid) = split /=/;
        say $waferid;
    }
    elsif ( /^version=/ ) {
        my (undef, $verid) = split /=/;
        say $verid;
    }
    elsif ( /^testnames/i) {
        my (undef, @arr) = split /\,/;
        # Changed all of these similar pieces of code
        # to use the same variable names. As they are
        # defined in different code blocks, they are
        # completely separate variables.
        foreach my $e (@arr) {
            say $e;
        }
    }
    elsif ( /^testnumbers/i) {
        my (undef, @arr) = split /\,/;
        foreach my $e (@arr) {
            say $e;
        }
    }
    elsif ( /^hilimit/i) {
        my (undef, @arr) = split /\,/;
        foreach my $e (@arr) {
            say $e;
        }
    }
    elsif ( /^lolimit/i) {
        my (undef, @arr) = split /\,/;
        foreach my $e (@arr) {
            say $e;
        }
    }
    # And here's the new bit.
    # If we're on the "partid" line, then read the next
    # three lines, split each one and print the first
    # element from the list returned by split().
    elsif ( /^partid/i) {
        for my $n (1 .. 3) {
            my $row = <$fh>;
            last unless defined $row;   # stop quietly if the file ends early
            say +(split /,/, $row)[0];
        }
    }
}
Update: By the way, there are no hashes anywhere in this code :-)
Update 2: I've just realised that you only have three different ways to process the data. So you can simplify your code drastically by using slightly more complex regexes.
#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';
use Getopt::Long;
my $file = "";
GetOptions ("infile=s" => \$file)
    or die ("Error in command line arguments\n");
open(my $fh, '<', $file) or die "Couldn't open file $file: $!";
while (<$fh>) {
    # Remove the line ending; say() supplies the newline on output.
    chomp;
    # Single value - just print it.
    if ( /^(?:lot|program|wafer|version)=/ ) {
        my (undef, $value) = split /=/;
        say $value;
    }
    # List of values - split and print.
    elsif ( /^(?:testnames|testnumbers|hilimit|lolimit)/i) {
        my (undef, @arr) = split /\,/;
        foreach my $e (@arr) {
            say $e;
        }
    }
    # Extract values from following lines.
    elsif ( /^partid/i) {
        for my $n (1 .. 3) {
            my $row = <$fh>;
            last unless defined $row;   # stop quietly if the file ends early
            say +(split /,/, $row)[0];
        }
    }
}

Make the same edit for each column in a multi-column file

I have multiple CSV files with varying numbers of columns that I need to reformat into a fixed-format text file.
At this stage, I hash and unhash the columns that need to be edited, but it's tedious, and I can't add new columns without changing the program first.
Is there a simpler way of reading, splitting and editing all columns, regardless of the number of columns in the file?
Here is my code thus far:
use strict;
use warnings;
# Question's script: pad each of (at most) ten CSV columns to width 10 and
# append the result to a text file. Declarations renamed to $input_file /
# $output_file so they match the names used by open() below.
my $input_file = 'FILENAME.csv';
my $output_file = 'FILENAME.txt';
open (INPUT, "<", "$input_file") or die "\n !! Cannot open $input_file: $!";
open (OUTPUT, ">>", "$output_file") or die "\n !! Cannot create $output_file: $!";
while ( <INPUT> ) {
    my $line = $_;
    $line =~ s/\s*$//g;         # strip trailing whitespace, including the newline
    # One variable per column: this is the part that does not scale.
    my ( $a, $b, $c, $d, $e, $f, $g, $h, $i, $j ) = split('\,', $line);
    $a = sprintf '%10s', $a;
    $b = sprintf '%10s', $b;
    $c = sprintf '%10s', $c;
    $d = sprintf '%10s', $d;
    $e = sprintf '%10s', $e;
    $f = sprintf '%10s', $f;
    $g = sprintf '%10s', $g;
    $h = sprintf '%10s', $h;
    $i = sprintf '%10s', $i;
    $j = sprintf '%10s', $j;
    print OUTPUT "$a$b$c$d$e$f$g$h$i$j\n";
}
close INPUT;
close OUTPUT;
exit;
Do you mean something like this?
# -a autosplits each input line into @F using the -F pattern; -l strips and re-adds the line ending.
perl -aF/,/ -lne 'print map sprintf("%10s", $_), @F' FILENAME.csv > FILENAME.txt
Any time you're using sequential variables, you should be using an array. And in this case, since you only use the array once, you don't even need to do more than hold it temporarily.
Also: Use lexical filehandles, it's better practice.
#!/usr/bin/env perl
use strict;
use warnings;

# Reformat a CSV file as fixed-width text: every field, however many a line
# has, is right-justified in a 10-character column.
my $input_file  = 'FILENAME.csv';
my $output_file = 'FILENAME.txt';
my $format      = '%10s';

open( my $input_fh, "<", $input_file ) or die "\n !! Cannot open $input_file: $!";
open( my $output_fh, ">>", $output_file ) or die "\n !! Cannot create $output_file: $!";
while ( my $line = <$input_fh> ) {
    # Strip the line ending and trailing whitespace first; otherwise the
    # newline is counted as one of the last field's 10 characters.
    $line =~ s/\s+\z//;
    # Limit -1 keeps trailing empty fields so they are padded as well.
    my @fields = split /,/, $line, -1;
    print {$output_fh} join( "", map { sprintf $format, $_ } @fields ), "\n";
}
close $input_fh;
# Buffered write errors only surface when the handle is closed.
close $output_fh or die "\n !! Cannot close $output_file: $!";
exit;

Perl : Need to append two columns if the ID's are repeating

If id gets repeated I am appending app1, app2 and printing it once.
Input:
id|Name|app1|app2
1|abc|234|231|
2|xyz|123|215|
1|abc|265|321|
3|asd|213|235|
Output:
id|Name|app1|app2
1|abc|234,265|231,321|
2|xyz|123|215|
3|asd|213|235|
Output I'm getting:
id|Name|app1|app2
1|abc|234,231|
2|xyz|123,215|
1|abc|265,321|
3|asd|213,235|
My Code:
#! usr/bin/perl
use strict;
use warnings;
# Question's attempt, with the logic left as asked about.
my $basedir = 'E:\Perl\Input\\';
my $file ='doctor.txt';
my $counter = 0;
my %RepeatNumber;               # id => number of times seen so far
my $pos=0;
open(OUTFILE, '>', 'E:\Perl\Output\DoctorOpFile.csv') || die $!;
open(FH, '<', join('', $basedir, $file)) || die $!;
my $line = readline(FH);
# $counter is still 0 here, so the header line is always copied through.
unless ($counter) {
    chomp $line;
    print OUTFILE $line;
    print OUTFILE "\n";
}
while ($line = readline(FH)) {
    chomp $line;
    my @obj = split('\|',$line);
    # Post-increment: false the first time an id is seen, true on repeats.
    if($RepeatNumber{$obj[0]}++) {
        my $str1= join("|",$obj[0]);
        # Joins app1 and app2 of the *same* row with a comma.
        my $str2=join(",",$obj[2],$obj[3]);
        print OUTFILE join("|",$str1,$str2);
        print OUTFILE "\n";
    }
}
This should do the trick:
use strict;
use warnings;

# Merge rows that share an id: the app1 and app2 values of a repeated id are
# collected and printed comma-separated, one output row per id.
my $file_in = "doctor.txt";
open my $in, '<', $file_in or die "Cannot open $file_in: $!";

my $header = <$in>;             # header row is copied through unchanged
print $header if defined $header;

my %out;                        # id => [ name, [app1 values], [app2 values] ]
my @order;                      # ids in order of first appearance
while (my $line = <$in>)
{
    chomp $line;
    my ($id, $Name, $app1, $app2) = split /\|/, $line;
    next unless defined $id && length $id;      # skip blank lines
    push @order, $id unless exists $out{$id};
    $out{$id}[0] = $Name;
    push @{$out{$id}[1]}, $app1;
    push @{$out{$id}[2]}, $app2;
}
close $in;

# Walk the ids in first-seen order; "keys %out" returns them in no particular order.
foreach my $key (@order)
{
    print $key, "|", $out{$key}[0], "|", join (",", @{$out{$key}[1]}), "|", join (",", @{$out{$key}[2]}), "|\n";
}
EDIT
To see what the %out contains (in case it's not clear), you can use
use Data::Dumper;
and print it via
print Dumper(\%out);   # pass a reference so the hash is dumped as one structure, not a flat key/value list
I'd tackle it like this:
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use 5.14.0;
# id => { column name => [ values seen for that id, in input order ] }
my %stuff;
#extract the header row.
#use the regex to remove the linefeed, because
#we can't chomp it inline like this.
#works since perl 5.14
#otherwise we could just chomp (@header) later.
my ( $id, @header ) = split( /\|/, <DATA> =~ s/\n//r );
while (<DATA>) {
    #turn this row into a hash of key-values.
    my %row;
    #hash slice: the fields after the id are assigned to the header names in order.
    ( $id, @row{@header} ) = split(/\|/);
    #print for diag
    print Dumper \%row;
    #iterate each key, and append its value to that id's list in %stuff.
    foreach my $key ( keys %row ) {
        push( @{ $stuff{$id}{$key} }, $row{$key} );
    }
}
#print for diag
print Dumper \%stuff;
print join ("|", "id", @header ),"\n";
#iterate ids in the hash
foreach my $id ( sort keys %stuff ) {
    #join this record by '|'.
    print join('|',
        $id,
        #turn inner arrays into comma separated via map.
        map {
            my %seen;
            #use grep to remove dupes - e.g. "abc,abc" -> "abc"
            join( ",", grep !$seen{$_}++, @$_ )
        } @{ $stuff{$id} }{@header}
    ),
    "\n";
}
__DATA__
id|Name|app1|app2
1|abc|234|231|
2|xyz|123|215|
1|abc|265|321|
3|asd|213|235|
This is perhaps a bit overkill for your application, but it should handle arbitrary column headings and arbitrary numbers of duplicates. It does coalesce duplicate values, though, so the two abc entries end up as abc rather than abc,abc.
Output is:
id|Name|app1|app2
1|abc|234,265|231,321
2|xyz|123|215
3|asd|213|235
Another way of doing it which doesn't use a hash (in case you want to be more memory efficient), my contribution lies under the opens:
#!/usr/bin/perl
use strict;
use warnings;

# Merge rows with the same id without building a hash: sort the rows by id,
# then fold each run of equal ids into a single output row.
my $basedir = 'E:\Perl\Input\\';
my $file ='doctor.txt';
open(my $out_fh, '>', 'E:\Perl\Output\DoctorOpFile.csv') || die $!;
select($out_fh);                    # plain print() now writes to the output file
open(my $in_fh, '<', join('', $basedir, $file)) || die $!;
print(scalar(<$in_fh>));            # copy the header line through

my @lastobj;                        # row being accumulated; empty until the first row
# <$in_fh> in list context reads every remaining line, so the whole file is
# held in memory for the sort.
foreach my $obj (sort { $a->[0] <=> $b->[0] }
                 map  { chomp; [ split /\|/ ] }   # /\|/: a bare '|' pattern would split between every character
                 grep { /\S/ } <$in_fh>) {        # ignore blank lines
    if (@lastobj && $obj->[0] eq $lastobj[0]) {
        # Same id as the pending row: extend its app1/app2 lists.
        $lastobj[2] .= ',' . $obj->[2];
        $lastobj[3] .= ',' . $obj->[3];
    }
    else {
        # New id: flush the pending row (if any) and start a new one.
        print(join('|', @lastobj), "|\n") if @lastobj;
        @lastobj = @{$obj}[0..3];
    }
}
print(join('|', @lastobj), "|\n") if @lastobj;

select(STDOUT);
close($out_fh) || die $!;           # write errors surface at close
close($in_fh);
Note that split, without its third argument, discards trailing empty fields, which is why you have to add the last bar. If you don't do a chomp, you won't need to supply the bar or the trailing hard return, but you would have to record $obj[4].

How to extract the last element of a string and use it to grow an array inside a loop

I have a dataset like this:
10001;02/07/98;TRIO;PI;M^12/12/59^F^^SP^09/12/55
;;;;;M1|F1|SP1;11;10;12;10;12;11;1.82;D16S539
;;;;;M1|F1|SP1;8;8;8;8;10;8;3.45;D7S820
;;;;;M1|F1|SP1;14;12;12;11;14;11;1.57;D13S317
;;;;;M1|F1|SP1;12;12;13;12;13;8;3.27;D5S818
;;;;;M1|F1|SP1;12;12;12;12;12;8;1.51;CSF1PO
;;;;;M1|F1|SP1;8;11;11;11;11;8;1.79;TPOX
;;;;;M1|F1|SP1;6;9;9;6;8;6;1.31;TH01
I'm trying to extract the last element of each line that does not start with a number, i.e. all lines except the first one. I want to put these values inside an array called @markers.
I'm trying that by the following code:
#!usr/bin/perl
use warnings;
use strict;
# Question's attempt, with the logic left as asked about.
open FILE, 'test' || die $!;
while (my $line = <FILE>) {
    my @fields = (split /;/), $line;
    if ($line !~ m/^[0-9]+/) {
        # Declared inside the loop body, so a new @markers is created on every pass.
        my @markers = splice @fields, 0, @fields - 1;
    }
}
But that does not work. Can anyone help please?
Thanks
You create a new variable named @markers every pass of the loop.
my @fields = (split /;/), $line; means (my @fields = (split /;/, $_)), $line;. You meant my @fields = (split /;/, $line);
'test' || die $! is the same as just 'test'.
use strict;
use warnings;
open my $FILE, '<', 'test'
    or die $!;
# Collect the last field of every line whose first field is empty.
my @markers;                    # declared once, outside the loop, so it accumulates
while (<$FILE>) {
    chomp;
    next if /^\s*\z/;           # Skip blank lines.
    my @fields = split /;/;
    next unless @fields;        # a line made only of ';' splits to an empty list
    push @markers, $fields[-1]
        if $fields[0] eq '';
}
You aren't using function split() correctly. I have fixed it in the code below and printed the values:
#!/usr/bin/perl
use warnings;
use strict;
# Print the last ';'-separated field of every line that does not start with a digit.
# 'or' rather than '||': with '||' the die is attached to the filename string
# and never runs when open fails.
open my $fh, '<', 'test' or die "Cannot open test: $!";
while (my $line = <$fh>) {
    chomp $line;
    my @fields = split( /;/, $line);
    next unless @fields;        # blank line, or a line made only of ';'
    if ($line !~ m/^[0-9]+/) {
        print "$fields[-1]\n";
        # my @markers = splice @fields, 0, @fields - 1;
    }
}
close $fh;

How can I select records from a CSV file based on timestamps in Perl?

I have CSV file which has timestamp in the first column as shown below sample:
Time,head1,head2,...head3
00:00:00,24,22,...,n
00:00:01,34,55,...,n
and so on...
I want to select the rows within a specific time range, e.g. from 11:00:00 to 16:00:00, together with the header, and put them into an array. I have written the code below to get the header into an array.
#!/usr/bin/perl -w
use strict;
# The time window comes from the first two command-line arguments.
my ($start, $end) = @ARGV[0, 1];
# The first line of standard input is the header row.
my $line = <STDIN>;
# Break the header into column names on commas and the newline.
my $header = [ split /[,\n]/, $line ];
I need help on how to filter data from file with selected time range and create an array of that.
I kind of cheated. A proper program would probably use DateTime and then compare with DateTime's compare function. If you're only expecting input in this format, my "cheat" should work.
#!/usr/bin/perl
use strict;
use warnings;
use Text::CSV;
use DateTime;

# Select the CSV rows whose first-column HH:MM:SS timestamp lies within
# [start, end], keeping the header row as the first element of @lines.
# The bounds are written as HHMMSS, i.e. the timestamp with colons removed.
my $start = 110000;
my $end = 160000;
my $csv = Text::CSV->new () or die "Cannot use CSV: ".Text::CSV->error_diag ();
my @lines;
open my $fh, "<:encoding(utf8)", "file.csv" or die "Error opening file: $!";
# Take the header row first so that "Time" is never compared as a number.
my $header = $csv->getline( $fh );
push @lines, $header if $header;
while ( my $row = $csv->getline( $fh ) ) {
    my $time = $row->[0];
    # Skip rows whose first column is not a plain HH:MM:SS timestamp.
    next unless defined $time && $time =~ /\A\d\d:\d\d:\d\d\z/;
    $time =~ s/\://g;
    if ($time >= $start and $time <= $end) {
        push @lines, $row;
    }
}
$csv->eof or $csv->error_diag();
close $fh;
#do something here with @lines
just a start
my $start="01:00:01";
my $end = "11:00:01";
while(<>){
    chomp;
    # Range (flip-flop) operator: true from the line matching the first
    # pattern through the line matching the second. It only switches on if a
    # line with exactly $start exists in the input.
    # Anchored and quoted so only the leading timestamp column can match.
    if ( /^\Q$start\E,/../^\Q$end\E,/ ){
        my @s = split /,/ ;
        # do what you want with @s here.
    }
}
#!/usr/bin/perl -w
use strict;
# Keep the header plus every row whose first column (HH:MM:SS) lies within
# [$start, $end]. Zero-padded times of equal length sort correctly as
# strings, so lt/gt is enough.
my $start = '11:00:00';
my $end = '16:00:00';
my @data;
# Read the header through <> as well, so it comes from the same source as the
# data (a file named on the command line, or STDIN when none is given).
my $header = <>;
die "No input\n" unless defined $header;
chomp $header;                  # remove trailing newline character
push @data, [split /,/, $header];   # add header
while(<>) {
    chomp;
    my @line = split /,/;
    next unless @line;          # skip blank lines
    next if $line[0] lt $start or $line[0] gt $end;
    push @data, [@line];        # $data[i][j] contains j-th element of i-th line.
}