Perl code modifie logs - perl

I want to write a Perl script that changes the format of my logs. I want to remove the `---` separator lines, then join the lines of each record into a single line, replacing every line break (CRLF) with `|`.
basically I want to obtain this result :
INFO|[ACTIVE] ExecuteThread: '0' for queue: 'weblogic.kernel.Default (self-tuning)'|JB173F3N|17/02/15 14:32:03:930|Inbound Message | ID: 5 Response-Code: 200 | Encoding: UTF-8 | Content-Type: application/soap+xml; charset=utf-8 | Headers: {connection=[close], Content-Length=[650], content-type=[application/soap+xml; charset=utf-8], Date=[Tue, 17 Feb 2015 13:32:03 GMT], Server=[Apache], X-Powered-By=[Servlet/2.5 JSP/2.1]} | Payload: <?xml version="1.0" encoding="UTF-8"?> | <soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope"><soap:Header/><soap:Body><con:Reponse xmlns:con="http://www.erdfdistribution.fr/linky/types/smc/consultation"><con:IdPRM>19136758109411</con:IdPRM><con:CR><dico:Statut xmlns:dico="http://www.erdfdistribution.fr/linky/types/dico">Rejet</dico:Statut><dico:HorEmission xmlns:dico="http://www.erdfdistribution.fr/linky/types/dico">2015-02-17T14:32:03.887+01:00</dico:HorEmission><dico:Detail xmlns:dico="http://www.erdfdistribution.fr/linky/types/dico"><dico:Code>REJ016</dico:Code></dico:Detail></con:CR></con:Reponse></soap:Body></soap:Envelope>
Instead of this One:
INFO|[ACTIVE] ExecuteThread: '0' for queue: 'weblogic.kernel.Default (self-tuning)'|JB173F3N|17/02/15 14:32:03:930|Inbound Message
----------------------------
ID: 5
Response-Code: 200
Encoding: UTF-8
Content-Type: application/soap+xml; charset=utf-8
Headers: {connection=[close], Content-Length=[650], content-type=[application/soap+xml; charset=utf-8], Date=[Tue, 17 Feb 2015 13:32:03 GMT], Server=[Apache], X-Powered-By=[Servlet/2.5 JSP/2.1]}
Payload: <?xml version="1.0" encoding="UTF-8"?>
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope"><soap:Header/><soap:Body><con:Reponse xmlns:con="http://www.erdfdistribution.fr/linky/types/smc/consultation"><con:IdPRM>19136758109411</con:IdPRM><con:CR><dico:Statut xmlns:dico="http://www.erdfdistribution.fr/linky/types/dico">Rejet</dico:Statut><dico:HorEmission xmlns:dico="http://www.erdfdistribution.fr/linky/types/dico">2015-02-17T14:32:03.887+01:00</dico:HorEmission><dico:Detail xmlns:dico="http://www.erdfdistribution.fr/linky/types/dico"><dico:Code>REJ016</dico:Code></dico:Detail></con:CR></con:Reponse></soap:Body></soap:Envelope>
--------------------------------------
My code doesn't do this; instead it groups all lines of the same kind together on a single line :(
This is my code :
#!/usr/bin/perl
# Modif_Log.pl - copy today's log files from an input directory to an output
# directory, writing one output line per keyword group.
#
# Usage: Modif_Log.pl inputDirectory outputDirectory
#
# Both directory arguments are concatenated directly with file names, so each
# must end with a path separator.
use strict;
use warnings;
use File::Basename;
use Time::Piece;

if (@ARGV != 2) {
    print "\nUsage: Modif_Log.pl inputDirectory outputDirectory\n";
    exit;
}
my ($inputDirectory, $outputDirectory) = @ARGV;
my @liste = glob($inputDirectory."*.log*");
# Day and month (ddmm) that must appear in the name of a file to process.
my $today = localtime->strftime('%d%m');

foreach my $s (@liste) {
    my $inputPath = $inputDirectory.basename($s);

    # Skip files not modified within the last day or not stamped with today.
    # The check runs before the file is opened so skipped files leak no handle.
    unless (-M $inputPath < 1 && $s =~ $today) {
        print $s." is older than one day\n";
        next;
    }
    print "Processing ".$s."\n";

    open(my $inputFile, '<', $inputPath)
        or die "can't open file $inputPath: $!";
    my @lines = <$inputFile>;
    close($inputFile);

    my $outputFileName = basename($s);
    $outputFileName =~ s/_[0-9]{6}//;
    open(my $outputFile, '>', $outputDirectory.$outputFileName)
        or die "can't open file $outputDirectory$outputFileName: $!";

    # One accumulator per keyword, in the order the groups are written out.
    # A line is appended to the first group whose pattern it matches.
    my @groups = map { +{ pattern => $_, text => "" } } (
        qr/Inbound/i,
        qr/Outbound/i,
        qr/^ID:/,
        qr/^Encoding :/,
        qr/^Http-Method:/,
        qr/^Content-Type:/,
        qr/^Headers:/,
        qr/^Payload:/,
        qr/^Response-Code:/,
        qr/^Address:/,
    );
    my $others = "";    # lines matching no keyword and containing no "--"

    LINE:
    foreach my $line (@lines) {
        chomp($line);
        foreach my $group (@groups) {
            if ($line =~ $group->{pattern}) {
                $group->{text} .= $line."|";
                next LINE;
            }
        }
        $others .= $line."|" if $line !~ /--/;
    }

    # Each non-empty group becomes exactly one line of the output file.
    foreach my $text ((map { $_->{text} } @groups), $others) {
        print {$outputFile} $text."\n" if $text ne "";
    }
    close($outputFile)
        or die "can't close file $outputDirectory$outputFileName: $!";
    print "Finished Processing ".$s."\n";
}
Can you please help me? Perl is driving me crazy.

Remove the bunch of if-statements and change your for loop as follows:
# Join every block delimited by a pair of dash-only separator lines onto the
# line that precedes it, using ' | ' as the field separator.
# Expects @lines (raw input lines) and $outputFile (open write handle) to be
# in scope.
my @record;             # parts of the record currently being assembled
my $pending;            # last plain line not yet written; undef when none
my $in_record = 0;      # true between an opening and a closing separator
foreach my $line (@lines) {
    # remove CRLF, chomp only eliminate LF
    $line =~ s/\R+\z//;
    if ($line =~ /^-+$/) {
        if (!$in_record) {
            # opening separator: the pending line becomes the record head
            @record = defined $pending ? ($pending) : ();
            undef $pending;
            $in_record = 1;
        }
        else {
            # closing separator: the record is complete
            print {$outputFile} join(' | ', @record), "\n";
            @record = ();
            $in_record = 0;
        }
    }
    elsif ($in_record) {
        push @record, $line;
    }
    else {
        # A plain line outside a record. The previous pending line was not
        # followed by a separator, so it is written unchanged.
        print {$outputFile} "$pending\n" if defined $pending;
        $pending = $line;
    }
}
# Flush whatever is left: a record whose closing separator is missing, or a
# trailing plain line.
print {$outputFile} join(' | ', @record), "\n" if $in_record && @record;
print {$outputFile} "$pending\n" if defined $pending;

Your list of strings is just a set of values, one of which must appear in a line of the input file for that line to be included in the output. There is no need to store lines in different variables according to which criterion they matched.
This program appears to do what you need. It builds a regular expression from the list of strings so that they can all be tested in a single match. The lines to be printed are accumulated in the array @output and printed to the output file when the whole input file has been processed.
Note that I've used rel2abs to append a file name to a directory. It takes account of several cases that simple string concatenation doesn't allow for, as well as making the code clearer
I haven't been able to test this except to make sure that it compiles
#!/usr/bin/perl
# Modif_Log.pl - for each of today's log files in input_dir, write a file of
# the same name (minus its _NNNNNN suffix) to output_dir. The output holds the
# wanted lines of the input joined with ' | ' on a single line.
#
# Usage: Modif_Log.pl input_dir output_dir
use strict;
use warnings;
use File::Basename;
use Time::Piece;
use File::Spec::Functions 'rel2abs';

if ( @ARGV != 2 ) {
    die "\nUsage: Modif_Log.pl input_dir output_dir\n";
}
my ( $input_dir, $output_dir ) = @ARGV;
my $today = localtime->strftime('%d%m');
my @liste = glob rel2abs( '*.log*', $input_dir );

# A line is kept when it matches any of these patterns.
my @wanted = (
    qr/Inbound/i,
    qr/Outbound/i,
    qr/^ID:/,
    qr/^Response-Code:/,
    qr/^Encoding :/,
    qr/^Http-Method:/,
    qr/^Content-Type:/,
    qr/^Headers:/,
    qr/^Payload:/,
    qr/^Address:/,
);
# Combine the patterns into one alternation so each line needs a single match.
my $wanted = join '|', @wanted;
$wanted = qr/(?:$wanted)/;

for my $input_file ( @liste ) {
    unless ( -M $input_file < 1 and $input_file =~ $today ) {
        warn qq{"$input_file" is older than one day\n};
        next;
    }
    warn qq{Processing "$input_file"\n};

    open my $in_fh, '<', $input_file
        or die qq{Unable to open "$input_file" for input: $!};
    my @output;
    while ( my $line = <$in_fh> ) {
        next unless $line =~ $wanted;
        chomp $line;
        push @output, $line;
    }
    close $in_fh;

    my $output_file_name = basename($input_file);
    $output_file_name =~ s/_[0-9]{6}//;
    my $output_file = rel2abs($output_file_name, $output_dir);
    open my $out_fh, '>', $output_file
        or die qq{Unable to open "$output_file" for output: $!};
    print {$out_fh} join(' | ', @output), "\n";
    # Buffered write errors only surface when the handle is closed.
    close $out_fh
        or die qq{Unable to close "$output_file": $!};
    warn qq{Finished Processing "$input_file"\n};
}

Related

how to display the hash value from my sample data

I'm learning Perl at the moment, and I wanted to ask for help with this exercise.
My objective is to display the values for PartID 1, 2 and 3.
At the moment my script displays only the lot, wafer, program, version, test names, test numbers, high limit and low limit values; the PartID values are missing.
sample data
lot=lot123
wafer=1
program=prgtest
version=1
Testnames,T1,T2,T3
Testnumbers,1,2,3
Hilimit,5,6,7
Lolimit,1,2,3
PartID,,,,
1,3,0,5
2,4,3,2
3,5,6,3
This is my code:
#!/usr/bin/perl
# Print the header values (lot, program, wafer, version) and the comma
# separated lists (test names, test numbers, limits) of the file given with
# --infile.
use strict;
use Getopt::Long;
my $file = "";
GetOptions ("infile=s" => \$file ) or die("Error in command line arguments\n");
open(DATA, $file) or die "Couldn't open file $file";
while(my $line = <DATA>) {
    # Drop the line ending so printed values carry no embedded newline.
    chomp $line;
    #print "$line";
    if ( $line =~ /^lot=/ ) {
        #print "$line \n";
        my ($dump, $lotid) = split /=/, $line;
        print "$lotid\n";
    }
    elsif ($line =~ /^program=/ ) {
        my ($dump, $progid) = split /=/, $line;
        print "$progid \n";
    }
    elsif ($line =~ /^wafer=/ ) {
        my ($dump, $waferid) = split /=/, $line;
        print "$waferid \n";
    }
    elsif ($line =~ /^version=/ ) {
        my ($dump, $verid) = split /=/, $line;
        print "$verid \n";
    }
    elsif ($line =~ /^testnames/i) {
        # The first field is the row label; the rest are the values.
        my ($dump, @arr) = split /\,/, $line;
        foreach my $e (@arr) {
            print $e, "\n";
        }
    }
    elsif ($line =~ /^testnumbers/i) {
        my ($dump, @arr1) = split /\,/, $line;
        foreach my $e1 (@arr1) {
            print $e1, "\n";
        }
    }
    elsif ($line =~ /^hilimit/i) {
        my ($dump, @arr2) = split /\,/, $line;
        foreach my $e2 (@arr2) {
            print $e2, "\n";
        }
    }
    elsif ($line =~ /^lolimit/i) {
        my ($dump, @arr3) = split /\,/, $line;
        foreach my $e3 (@arr3) {
            print $e3, "\n";
        }
    }
}
Could you kindly help me extend my code so that it also displays the PartID values 1, 2 and 3?
So I've rewritten your code a little to use a few more modern Perl idioms (along with some comments to explain what I've done). The bit I've added is near the bottom.
#!/usr/bin/perl
use strict;
# Added 'warnings' which you should always use
use warnings;
# Use say() instead of print()
use feature 'say';
use Getopt::Long;
my $file = "";
GetOptions ("infile=s" => \$file)
    or die ("Error in command line arguments\n");
# Use a lexical variable for a filehandle.
# Use the (safer) 3-argument version of open().
# Add $! to the error message.
open(my $fh, '<', $file) or die "Couldn't open file $file: $!";
# Read each record into $_ - which makes the following code simpler
while (<$fh>) {
    # Remove the line ending; say() adds its own newline, so without this
    # every value would be followed by an empty line.
    chomp;
    # Match on $_
    if ( /^lot=/ ) {
        # Use "undef" instead of a $dump variable.
        # split() works on $_ by default.
        my (undef, $lotid) = split /=/;
        # Use say() instead of print() - less punctuation :-)
        say $lotid;
    }
    elsif ( /^program=/ ) {
        my (undef, $progid) = split /=/;
        say $progid;
    }
    elsif ( /^wafer=/ ) {
        my (undef, $waferid) = split /=/;
        say $waferid;
    }
    elsif ( /^version=/ ) {
        my (undef, $verid) = split /=/;
        say $verid;
    }
    elsif ( /^testnames/i) {
        my (undef, @arr) = split /\,/;
        # Changed all of these similar pieces of code
        # to use the same variable names. As they are
        # defined in different code blocks, they are
        # completely separate variables.
        foreach my $e (@arr) {
            say $e;
        }
    }
    elsif ( /^testnumbers/i) {
        my (undef, @arr) = split /\,/;
        foreach my $e (@arr) {
            say $e;
        }
    }
    elsif ( /^hilimit/i) {
        my (undef, @arr) = split /\,/;
        foreach my $e (@arr) {
            say $e;
        }
    }
    elsif ( /^lolimit/i) {
        my (undef, @arr) = split /\,/;
        foreach my $e (@arr) {
            say $e;
        }
    }
    # And here's the new bit.
    # If we're on the "partid" line, then read every
    # remaining line, split each one and print the first
    # element from the list returned by split().
    # Reading to end of file (rather than a fixed three
    # lines) copes with any number of parts.
    elsif ( /^partid/i) {
        while (my $row = <$fh>) {
            chomp $row;
            next unless length $row;
            my ($part_id) = split /,/, $row;
            say $part_id;
        }
    }
}
close $fh;
Update: By the way, there are no hashes anywhere in this code :-)
Update 2: I've just realised that you only have three different ways to process the data. So you can simplify your code drastically by using slightly more complex regexes.
#!/usr/bin/perl
# Print the values of the file given with --infile: single "key=value"
# settings, comma-separated value rows, and the part id of every row that
# follows the PartID line.
use strict;
use warnings;
use feature 'say';
use Getopt::Long;
my $file = "";
GetOptions ("infile=s" => \$file)
    or die ("Error in command line arguments\n");
open(my $fh, '<', $file) or die "Couldn't open file $file: $!";
while (<$fh>) {
    # say() appends a newline, so strip the one read from the file.
    chomp;
    # Single value - just print it.
    if ( /^(?:lot|program|wafer|version)=/ ) {
        my (undef, $value) = split /=/;
        say $value;
    }
    # List of values - split and print.
    elsif ( /^(?:testnames|testnumbers|hilimit|lolimit)/i) {
        my (undef, @arr) = split /\,/;
        foreach my $e (@arr) {
            say $e;
        }
    }
    # Extract values from following lines: every remaining row starts
    # with a part id, however many rows there are.
    elsif ( /^partid/i) {
        while (my $row = <$fh>) {
            chomp $row;
            next unless length $row;
            my ($part_id) = split /,/, $row;
            say $part_id;
        }
    }
}
close $fh;

Perl output format

I'm reading a log file and grouping it based on the 'Program' name and in turn its ID.
LOG FILE
------------------------------------------
DEV: COM-1258
Program:Testing
Reviewer:Jackie
Description:New Entries
rev:r145201
------------------------------------------
QA: COM-9696
Program:Testing
Reviewer:Poikla
Description:Some random changes
rev:r112356
------------------------------------------
JIRA: COM-1234
Program:Development
Reviewer:John Wick
Description:Genral fix
rev:r345676
------------------------------------------
JIRA:COM-1234
Program:Development
Reviewer:None
Description:Updating Received
rev:r909276
------------------------------------------
JIRA: COM-6789
Program:Testing
Reviewer:Balise Mat
Description:Audited
rev:r876391
------------------------------------------
JIRA: COM-8585
Program:Testing
Reviewer:Gold frt
Description: yet to be reviewed
rev:r565639
The code I have,
#!/usr/bin/perl
# Read log.txt, group its dash-separated records by Program and then by
# change id, dump the resulting structure and print a report of it.
use strict;
use warnings;
use Data::Dumper;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Terse = 1;
my $file = "log.txt";
open my $fh, '<', $file or die "Couldn't open file: [$!]\n";
my $data = {};      # $data->{program}{change id} = [ record, ... ]
my $hash = {};      # fields of the record currently being read

# File the current record under its program and change id, then start a new
# one. A record lacking either value is left in place, as before.
my $store_record = sub {
    my $program = $hash->{Program} || '';
    my $jira = $hash->{JIRA} || $hash->{QA} || $hash->{DEV} || '';
    if ($program && $jira)
    {
        push @{$data->{$program}{$jira}}, $hash;
        $hash = {};
    }
};

while (my $line = <$fh>)
{
    chomp $line;
    if ($line =~ m/(-){2,}/)
    {
        $store_record->();
    }
    elsif ($line =~ m/:/)
    {
        # Limit of 2 keeps any further colons inside the value.
        my ($key, $value) = split /:\s*/, $line, 2;
        $hash->{$key} = $value;
    }
    elsif ($line =~ m#/# && exists $hash->{Files})
    {
        $hash->{Files} .= "\n$line";
    }
}
# The file need not end with a separator line; keep the last record too.
$store_record->();
close $fh;

print 'data = ' . Dumper($data);
foreach my $prg (sort keys %{$data})
{
    print "=" x 60, "\n";
    print " PROGRAM : $prg\n";
    print "=" x 60, "\n";
    foreach my $jira (sort keys %{$data->{$prg}})
    {
        print "******************\n";
        print "JIRA ID : $jira\n";
        print "******************\n";
        foreach my $record (@{$data->{$prg}{$jira}})
        {
            foreach my $key (keys %{$record})
            {
                # print the data except Program and JIRA
                next if $key =~ m/(Program|JIRA|DEV|QA)/;
                print " $key => $record->{$key}\n";
            }
            print "\n";
        }
    }
}
I need to print the output in the format below, but I am currently unable to do so with my logic. Any ideas would be really helpful.
PROGRAM: Development
Change IDs:
1.JIRA
a.COM-1234
PROGRAM: Testing
Change IDs:
1.JIRA
a.COM-6789
b.COM-8585
2.QA
a.COM-9696
3.DEV
a.COM-1258
I would write this
# Read log.txt, whose records are separated by lines of dashes, and print for
# every program the change ids it contains, grouped by tracker type
# (JIRA, QA, DEV).
use strict;
use warnings 'all';
use List::Util 'uniq';

my $file = 'log.txt';

open my $fh, '<', $file or die "Couldn't open file: [$!]\n";

# Pass 1: one hash per record. $item{jira} holds the NAME of the tracker
# field (JIRA, DEV or QA) found in that record.
my @data;

{
    my %item;

    my $store_item = sub {
        push @data, { %item } if keys %item;
        %item = ();
    };

    while ( my $line = <$fh> ) {

        chomp $line;

        if ( $line =~ /\-{2,}/ ) {
            $store_item->();
            next;
        }

        # Limit of 2 keeps colons that occur inside the value.
        my ( $key, $value ) = split /\s*:\s*/, $line, 2;
        next unless defined $value and length $value;

        $item{$key} = $value;
        $item{jira} = $key if grep { $key eq $_ } qw/ JIRA DEV QA /;
    }

    # The last record is not followed by a separator line.
    $store_item->();
}

close $fh;

# Pass 2: $data{program}{tracker type} = [ change id, ... ]
my %data;

{
    for my $item ( @data ) {
        my ($prog, $jira) = @{$item}{qw/ Program jira /};
        # Records without a program or a tracker id cannot be grouped.
        next unless defined $prog and defined $jira;
        push @{ $data{$prog}{$jira} }, $item->{$jira};
    }
}

for my $prog ( sort keys %data ) {

    printf "PROGRAM: %s\n", $prog;
    print "Change IDs:\n";

    my $n = 1;

    for my $jira ( qw/ JIRA QA DEV / ) {

        next unless my $codes = $data{$prog}{$jira};

        printf "%d.%s\n", $n++, $jira;

        # String increment walks the item letter: a, b, c, ...
        my $letter = 'a';
        printf " %s.%s\n", $letter++, $_ for sort { $a cmp $b } uniq(@$codes);
    }

    print "\n";
}
output
PROGRAM: Development
Change IDs:
1.JIRA
a.COM-1234
PROGRAM: Testing
Change IDs:
1.JIRA
a.COM-6789
b.COM-8585
2.QA
a.COM-9696
3.DEV
a.COM-1258
#!/usr/bin/perl -w
# Read the log file 'test' and print, for every program, the change ids found
# in it grouped by the kind of id (the text before the colon on the first line
# of each record).
use strict;
use warnings;
use Data::Dumper;
my $file = 'test';
my $hash = {};          # $hash->{program}{id kind} = [ id, ... ]
my $line_found = 0;     # true when the previous line was a separator
my $ID;                 # id kind of the current record, e.g. JIRA
my $ID_num;             # id value of the current record, e.g. COM-1234
open (my $FH, '<', $file) or die "Cannot open $file: $!";
while (my $line = <$FH> ) {
    chomp($line);
    if ( $line =~ m/------------------------------------------/){
        $line_found = 1;
        next;
    }
    if ( $line_found ) {
        # First line after a separator: "<kind>: <id>". Use the captures only
        # when the match succeeded, otherwise stale $1/$2 would be reused.
        $line_found = 0;
        if ( $line =~ m/^(.*?):\s*(.*)$/ ) {
            ($ID, $ID_num) = ($1, $2);
        }
        else {
            ($ID, $ID_num) = (undef, undef);
        }
    }
    if ( defined $ID && $line =~ m/Program:(.*)/ ) {
        my $pro = $1;
        push @{$hash->{$pro}->{$ID}}, ($ID_num) ;
    }
}
close $FH;
foreach my $pro (keys %$hash){
    # print Dumper($pro);
    print "PROGRAM:\t$pro\nChange IDs:\n";
    # keys/values need an explicit dereference: calling them on a reference
    # directly is no longer supported by current perls.
    foreach my $ids (keys %{ $hash->{$pro} }){
        print "\t1. $ids\n";
        foreach my $id (@{ $hash->{$pro}->{$ids} }){
            print "\t\ta. $id\n";
        }
    }
}
OUTPUT
PROGRAM: Testing
Change IDs:
1. QA
a. COM-9696
1. DEV
a. COM-1258
1. JIRA
a. COM-6789
a. COM-8585
PROGRAM: Development
Change IDs:
1. JIRA
a. COM-1234
a. COM-1234
Just change the output to your need!!

Perl Script is giving error uninialized varilable access

The code sometimes runs cleanly and sometimes gives an error on a Linux host.
I need to find out why the hash value is not being printed.
Error message: Use of uninitialized value in sprintf at ./fa_list.pl line 139, line
Can someone check why I'm getting this error?
use Getopt::Long;
my $sid = '9999';
my $Fa_VSan_Map = 'Fa_VSan_Map';
# usage([$message])
# Print the optional $message followed by the command-line synopsis to STDERR,
# then die with a bare newline so that no file/line suffix is appended.
# A missing or undefined $message prints only the synopsis.
sub usage {
    my ($message) = @_;
    # Treat "no message" as an empty string so printing it raises no
    # uninitialized-value warning.
    $message = '' unless defined $message;
    if (length $message) {
        $message .= "\n"
            unless $message =~ /\n$/;
    }
    # Strip any leading directories from the script path.
    my $command = $0;
    $command =~ s#^.*/##;
    print STDERR (
        $message,
        "usage: $command -sid xxx -outf FA_Mapping\n" .
        "Where -sid: is primary SID to show mappings.\n" .
        " -outf: Output File prefix.\n" .
        " -Reserved...\n"
    );
    die("\n");
}
# Parse -sid (integer) and -outf (string) into $sid and $Fa_VSan_Map; on a
# parse failure report the problem through usage().
GetOptions( 'sid=i' => \$sid, 'outf=s' => \$Fa_VSan_Map) or
usage("Invalid commmand line options.");
print($sid);
# Names of the two CSV reports; both embed the SID.
my $outf = "$Fa_VSan_Map$sid.csv";
my $outf1 = "Fa_VSan_Map1$sid.csv";
# File-scoped working variables shared by the parsing loops further down.
my ($mydir,$dir_port,$dir_port_wwpn,$FaWWPN);
# Lookup tables filled by the code further down. NOTE(review): %FA, %FAH and
# %FAC appear to be keyed by the formatted director/port string and %VSAN by
# WWPN - confirm against the loops that fill them.
my (%FA,%FAH,%FAC,%VSAN);
my ($wwpn,$host,$port,$fcid,$logged,$fab);
# 50:00:09:72:08:4b:05:89, => cdc02-core1-1.yyyyy.xxxx.com,CISCO,fc3/12,VS251,50:00:09:72:08:4b:05:89,,8,Active
# cdc02-core-1-2.yyyyy.xxxx.com,CISCO,fc1/29,VS251,50:00:09:73:00:1c:e1:1c,,8,Active
# LoadVSAN()
# Fill the file-scoped %VSAN from "VSAN<sid>.csv". Only rows containing
# "Active" are used. The key is the WWPN in column 5, lower-cased and with its
# colons removed; the value is an array reference holding columns 4, 3, 1, 7
# and 8 of the row, in that order. Dies when the file cannot be opened.
sub LoadVSAN
{
    my $vsanf = "VSAN$sid.csv";
    open (my $vsan_fh, "<", $vsanf) or die "Could not open $vsanf: $!";
    while (my $row = <$vsan_fh>) {
        next unless $row =~ /Active/;
        # Without chomp the last column would keep its line ending.
        chomp $row;
        my @array = split /,/, $row;
        my $key = $array[4];
        # A row too short to carry a WWPN cannot be indexed.
        next unless defined $key && length $key;
        # Lower-case the key because the lookup lower-cases the WWPN it
        # searches for; a mixed-case key would never be found.
        $key = lc $key;
        $key =~ s/://g;
        my @line_arranged = ($array[3],$array[2],$array[0],$array[6],$array[7]);
        $VSAN{$key} = \@line_arranged;
        print($key, ": ", @{$VSAN{$key}}, "\n");
    }
    close $vsan_fh;
}
# Main flow: load the VSAN table, collect the WWPN of every director port from
# `symcfg`, collect the host logins per director port from `symaccess`, then
# write the two CSV reports.
LoadVSAN();

open (my $out_fh, ">", $outf) or die "Could not open $outf $!";
open (my $out1_fh, ">", $outf1) or die "Could not open $outf1 $!";

# Pass 1: director port -> WWPN, parsed from the verbose FA listing.
my $sidtxt = "sidcfg.fa$sid.txt";
my $cmd = 'symcfg -sid ' . $sid . ' list -fa all -v > ' . $sidtxt;
system($cmd) == 0 or warn "Command '$cmd' exited with status $?\n";
open (my $sym_fh, "<" , $sidtxt ) or die "Could not open $sidtxt $!";
while (my $row = <$sym_fh>) {
    chomp $row;
    if ($row =~ /Director Identification:/) {
        $mydir = $row;
        $mydir =~ s/\s+Director Identification: //;
        $mydir =~ s/FA-//;
    }
    elsif ($row =~ /Director Port:/) {
        $port = $row;
        $port =~ s/\s+Director Port: //;
        $dir_port = sprintf '%04d_%03s_%03d', int($sid), $mydir, int($port);
    }
    elsif ($row =~ /WWN Port Name/) {
        my $port_wwpn = $row;
        $port_wwpn =~ s/\s+WWN Port Name\s+: //;
        $FA{$dir_port} = $port_wwpn;
    }
}
close($sym_fh);

# Pass 2: director port -> logged-in hosts, parsed from the login listing.
$sidtxt = 'symaccess.ll.' . $sid . '.txt';
$cmd = 'symaccess -sid ' . $sid . ' list logins > ' . $sidtxt;
system($cmd) == 0 or warn "Command '$cmd' exited with status $?\n";
open ($sym_fh, "<" , $sidtxt ) or die "Could not open $sidtxt $!";
while (my $row = <$sym_fh>) {
    chomp $row;
    if ($row =~ /Director Identification/) {
        $mydir = $row;
        $mydir =~ s/Director Identification\s+:\s+//;
        $mydir =~ s/FA-//;
    }
    elsif ($row =~ /Director Port/) {
        $port = $row;
        $port =~ s/Director Port\s+:\s+//;
        $dir_port = sprintf '%04d_%03s_%03d', int($sid),$mydir, int($port);
    }
    elsif ($row =~ /Fibre/) {
        my ($login_wwpn, undef, $login_host, $login_port, $login_fcid, $is_logged)
            = split ' ', $row;
        # Ignore rows that are too short to be login entries, and entries seen
        # before any director port was announced.
        next unless defined $is_logged && defined $dir_port;

        my $host_port;
        if( lc($login_host) eq 'null') {
            $host_port = substr($login_wwpn,10,6);
        }
        else {
            $host_port = $login_host . '_' . $login_port . '_' . substr($login_wwpn,12,4);
        }
        if (exists $FAH{$dir_port}) {
            $FAH{$dir_port} .= ':' . $host_port;
            $FAC{$dir_port} += 1;
        } else {
            $FAH{$dir_port} = $host_port;
            $FAC{$dir_port} = 1;
        }
        if ( $is_logged eq "Yes") {
            # The port may be missing from pass 1; write an empty WWPN then.
            my $fa_wwpn = defined $FA{$dir_port} ? $FA{$dir_port} : '';
            my $line = sprintf ( '%s,%s,%s,%s', $dir_port, $fa_wwpn, $host_port, $login_fcid);
            print {$out1_fh} $line . "\n";
        }
    }
}
close($sym_fh);

print {$out_fh} "Fa,FaWWPN,VSan,HostCount,PERCENT_BUSY,HostNames\n";
my $PERCENT_BUSY=10.0;
# Sorted so that the report order is the same on every run.
foreach my $fa ( sort keys %FAC) {
    # Each lookup below can legitimately miss: the port may be absent from the
    # symcfg listing, or its WWPN absent from the VSAN file. Passing the
    # resulting undef to sprintf is what raised "Use of uninitialized value in
    # sprintf"; substitute an empty field instead. exists() also avoids
    # autovivifying an empty %VSAN entry.
    my $FaWwpn = defined $FA{$fa} ? lc($FA{$fa}) : '';
    my $vsan = exists $VSAN{$FaWwpn} ? $VSAN{$FaWwpn}[0] : undef;
    $vsan = '' unless defined $vsan;
    my $hosts = defined $FAH{$fa} ? lc($FAH{$fa}) : '';
    my $line = sprintf ('%s,%s,%s,%s,%3.2f,%s', $fa, $FaWwpn, $vsan, $FAC{$fa}, $PERCENT_BUSY, $hosts);
    print {$out_fh} $line . "\n";
}
# Write errors on buffered handles only show up at close time.
close($out_fh) or die "Could not close $outf $!";
close($out1_fh) or die "Could not close $outf1 $!";
I believe the problem is with lc($FAH{$fa}).
Have you checked that %FAH actually contains an entry for every key you look up?

stockholm to fasta format - include accession id in every header

Hello, I have multiple sequences in Stockholm format. At the top of every alignment there is an accession ID, for example '#=GF AC PF00406', and '//' marks the end of the alignment. When I convert the Stockholm format to FASTA format, I need PF00406 in the header of every sequence of that particular alignment. Sometimes there will be multiple Stockholm alignments in one file. I tried to modify the following Perl script, but it gave me bizarre results; any help will be greatly appreciated.
# Convert Stockholm alignments (from the files named on the command line, or
# from standard input) to FASTA. Sequence lines are accumulated per name and
# written by printseq() whenever an alignment terminator "//" is read.
my $columns = 60;   # residues per FASTA output line
my $gapped = 0;     # -g: keep gap characters
my $sorted = 0;     # -s: write sequences sorted by name
my $progname = $0;
$progname =~ s/^.*?([^\/]+)$/$1/;
my $usage = "Usage: $progname [<Stockholm file(s)>]\n";
$usage .= " [-h] print this help message\n";
$usage .= " [-g] write gapped FASTA output\n";
$usage .= " [-s] sort sequences by name\n";
$usage .= " [-c <cols>] number of columns for FASTA output (default is $columns)\n";
# parse cmd-line opts
my @argv;
while (@ARGV) {
    my $arg = shift @ARGV;
    if ($arg eq "-h") {
        die $usage;
    } elsif ($arg eq "-g") {
        $gapped = 1;
    } elsif ($arg eq "-s"){
        $sorted = 1;
    } elsif ($arg eq "-c") {
        defined ($columns = shift @ARGV) or die $usage;
        # The output loop advances by $columns, so anything other than a
        # positive integer would never terminate.
        $columns =~ /^[1-9][0-9]*$/ or die $usage;
    } else {
        push @argv, $arg;
    }
}
# What is left are the input files; <> below reads them in order.
@ARGV = @argv;
my %seq;    # sequence name -> concatenated sequence of the current alignment
while (<>) {
    next unless /\S/;
    next if /^\s*\#/;
    if (/^\s*\/\//) { printseq() }
    else {
        chomp;
        my ($name, $seq) = split;
        next unless defined $seq;
        # Ungapped output is the default: drop '.' and '-' unless -g is given.
        $seq =~ s/[\.\-]//g unless $gapped;
        $seq{$name} .= $seq;
    }
}
printseq();
# printseq()
# Write every sequence collected in %seq to the selected output handle in
# FASTA format: a ">name" header followed by the sequence in lines of at most
# $columns characters. Names are sorted when $sorted is true. %seq is emptied
# afterwards so the next alignment starts clean.
sub printseq {
    my @names = $sorted ? (sort keys %seq) : (keys %seq);
    foreach my $name (@names) {
        print ">$name\n";
        my $residues = $seq{$name};
        for (my $i = 0; $i < length $residues; $i += $columns) {
            print substr($residues, $i, $columns), "\n";
        }
    }
    %seq = ();
}
Depending on how much variation there is in the line with the accession ID, you might need to modify the regex, but this works for your example file:
# Replacement read loop: remember the accession of the current alignment
# (its "#=GF AC <id>" line) and append it to the name of every sequence.
my %seq;
my $aln = '';   # accession of the current alignment; empty when none was seen
while (<>) {
    if (/#=GF AC (\w+)/) {
        $aln = $1;
        next;
    }
    next unless /\S/;
    next if /^\s*\#/;
    if (/^\s*\/\//) {
        printseq();
        # The accession belongs to the alignment that just ended.
        $aln = '';
    }
    else {
        chomp;
        my ($name, $seq) = split;
        next unless defined $seq;
        # Leave the name untouched when the alignment has no accession line.
        $name = $name . ' ' . $aln if length $aln;
        $seq{$name} .= $seq;
    }
}
printseq();

Extracting multiple lines of record/data using a subroutine or functions

Can you show me how to create a subroutine or function using this code?
Basically I want to make my code into a subroutine so I'll be able to re-use it without making my script too long.
Here is my script:
#!/usr/local/bin/perl
# Read data5.txt and, for every INVDET record, write the most recent INVHDR
# line, the INVDET line and the line that follows it to drp1.txt (second field
# starts with "I") or to drp2.txt (anything else).
use strict;
use warnings;
use Data::Dumper;
use Carp qw(croak);
my $filename = 'data5.txt';
open (my $input_fh, '<', $filename ) or croak "Can't open $filename: $!";
open my $OUTPUTA, ">", 'drp1.txt' or die $!;
open my $OUTPUTB, ">", 'drp2.txt' or die $!;
# Empty until the first INVHDR line, so a detail line appearing before any
# header is still written without a warning.
my $header = '';
while (my $raw = <$input_fh>) {
    my $line = _trim($raw);
    my @fields = split (/\|/, $line);
    my $rec_type = defined $fields[0] ? $fields[0] : '';
    $header = $line if $raw =~ /^INVHDR/;
    next unless $rec_type eq 'INVDET';

    # First character of the second field selects the output file.
    my $string = defined $fields[1] ? substr($fields[1], 0, 1) : '';
    # The line after an INVDET line belongs to the same record; at end of
    # file there is none.
    my $following = <$input_fh>;
    $following = '' unless defined $following;
    my $records = $header . $line;
    if ($string eq 'I') {
        print $OUTPUTA $records, $following;
    }
    else {
        print $OUTPUTB $records, $following;
    }
}
close $input_fh;
close $OUTPUTA or die $!;
close $OUTPUTB or die $!;
# _trim($word)
# Return $word with the whitespace in front of every '|' removed and all
# double quotes deleted. A false value (undef, '' or '0') is returned as is.
sub _trim {
    my ($word) = @_;
    return $word unless $word;

    $word =~ s/\s*\|/\|/g;    # no blanks before a field separator
    $word =~ tr/"//d;         # drop every double quote
    return $word;
}
This is the part of the script that I wanted to put in a subroutine or function:
# Body of the read loop: clean the current line, remember the latest INVHDR
# line and route each INVDET record, together with the line that follows it,
# to one of the two output files.
$line = _trim($_);
@fields = split (/\|/, $line);
$rec_type = $fields[0];
$country = $fields[1];
my $string = substr $fields[1], 0, 1;
$header = $line if (/^INVHDR/);
if ($rec_type eq 'INVDET') {
    if ($string eq 'I') {
        $records = $header . $line;
        print $OUTPUTA $records, scalar <$input_fh>;
    }
    else {
        $records = $header . $line;
        print $OUTPUTB $records, scalar <$input_fh>;
    }
}
I would suggest breaking it out a little differently and expanding your _trim function, turning it into a parse function:
# Split the INVDET records of data5.txt over two files: records whose second
# field starts with "I" go to drp1.txt, all others to drp2.txt. Each record is
# preceded by the most recent INVHDR line.
use strict;
use warnings;
my $filename = 'data5.txt';
open( my $input_fh, '<', $filename ) or die "Can't open $filename: $!";
open( my $OUTPUTA, '>', 'drp1.txt' ) or die $!;
open( my $OUTPUTB, '>', 'drp2.txt' ) or die $!;
my $header = '';
while (my $row = <$input_fh>) {
    if ($row =~ /^INVHDR/) {
        $header = $row;
    }
    if ($row =~ /^INVDET/) {
        my @data = parse($row);
        my $line = $header . join('|', @data);
        # scalar <$input_fh> is almost certainly not doing what you expect,
        # though I'm not sure what you're try to accomplish with it
        # A record with a single field has no second field to test.
        my $second = defined $data[1] ? $data[1] : '';
        if ( $second =~ /^I/ ) {
            print $OUTPUTA $line;
        } else {
            print $OUTPUTB $line;
        }
    }
}
close $input_fh;
close $OUTPUTA or die $!;
close $OUTPUTB or die $!;
# parse($input)
# Split one '|'-separated record into its fields.
# Double quotes are removed and the whitespace in front of each separator is
# dropped. Returns the list of fields, or an empty list when $input is
# undefined or empty.
sub parse {
    my ($input) = @_;
    return unless defined $input && length $input;
    # Operate on the argument itself: declaring the variable a second time
    # would apply the substitution to a new, empty variable.
    $input =~ s/"//g; # remove double quotes
    # Here I've combined the removal of trailing spaces with the split.
    my @fields = split( m{\s*\|}, $input );
    return @fields;
}