formatting output data to an excel file - perl

This program gets numeric values from the web for each of the values in my @values array.
I want these values to be printed out in a table which looks like
il9 il8 il7
2012 v1 b1
2011 v2 b2
2010 v3 b3
.
.
2000 v12 b12
where v1 .. v12 are the values for the first variable in @values, b1 .. b12 the values for the second, and so on. Here is my program; please help me structure it. Is there an escape character in Perl that could take me back to the first line of the program?
thanks
#!/usr/bin/perl -w
use strict;
use LWP::UserAgent;
use URI;

# Question code. The asker's logic is kept as posted; only the array sigils
# that were mangled in the listing and the statements that stopped the script
# from compiling are repaired.
my $browser = LWP::UserAgent->new;
$browser->timeout(10);
$browser->env_proxy;

open(OUT, '>', 'out') or die "Cannot open 'out' for writing: $!";

# Write the years 2012 down to 2000, one per line.
my $i = 2013;
while ($i-- > 2000) { print OUT "$i\n" }

my $a = 2013;
my $base = 'http://webtools.mf.uni-lj.si/public/summarisenumbers.php';
my @values = ('il9', 'il8', 'il6');
foreach my $value (@values) {
    print OUT "$value \n";
    while ($a-- > 2000) {
        my $b = $a + 1;
        my $c = $b + 1;
        my $query = '?query=('.$value.')'.$a.'[dp] NOT '.$b.'[dp] NOT '.$c.'[dp]';
        my $add = $base.$query;
        #my $url = URI->new($add);
        #my $response = $browser->get($url);
        #if($response->is_success) {print OUT $response->decoded_content;}
        #else {die $response->status_line};
        print OUT "$query\n";
    }
    $a = 2013;          # reset the year counter for the next value
    print OUT "\n";     # the posted code had a bare "print OUT" here
}
close(OUT);

Pay more attention to formatting/indentation and variable naming - it will help you a lot.
#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;

# Fetch one result per (value, year) pair from the summarisenumbers service
# and print a tab-separated table: one row per year (newest first) and one
# column per entry of @values.
my $base_url = 'http://webtools.mf.uni-lj.si/public/summarisenumbers.php';
my @values = ( 'il9', 'il8', 'il6' );
my $stat_data = {};    # $stat_data->{$year}->{$value} = fetched content

my $browser = LWP::UserAgent->new;
$browser->timeout(10);
$browser->env_proxy;

for my $value ( @values ) {
    for my $year ( 2010 .. 2013 ) {
        my $query = '?query=(' . $value . ')' . $year .'[dp] NOT ' . ($year+1) . '[dp] NOT ' . ($year+2) .'[dp]';
        my $url = "$base_url$query";
        my $response = $browser->get( $url );
        if ( !$response->is_success ) {
            die "GET $url failed: ", $response->status_line, "\n";
        }

        ## store the fetched content in a hash structure
        my $content = $response->decoded_content;
        $content = '' unless defined $content;
        $content =~ s/\s+\z//;    # a trailing newline would split the table row
        $stat_data->{$year}->{$value} = $content;
    }
}

## print the header
print "\t", join( "\t", @values ), "\n";

## print the data by the years in reverse order
for my $year ( sort { $b <=> $a } keys %{$stat_data} ) {
    print $year;
    for my $value ( @values ) {
        print "\t", $stat_data->{$year}->{$value};
    }
    print "\n";
}

Related

Unable to retrieve multiple column values from file in Perl

I have a file with following contents:
TIME
DATE TIME DAY
191227 055526 FRI
RC DEV SERVER
RC1 SERVER1
RC2 SERVER2
RC3 SERVER3
END
I am fetching argument values from this file, say if I pass DATE as an argument to the script I am getting corresponding value of the DATE. i.e., 191227
When I pass multiple arguments say DATE, DAY I should get values:
DATE=191227
DAY=FRI
But what I am getting here is:
DATE=191227
DAY=NULL
And if I pass RC as an argument I should get:
RC=RC1,RC2,RC3
The script looks below:
#!/usr/bin/perl
use strict;
use Data::Dumper;

# Question code. The asker's logic is kept as posted; only the array sigils
# that were mangled in the listing are restored.
print Dumper(\@ARGV);
foreach my $name (@ARGV) {
    print "NAME:$name\n";
    my ($result, $start, $stop, $width) = "";
    # STDIN is read to end-of-file inside the loop over the names, so from
    # the second name onwards this while loop has nothing left to read.
    while (my $head = <STDIN>)
    {
        if( $head =~ (m/\b$name\b/g))
        {
            # Column range of the matched header word ...
            $start = (pos $head) - length($name);
            $stop = (pos $head);
            # ... applied to the line that follows the header line.
            my $line = <STDIN>;
            pos $head = $stop+1;
            $head =~ (m/\b/g);
            $width = (pos $head) - $start;
            $result = substr($line,$start,$width);
        }
    }
    $result =~ s/^\s*(.*?)\s*$/$1/;
    print "$name=";
    $result = "NULL" if ( $result eq "" );
    print "$result\n";
}
Can someone please help me get the values when I pass multiple arguments? Also, if an argument has values on several lines, they should be printed as comma-separated values (e.g. for RC: RC=RC1,RC2,RC3).
Here is an example, assuming the input file is named file.txt and the values are starting at the same horizontal position as the keys:
package Main;
use feature qw(say);
use strict;
use warnings;
use Data::Dumper qw(Dumper);

# Build the reader for 'file.txt' with the keys requested on the command
# line, then parse the file and print one KEY=VALUE line per requested key.
my $self = Main->new(fn => 'file.txt', params => [@ARGV]);
$self->read_file();
$self->print_values();
# Read the file named in $self->{fn} in paragraph mode (blocks separated by
# blank lines) and hand every block to parse_block().
# Resets $self->{values} before parsing; dies if the file cannot be opened.
sub read_file {
    my ( $self ) = @_;
    my $fn = $self->{fn};
    open ( my $fh, '<', $fn ) or die "Could not open file '$fn': $!";
    local $/ = ""; #Paragraph mode
    my @blocks = <$fh>;
    close $fh;
    $self->{values} = {};
    for my $block (@blocks) {
        $self->parse_block( $block );
    }
}
# Parse one block: the first line is the header naming the keys, every later
# line carries values. For each key, the value is the run of non-blank
# characters that starts at the key's column in the header; it is appended
# to $self->{values}{$key}.
sub parse_block {
    my ( $self, $block ) = @_;
    my @lines = split /\n/, $block;
    my $header = shift @lines;
    return unless defined $header;    # empty block: nothing to parse

    my ( $keys, $startpos_of ) = $self->get_block_keys( $header );
    for my $line ( @lines ) {
        for my $key ( @$keys ) {
            my $startpos = $startpos_of->{$key};

            # A line shorter than the key's column has no value for it;
            # calling substr there would warn and return undef.
            next if $startpos >= length $line;

            my $str = substr $line, $startpos;
            my ( $value ) = $str =~ /^(\S+)/;
            if ( defined $value ) {
                push @{ $self->{values}{$key} }, $value;
            }
        }
    }
}
# Split a header line into its whitespace-separated keys.
# Returns two references: the keys in header order, and a hash mapping each
# key to the column (0-based offset) at which it starts in the header.
# Also makes sure $self->{values} holds an array for every key seen.
sub get_block_keys {
    my ( $self, $header ) = @_;
    my $values = $self->{values};
    my @keys;
    my %spos;
    while ( $header =~ /(\S+)/g ) {
        my $key = $1;
        $spos{$key} = $-[1];    # offset where capture group 1 starts
        push @keys, $key;
    }
    for my $key (@keys) {
        $values->{$key} = [] unless exists $values->{$key};
    }
    return ( \@keys, \%spos );
}
# Constructor: the key/value arguments become the fields of the new object.
sub new {
    my ( $class, %args ) = @_;
    return bless \%args, $class;
}
# Print one "KEY=v1,v2,..." line for every key in $self->{params}; a key
# that was never seen in the input prints "<NO VALUE FOUND>" instead.
sub print_values {
    my ( $self ) = @_;
    my $values = $self->{values};
    for my $key ( @{ $self->{params} } ) {
        my $value = "<NO VALUE FOUND>";
        if ( exists $values->{$key} ) {
            $value = join ",", @{ $values->{$key} };
        }
        say "$key=$value";
    }
}
Edit
If you want to read the file from STDIN instead, change the following part of the code:
# [...]
my $self = Main->new(params => [@ARGV]);
$self->read_file();
$self->print_values();

# STDIN variant: read all blocks (paragraph mode, blank-line separated) from
# standard input and hand every block to parse_block().
sub read_file {
    my ( $self ) = @_;
    local $/ = ""; #Paragraph mode
    my @blocks = <STDIN>;
    $self->{values} = {};
    for my $block (@blocks) {
        $self->parse_block( $block );
    }
}
# [...]

Perl output format

I'm reading a log file and grouping it based on the 'Program' name and in turn its ID.
LOG FILE
------------------------------------------
DEV: COM-1258
Program:Testing
Reviewer:Jackie
Description:New Entries
rev:r145201
------------------------------------------
QA: COM-9696
Program:Testing
Reviewer:Poikla
Description:Some random changes
rev:r112356
------------------------------------------
JIRA: COM-1234
Program:Development
Reviewer:John Wick
Description:Genral fix
rev:r345676
------------------------------------------
JIRA:COM-1234
Program:Development
Reviewer:None
Description:Updating Received
rev:r909276
------------------------------------------
JIRA: COM-6789
Program:Testing
Reviewer:Balise Mat
Description:Audited
rev:r876391
------------------------------------------
JIRA: COM-8585
Program:Testing
Reviewer:Gold frt
Description: yet to be reviewed
rev:r565639
The code I have,
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Terse = 1;

# Question code. The asker's logic is kept as posted; only the mangled array
# sigils and the "+" line-wrap markers left in the listing are repaired.
my $file = "log.txt";
open FH, $file or die "Couldn't open file: [$!]\n";
my $data = {};    # $data->{program}{id} = [ record hashes ]
my $hash = {};    # the record currently being collected
while (<FH>)
{
    my $line = $_;
    chomp $line;
    if ($line =~ m/(-){2,}/)
    {
        # A dashed line ends the current record.
        my $program = $hash->{Program} || '';
        my $jira = $hash->{JIRA} || $hash->{QA} || $hash->{DEV} || '';
        if ($program && $jira)
        {
            push @{$data->{$program}{$jira}}, $hash;
            $hash = {};
        }
    }
    else
    {
        if ($line =~ m/:/)
        {
            my ($key, $value) = split /:\s*/, $line;
            $hash->{$key} = $value;
        }
        elsif ($line =~ m#/# && exists $hash->{Files})
        {
            $hash->{Files} .= "\n$line";
        }
    }
}
print 'data = ' . Dumper($data);
foreach my $prg (sort keys %{$data})
{
    print "============================================================\n";
    print " PROGRAM : $prg \n";
    print "============================================================\n";
    foreach my $jira (sort keys %{$data->{$prg}})
    {
        print "******************\n";
        print "JIRA ID : $jira\n";
        print "******************\n";
        foreach my $hash (@{$data->{$prg}{$jira}})
        {
            foreach my $key (keys %{$hash})
            {
                # print the data except Program and JIRA
                next if $key =~ m/(Program|JIRA|DEV|QA)/;
                print " $key => $hash->{$key}\n";
            }
            print "\n";
        }
    }
}
I have a requirement to print the output in the below format and currently unable to do so with my logic, any ideas would be really helpful.
PROGRAM: Development
Change IDs:
1.JIRA
a.COM-1234
PROGRAM: Testing
Change IDs:
1.JIRA
a.COM-6789
b.COM-8585
2.QA
a.COM-9696
3.DEV
a.COM-1258
I would write this
use strict;
use warnings 'all';
use List::Util 'uniq';    # uniq needs List::Util 1.45 or later

my $file = 'log.txt';
open my $fh, '<', $file or die "Couldn't open file: [$!]\n";

# Pass 1: turn the log into one hash per record. Records are separated by
# lines of dashes; the key that carries the change ID (JIRA, DEV or QA) is
# remembered under the extra key "jira".
my @data;
{
    my %item;
    while ( my $line = <$fh> ) {
        chomp $line;
        $line =~ s/\A\s+|\s+\z//g;
        my $is_separator = $line =~ /\A-{2,}\z/;

        if ( !$is_separator ) {
            # Limit of 2 keeps colons inside the value intact.
            my ( $key, $value ) = split /\s*:\s*/, $line, 2;
            if ( defined $value and length $value ) {
                $item{$key} = $value;
                $item{jira} = $key if grep { $key eq $_ } qw/ JIRA DEV QA /;
            }
        }

        # Flush at a separator and also after the final line, so that the
        # last line of the file is parsed before its record is stored.
        if ( $is_separator or eof $fh ) {
            push @data, { %item } if keys %item;
            %item = ();
        }
    }
}
close $fh;

# Pass 2: group the change IDs as $data{program}{id type} = [ ids ].
my %data;
{
    for my $item ( @data ) {
        my ($prog, $jira) = @{$item}{qw/ Program jira /};
        next unless defined $prog and defined $jira;    # incomplete record
        push @{ $data{$prog}{$jira} }, $item->{$jira};
    }
}

# Report: programs alphabetically, ID types in fixed order, IDs de-duplicated
# and sorted.
for my $prog ( sort keys %data ) {
    printf "PROGRAM: %s\n", $prog;
    print "Change IDs:\n";
    my $n = 1;
    for my $jira ( qw/ JIRA QA DEV / ) {
        next unless my $codes = $data{$prog}{$jira};
        printf "%d.%s\n", $n++, $jira;
        my $letter = 'a';
        printf " %s.%s\n", $letter++, $_ for sort { $a cmp $b } uniq @{$codes};
    }
    print "\n";
}
output
PROGRAM: Development
Change IDs:
1.JIRA
a.COM-1234
PROGRAM: Testing
Change IDs:
1.JIRA
a.COM-6789
b.COM-8585
2.QA
a.COM-9696
3.DEV
a.COM-1258
#!/usr/bin/perl -w
use strict;
use warnings;
use Data::Dumper;

# Group the change IDs of a dash-separated log by program and ID type and
# print them. The line right after a dashed line is taken as "TYPE:ID"; the
# "Program:" line of the same record decides where that ID is stored.
my $file = 'test';
my $hash = {};         # $hash->{program}{id type} = [ ids ]
my $line_found = 0;    # true while the next line is a record's first line
my $ID;
my $ID_num;

open (my $FH, '<', $file) or die "Cannot open '$file': $!\n";
while (my $line = <$FH> ) {
    chomp($line);
    if ( $line =~ m/------------------------------------------/){
        $line_found = 1;
        next;
    }
    if ( $line_found ) {
        # Only trust the captures when the line really matched.
        if ( $line =~ m/(.*?):(.*)/ ) {
            ( $ID, $ID_num ) = ( $1, $2 );
        }
        else {
            ( $ID, $ID_num ) = ( undef, undef );
        }
        $line_found = 0;
    }
    if ( $line =~ m/Program:(.*)/ ) {
        my $pro = $1;
        push @{$hash->{$pro}->{$ID}}, ($ID_num) if defined $ID;
    }
}
close $FH;

# The list labels ("1." and "a.") are fixed strings, as in the posted answer.
foreach my $pro (keys %{$hash}){
    # print Dumper($pro);
    print "PROGRAM:\t$pro\nChange IDs:\n";
    # keys/values on a reference were removed in Perl 5.24; dereference explicitly.
    foreach my $ids (keys %{ $hash->{$pro} }){
        print "\t1. $ids\n";
        foreach my $id (@{ $hash->{$pro}->{$ids} }){
            print "\t\ta. $id\n";
        }
    }
}
OUTPUT
PROGRAM: Testing
Change IDs:
1. QA
a. COM-9696
1. DEV
a. COM-1258
1. JIRA
a. COM-6789
a. COM-8585
PROGRAM: Development
Change IDs:
1. JIRA
a. COM-1234
a. COM-1234
Just change the output to your need!!

Printing all HTML Tables with certain string for multiple files (perl)

I am trying to print all the HTML tables containing the string "kcat" for each xml file in a directory but I am having some trouble. Note that each file in the directory (named kcat_tables) has at least one HTML table with kcat in it. I am running this program on an ubuntu virtual machine. Here is my code:
#!/usr/bin/perl
use warnings;
use strict;
use File::Slurp;
use Path::Iterator::Rule;
use HTML::TableExtract;
use utf8::all;

# Question code. The asker's logic is kept as posted; only the array sigils
# that were mangled in the listing are restored.
my @papers_dir_path = qw(/home/bob/kinase/kcat_tables);
my $rule = Path::Iterator::Rule->new;
$rule->name("*.nxml");
$rule->skip_dirs(".");
my $xml;
my $it = $rule->iter(@papers_dir_path);
while ( my $file = $it->() ) {
    $xml = read_file($file);
    my $te = HTML::TableExtract->new();
    $te->parse($xml);
    foreach my $ts ( $te->tables ) {
        # $ts is a table object, so this pattern is matched against the
        # object itself rather than against the text inside the table.
        if ( $ts =~ /kcat/i ) {
            print "Table (", join( ',', $ts->coords ), "):\n";
            foreach my $row ( $ts->rows ) {
                print join( ',', @$row ), "\n";
            }
        }
    }
}
Any ideas on how I should fix this? Thanks in advance! Also, I am fairly new to Perl, so a simple, comprehensible answer would be very much appreciated.
You cannot apply a regex to an object, as you do in:
if ( $ts =~ /kcat/i ) {
I'd suggest, parsing the tables in 'tree' mode. For this, you'd have to install two additional perl modules: HTML::TreeBuilder and HTML::ElementTable. Enable it like this:
use HTML::TableExtract 'tree';
Here's the fixed while loop:
# For every file from the iterator: parse it, and pretty-print each table
# whose text contains "kcat". Needs HTML::TableExtract loaded in 'tree' mode,
# because the table text is taken from the element tree.
while ( my $file = $it->() ) {
    $xml = read_file($file);
    my $te = HTML::TableExtract->new();
    $te->parse($xml);
    foreach my $ts ( $te->tables ) {
        # $! carries no information here, so report the table and file instead.
        my $tree = $ts->tree
            or die "No element tree for table (", join( ',', $ts->coords ),
                   ") in $file - is HTML::TableExtract loaded with 'tree'?\n";
        if ( $tree->as_text =~ /kcat/i ) {
            print "Table (", join( ',', $ts->coords ), "):\n";
            # update 18.2.2015: pretty print the table
            foreach my $row ($ts->rows) {
                # A row may contain empty slots; print those as blank cells.
                print join ' | ',
                    map { sprintf "%22s", ref $_ ? $_->as_text : defined $_ ? $_ : '' } @{$row};
                print "\n";
                # which is the same as
                # foreach my $cell (@{$row}) { do something with $cell->as_text }
            }
        }
    }
}
$tree is an HTML::ElementTable object. The code above works with your sample.

Renaming files using hash table in perl

I have made a perl code which is shown below. Here what I am trying to do is first get input from a text file consisting of a HTTP URL with a Title.
thus the first regex is the title and the second regex fetches the id from inside the URL.
All these values are inserted into the hash table %myfilenames().
So this hash table has key as the URL id, and value as the Title. Everything till here works fine, now I have a set of files on my computer which have the ID in their name which we extracted from the URL.
What I want to do is that if the ID is there in the hash table, then the files name should change to the value assigned to the ID. Now the output at the print statement in the last function is correct but I am unable to rename the files. I tried many things, but nothing works. Can someone help please.
example stuff:
URL: https://abc.com/789012 <--- ID
Value (new Title) : ABC
file name on computer = file-789012 <---- ID
new file name = ABC
My code:
use File::Slurp;
use File::Copy qw(move);

# Question code. The asker's logic is kept as posted; only the array sigils
# that were mangled in the listing are restored.

# Build %myfilenames: six-digit ID from the URL line => "Lecture" title line.
open( F, '<hadoop.txt' );
$key = '';
$value = '';
%myfilenames = ();
foreach (<F>) {
    if ( $_ =~ /Lecture/ ) {
        $value = $_;
    }
    if ( $_ =~ /https/ ) {
        if ( $_ =~ /\d{6}/ ) {
            $key = $&;
        }
    }
    if ( !( $value eq '' || $key eq '' ) ) {
        #print "$key\t\t$value";
        $myfilenames{$key} = $value;
        $key = '';
        $value = '';
    }
}
#while ( my ( $k, $v ) = each %myfilenames ) { print "$k $v\n"; }

# Print each file in the folder next to the title stored for its ID.
# Nothing is renamed here.
my @files = read_dir 'C:\\inputfolder';
for (@files) {
    if ( $_ =~ /\d{6}/ ) {
        $oldval = $&;
    }
    $newval = $myfilenames{$oldval};
    chomp($newval);
    print $_ , "\t\t$newval" . "\n";
    $key = '';
}
You probably didn't prepend the path to the file names. The following works for me (on a Linux box):
#!/usr/bin/perl
use warnings;
use strict;
use File::Slurp qw{ read_dir };

# Demo set-up: a scratch directory (named "0") holding two empty files whose
# names are the six-digit IDs used in the DATA section below.
my $dir = 0;
mkdir $dir or -d $dir or die "Cannot create directory '$dir': $!\n";
for my $name ( 123456, 234567 ) {
    open my $FH, '>', "$dir/$name" or die "Cannot create '$dir/$name': $!\n";
    close $FH;
}

# Build %myfilenames: six-digit ID from the URL line => "Lecture" title line.
my $key = my $value = q();
my %myfilenames = ();
while ( my $line = <DATA> ) {
    chomp $line;
    $value = $line if $line =~ /Lecture/;
    $key = $1 if $line =~ /https/ and $line =~ /(\d{6})/;
    if ($value ne q() and $key ne q()) {
        $myfilenames{$key} = $value;
        $key = $value = q();
    }
}

# Rename every file whose name contains a known ID to the stored title.
# The path is prepended on both sides, and the real file name is used as the
# source, so names such as "file-123456" are handled too.
my @files = read_dir($dir);
for my $file (@files) {
    next unless $file =~ /(\d{6})/;
    my $oldval = $1;
    my $newval = $myfilenames{$oldval};
    if ( !defined $newval ) {
        warn "No title known for '$file'; skipping\n";
        next;
    }
    rename "$dir/$file", "$dir/$newval"
        or warn "Cannot rename '$dir/$file' to '$dir/$newval': $!\n";
}
__DATA__
Lecture A1
https://123456
# Comment
Lecture A2
https://234567

Read MS Word table data row wise using win32:ole perl

I am new to the Win32::OLE module in Perl. I am trying to print MS Word table data row by row at the command prompt, but I am only able to print the last row of the table. Can you please help me solve this problem? Thanks in advance.
Below is my code:
#!/usr/bin/perl
use strict;
use warnings;
use File::Spec::Functions qw( catfile );
use Win32::OLE qw(in);
use Win32::OLE::Const 'Microsoft Word';
$Win32::OLE::Warn = 3;

# Question code. The asker's logic is kept as posted; only the "$@" error
# variable that was mangled in the listing is restored.
my $word = get_word();
$word->{DisplayAlerts} = wdAlertsNone;
$word->{Visible} = 1;
my $doc = $word->{Documents}->Open('C:\\PerlScripts\\myTest.docx');
my $tables = $word->ActiveDocument->{'Tables'};
for my $table (in $tables)
{
    my $tableText = $table->ConvertToText({ Separator => wdSeparateByTabs });
    print "Table: ". $tableText->Text(). "\n";
}
$doc->Close(0);

# Return a Word application object: the one reported by GetActiveObject if
# there is one, otherwise a newly started instance that quits when the
# object is destroyed. Dies if neither can be obtained.
sub get_word
{
    my $word;
    eval { $word = Win32::OLE->GetActiveObject('Word.Application');};
    die "$@\n" if $@;
    unless(defined $word)
    {
        $word = Win32::OLE->new('Word.Application', sub { $_[0]->Quit })
            or die "Oops, cannot start Word: ", Win32::OLE->LastError, "\n";
    }
    return $word;
}
Not a perfect solution by any means but here's an advancement on the problem.
I used a string separator "\n\n" which produces the following output ...
Further hacking required :(
C:\StackOverflow>perl word.pl meTest.docx
Table: Header1
Header2
Header3
Header4
Row1-Cell1
Row1-Cell2
Row1-Cell3
Row1-Cell4
Row2-Cell1
Row2-Cell2
Row2-Cell3
Row2-Cell4
Row2-Cell5
Here's the code. I have commented out some other code in the tables loop that I used to hack on the data returned by $tableRange->{Text} Uncomment to experiment further.
#!/usr/bin/perl
use strict;
use warnings;
use File::Spec::Functions qw( catfile rel2abs );
use Win32::OLE qw(in);
use Win32::OLE::Const 'Microsoft Word';
$Win32::OLE::Warn = 3;

# Document to read: the first command-line argument, or meTest.docx when none
# is given. An absolute path is passed so that the lookup does not depend on
# Word's own current directory.
my $doc_path = rel2abs( @ARGV ? $ARGV[0] : 'meTest.docx' );

my $word = get_word();
$word->{DisplayAlerts} = wdAlertsNone;
$word->{Visible} = 1;
my $doc = $word->{Documents}->Open($doc_path);

# Take the tables from the document that was just opened.
my $tables = $doc->{'Tables'};
for my $table (in $tables)
{
    # Convert the table to text with a blank line between cells and print it.
    my $tableRange = $table->ConvertToText({ Separator => "\n\n" });
    print "Table: \n" . $tableRange->{Text}. "\n";
    # foreach $word (split/\n/, $tableRange->{Text}) {
    # print $word . "\n" ;
    # # $userinput = <STDIN>;
    # }
}
$doc->Close(0);    # close without saving

# Return a Word application object: the one reported by GetActiveObject if
# there is one, otherwise a newly started instance that quits when the
# object is destroyed. Dies if neither can be obtained.
sub get_word
{
    my $word;
    eval { $word = Win32::OLE->GetActiveObject('Word.Application');};
    die "$@\n" if $@;
    unless(defined $word)
    {
        $word = Win32::OLE->new('Word.Application', sub { $_[0]->Quit })
            or die "Oops, cannot start Word: ", Win32::OLE->LastError, "\n";
    }
    return $word;
}
Sorry I couldn't be of more help.
extract all the doc tables into a single xls file
# Copy every 5-column table of the Word document $DocFile into a single
# worksheet of "$DocFile.xls", one sheet row per table row.
#
# Uses $DocFile and $objLogger, which are defined outside this sub.
# Returns ( $ret, $msg ): $ret is 0 once the document has been processed
# (it starts at 1, "assume failure"), $msg is the last message built.
# Dies if the workbook, the Word application or the document cannot be opened.
sub doParseDoc {

    my $msg = '' ;
    my $ret = 1 ; # assume failure at the beginning ...

    $msg = 'START --- doParseDoc' ;
    $objLogger->LogDebugMsg( $msg );
    $msg = 'using the following DocFile: "' . $DocFile . '"' ;
    $objLogger->LogInfoMsg( $msg );
    #-----------------------------------------------------------------------
    #Using OLE + OLE constants for Variants and OLE enumeration for Enumerations

    # Create a new Excel workbook
    my $objWorkBook = Spreadsheet::WriteExcel->new("$DocFile" . '.xls')
        or die "Could not create ", $DocFile, ".xls: $!";
    # Add a worksheet
    my $objWorkSheet = $objWorkBook->add_worksheet();

    Win32::OLE->Option(Warn => \&Carp::croak);
    # Declared at compile time for the whole package; kept because code
    # outside this sub may refer to it.
    use constant true => 0;

    # at this point you should have the Word application opened in UI with
    # the DocFile
    # build the MS Word object during run-time
    # "||" (not "or") so that a newly started instance is actually assigned
    # when no running Word application is found.
    my $objMSWord = Win32::OLE->GetActiveObject('Word.Application')
        || Win32::OLE->new('Word.Application', 'Quit')
        or die "Could not start Word. Error:", Win32::OLE->LastError();

    # build the doc object during run-time
    my $objDoc = $objMSWord->Documents->Open($DocFile)
        or die "Could not open ", $DocFile, " Error:", Win32::OLE->LastError();

    #Set the screen to Visible, so that you can see what is going on
    $objMSWord->{'Visible'} = 1;
    # try NOT printing directly to the file
    #$objMSWord->ActiveDocument->SaveAs({Filename => 'AlteredTest.docx',
    #FileFormat => wdFormatDocument});

    my $tables = $objMSWord->ActiveDocument->Tables();
    my $tableText = '' ;
    my $xlsRow = 1 ;

    for my $table (in $tables){
        # extract the table text as a single string
        #$tableText = $table->ConvertToText({ Separator => 'wdSeparateByTabs' });

        # cheated those properties from here:
        # https://msdn.microsoft.com/en-us/library/aa537149(v=office.11).aspx#officewordautomatingtablesdata_populateatablewithdata
        my $RowsCount = $table->{'Rows'}->{'Count'} ;
        my $ColsCount = $table->{'Columns'}->{'Count'} ;

        # discard the tables whose column count is not 5
        next unless ( $ColsCount == 5 ) ;

        $msg = "Rows Count: $RowsCount " ;
        $msg .= "Cols Count: $ColsCount " ;
        $objLogger->LogDebugMsg ( $msg ) ;

        #my $tableRange = $table->ConvertToText({ Separator => '##' });
        # OBS !!! simple print WILL print to your doc file use Select ?!
        #$objLogger->LogDebugMsg ( $tableRange . "\n" );

        # Word numbers table rows and columns from 1; index 0 only ever
        # produced failed lookups (written out as 'N/A').
        foreach my $row ( 1..$RowsCount ) {
            foreach my $col ( 1..$ColsCount ) {

                # nope ... $table->cell($row,$col)->->{'WrapText'} = 1 ;
                # nope $table->cell($row,$col)->{'WordWrap'} = 1 ;
                # so so $table->cell($row,$col)->WordWrap() ;

                my $txt = '';
                # well some 1% of the values are so nasty that we really give up on them ...
                my $ok = eval {
                    $txt = $table->cell($row,$col)->range->{'Text'};
                    #replace all the ctrl chars by space
                    $txt =~ s/\r/ /g ;
                    $txt =~ s/[^\040-\176]/ /g ;
                    # perform some cleansing - ColName<primary key>=> ColName
                    #$txt =~ s#^(.[a-zA-Z_0-9]*)(\<.*)#$1#g ;

                    # this will most probably break your cmd ...
                    # $objLogger->LogDebugMsg ( "row: $row , col: $col with txt: $txt \n" ) ;
                    1;    # success marker, independent of the substitution count
                };
                $txt = 'N/A' unless $ok ;

                # The worksheet counts columns from 0, the Word table from 1.
                $objWorkSheet->write($xlsRow, $col - 1, $txt);

            } #eof foreach col

            # we just want to dump all the tables into the one sheet
            $xlsRow++ ;
        } #eof foreach row
        sleep 1 ;
    } #eof foreach table

    # close the opened in the UI document
    $objMSWord->ActiveDocument->Close;

    # OBS !!! now we are able to print
    $objLogger->LogDebugMsg ( $tableText . "\n" );

    # exit the whole Word application
    $objMSWord->Quit;

    # make sure the spreadsheet is written out before returning
    $objWorkBook->close();

    $ret = 0 ;
    return ( $ret , $msg ) ;
}
#eof sub doParseDoc
Use below lines of code
# Suggested replacement: open the document through the Documents method and
# read 'Tables' from the $word object itself.
# NOTE(review): $word is the Word application object in the script this
# refers to - confirm that it exposes a Tables collection; otherwise take
# the tables from $doc instead.
my $doc = $word->Documents->Open('C:\\PerlScripts\\myTest.docx');
my $tables = $word->{'Tables'};
instead of below code
# Lines being replaced: open the document through the Documents property and
# read 'Tables' from the application's active document.
my $doc = $word->{Documents}->Open('C:\\PerlScripts\\myTest.docx');
my $tables = $word->ActiveDocument->{'Tables'};
Your problem should then be solved.