I am trying to execute a Perl script using crontab.
Manually, the script works fine, but when I run it from cron I get this error:
/home/dev/test.csv : not readable
/home/dev/test.csv is a file generated by the script, but under cron it gets created as /home/test.csv instead, and I don't know how or why.
This is my crontab:
*/3 * * * * /home/dev/metrique.pl &> /home/dev/output.txt
This is my code:
#!/sw/freetools/perl/5.8.8/Linux/rh50/x86_64/bin/perl
#use strict ;
#use warnings ;
use DBI ;
use DateTime ;
use Text::CSV;
use MIME::Lite;
my $Month = DateTime->now->subtract(months=>0)->truncate(to=>'month')->strftime('%B');
my $Date = DateTime->now->subtract(months=>0)->truncate(to=>'month')->strftime('%Y-%m');
$Date ="%".$Date."%" ;
my %info = (db => "ilico", host => "gnx5910.gnb.st.com", user => "ilicousr", pass => "" );
my $dbh = DBI->connect("DBI:mysql:$info{db};$info{host}", $info{user}, $info{pass});
my @record;
my %Report;
my @other;
my @region = qw{EMEA AME ASIA INDIA Global-WAN};
my @scope = qw{wan lan specific};
my $total_weekly = 0;
my $total_usage = 0;
my $weekly = '2';
my $usage = '1';
my @top_user;
my @array;
my @user;
my %hash = ();
my %sum = ();
my %LOGIN_W = ();
my %Groupe = ();
my %hash1 = ();
my %Nom_Complet = ();
my %NUMBER = ();
my $filename1="NBgenerated_Reports.csv";
my $filename2="Report_Scope.csv";
my $filename3 ="Top_10_Features.csv";
my $filename4 ="Top_10_Users.csv";
my $filename5 ="/sw/st/itcad/setup/shared_data/ldp_om.csv";
my $filename6 ="Report_Groupe.csv";
open(my $fh1, ">", $filename1) or die "cannot open $filename1: $!";
open(my $fh2, ">", $filename2) or die "cannot open $filename2: $!";
open(my $fh3, ">", $filename3) or die "cannot open $filename3: $!";
open(my $fh4, ">", $filename4) or die "cannot open $filename4: $!";
open(my $fh5, "<", $filename5) or die "cannot open $filename5: $!";
open(my $fh6, ">", $filename6) or die "cannot open $filename6: $!";
print $fh1 "Region; Usage_Report; Weekly; \n";
print $fh2 "Scope; NB; \n";
print $fh3 "Feature; NB; \n";
print $fh4 "User; NB_Report ;Groupe \n";
print $fh6 "Groupe; NB_Report \n";
#usage & weekly
my $sql = qq/SELECT COUNT( `Region`.`RegID` ) FROM `iLico_Log`, `Region` WHERE `iLico_Log`.`Date` LIKE ? AND `Region`.`RegID` = `iLico_Log`.`RegID` AND `iLico_Log`.`Type` = ?
AND `Region`.`RegName` LIKE ? / ;
foreach my $reg (@region){
foreach my $type ($weekly, $usage){
my $sth = $dbh->prepare($sql) or die ("unable to prepare");
$sth->execute($Date, $type, $reg);
@record = $sth->fetchrow_array();
$Report{$reg}{$type} = $record[0];
}
}
foreach my $reg (keys %Report) {
$total_usage += $Report{$reg}{$usage};
$total_weekly += $Report{$reg}{$weekly};
print $fh1 "$reg ; $Report{$reg}{$usage}; $Report{$reg}{$weekly} \n";
}
print $fh1 "total; $total_usage; $total_weekly; \n";
#scope
my $SCOPE = qq/SELECT COUNT(logID ) FROM `iLico_Log` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`scope`= ?/;
foreach my $sc (@scope){
my $sth = $dbh->prepare($SCOPE) or die ("unable to prepare");
$sth->execute($Date, $sc);
my @record = $sth->fetchrow_array();
print $fh2 "$sc; @record; \n";
}
#Top 10 features
my $TopFeatures = qq/SELECT `Feature`.`FeatName` , COUNT( * ) NB FROM `iLico_Log`, `Feature` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`FeatID` = `Feature`.`FeatID` GROUP BY `Feature`.`FeatID` ORDER BY NB DESC LIMIT 10 /;
my $sth = $dbh->prepare($TopFeatures) or die ("unable to prepare");
$sth->execute($Date) ;
while( @record = $sth->fetchrow_array())
{
print $fh3 "$record[0]; $record[1]; \n";
}
#other features number
my $Other = qq/SELECT COUNT(DISTINCT `iLico_Log`.`FeatID`) NB FROM `iLico_Log`, `Feature` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`FeatID` = `Feature`.`FeatID`/;
$sth = $dbh->prepare($Other) or die ("unable to prepare");
$sth->execute($Date) ;
@record = $sth->fetchrow_array();
$other[0] = $record[0] - 10;
print $fh3 "Other_features_number; @other \n";
#total usage of all and other features
my $TotalUsage =qq/SELECT COUNT( * ) SU FROM `iLico_Log` , `Feature` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`FeatID` = `Feature`.`FeatID`/;
my $SUMTopFeatures = qq/select sum(NB) from (SELECT `Feature`.`FeatName` , COUNT( * ) NB FROM `iLico_Log`, `Feature` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`FeatID` = `Feature`.`FeatID` GROUP BY `Feature`.`FeatID` ORDER BY NB DESC LIMIT 10) AS subquery /;
$sth = $dbh->prepare($TotalUsage) or die ("unable to prepare");
my $sth1 = $dbh->prepare($SUMTopFeatures) or die ("unable to prepare");
$sth->execute($Date) ;
$sth1->execute($Date) ;
@record = $sth->fetchrow_array();
my @sum = $sth1->fetchrow_array();
$other[0] = $record[0] - $sum[0];
print $fh3 "Other_total_usage; @other \n";
#select login windows and groupe from file ldp_om.csv to be used in top_10_user and nomber Report/Groupe
while (<$fh5>) {
chomp;
my ($mail, $uid, $site, $grp, $dvs, $cnt, $ccost, $mng, $typ, $phone, $first, $last, $login, $cn) = split ';', lc($_), 14;
if (! exists $LOGIN_W{$login}) {
$LOGIN_W{$login} = $grp;
}
if (! exists $hash{$login}) {
$Groupe{$login} = $grp;
$Nom_Complet{$login} = $cn;
}
}
#top 10 user / Groups
my $TopUsers = qq/select ilicoUserLogin, COUNT(*) NB, Display from ilico_log I where Date like ? GROUP BY I.ilicoUserLogin ORDER BY NB DESC LIMIT 10/;
$sth = $dbh->prepare($TopUsers) or die ("unable to prepare");
$sth->execute($Date) ;
while( @top_user = $sth->fetchrow_array())
{
$top_user[0] =~ s/\s+/ /g;
push (@array, lc($top_user[0]));
my $login = lc($top_user[0]);
$NUMBER{$login} = $top_user[1];
}
foreach my $login ( @array ){
$hash1{$login} = $Groupe{$login};
}
foreach my $login (sort {$NUMBER{$b} <=> $NUMBER{$a}} keys %hash1) {
my $grpe = uc($hash1{$login}) ;
my $name = ucfirst($Nom_Complet{$login});
print $fh4 "$name ; $NUMBER{$login} ; $grpe ; \n";
}
#Report/Groupe
my $Groupe = qq/select ilicoUserLogin, Count(*) NB from ilico_log I where Date like ? GROUP BY I.ilicoUserLogin ORDER BY NB DESC /;
$sth = $dbh->prepare($Groupe) or die ("unable to prepare");
$sth->execute($Date) ;
while( @user = $sth->fetchrow_array())
{
$user[0] =~ s/\s+/ /g;
my $login = lc($user[0]);
$LOGIN_W{my $grp}{$login} = $user[1];
}
foreach my $login ( keys %LOGIN_W) {
if (defined( $login ) and $login ne '')
{
$sum{$LOGIN_W{$login}} += $LOGIN_W{my $var}{$login} ;
}
}
for my $key (sort {$sum{$b} <=> $sum{$a}} keys %sum) {
if ($sum{$key})
{
my $KEYS = uc($key);
print $fh6 "$KEYS; $sum{$key}; \n";
}
}
close $fh1;
close $fh2;
close $fh3;
close $fh4;
close $fh5;
close $fh6;
my $msg = MIME::Lite->new (
From => 'maha.mastouri@st.com',
To => 'maha.mastouri@st.com',
# Cc => 'maha.mastouri@st.com',
Subject => "iLico Metrique $Month",
Type => 'text/plain' ,
Path => '/home/dev/text'
);
$msg->attach( Type => 'TEXT',
Path => '/home/dev/NBgenerated_Reports.csv',
Disposition => 'attachment',
Filename => 'NB_generated_Reports.csv'
);
$msg->attach( Type => 'TEXT',
Path => '/home/dev/Top_10_Features.csv',
Disposition => 'attachment',
Filename => 'Top_10_Features.csv'
);
$msg->attach( Type => 'TEXT',
Path => '/home/dev/Report_Scope.csv',
Disposition => 'attachment',
Filename => 'Report_Scope.csv'
);
$msg->attach( Type => 'TEXT',
Path => '/home/dev/Top_10_Users.csv',
Disposition => 'attachment',
Filename => 'Top_10_Users.csv'
);
$msg->attach( Type => 'TEXT',
Path => '/home/dev/Report_Groupe.csv',
Disposition => 'attachment',
Filename => 'Report_Groupe.csv'
);
$msg->send();
The cron context is very different from a login shell: it has almost no environment variables by default. It appears to me that your program depends on $ENV{USER} to build its output (or input) paths; that env var is simply missing under cron, because crontabs are executed by the cron daemon, not by your login shell.
You can try printing the whole %ENV to somewhere like "/tmp/env.txt" just to see that it's basically an empty hash. It's best if you can change the program not to depend on env vars. You may also try to add them back right before the schedule line:
USER=dev
*/3 * * * * /home/dev/metrique.pl &> /home/dev/output.txt
Note that after doing this, the env var USER becomes present for all the schedule lines below those two lines. Those env vars can also be inspected with ps e.
If an env var is required just to decide an input path, then it's as easy as taking the input path from @ARGV instead, as in the sketch below.
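A minimal sketch (the dump path /tmp/env.txt and the argument handling are illustrative, not part of the original script):
#!/usr/bin/perl
use strict;
use warnings;
# Dump whatever environment cron provides, to see how sparse it is.
open my $env_fh, '>', '/tmp/env.txt' or die "cannot open /tmp/env.txt: $!";
print $env_fh "$_=$ENV{$_}\n" for sort keys %ENV;
close $env_fh;
# Take the input path from @ARGV instead of deriving it from an env var.
my ($input_path) = @ARGV;
die "usage: $0 <input-file>\n" unless defined $input_path;
print "would read from: $input_path\n";
The matching schedule line then passes the path explicitly:
*/3 * * * * /home/dev/metrique.pl /home/dev/test.csv &> /home/dev/output.txt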
Cron does not run your .profile (or .bash_profile for bash) before executing commands, so source it ahead of your command in the crontab as shown below. Similarly, if there are any other profile scripts you run on login that the Perl script relies on, those have to be sourced as well.
*/3 * * * * . $HOME/.profile; /home/dev/metrique.pl &> /home/dev/output.txt
I solved the problem: crontab executes the script with the home directory "/home/httpldev/" as its working directory (the default), so I changed the working directory as follows:
0 9 1 * * cd /home/httpldev/iLicoMetrics/ && /home/httpldev/iLicoMetrics/metrique.pl &> /dev/null
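For reference, the same effect is possible without the cd by resolving paths inside the script relative to its own location. A minimal sketch using the core FindBin module (the file name is illustrative):
use strict;
use warnings;
use FindBin qw($Bin); # $Bin = directory containing the running script
# Build output paths from $Bin so the directory cron starts in doesn't matter.
my $filename1 = "$Bin/NBgenerated_Reports.csv";
open my $fh1, '>', $filename1 or die "cannot open $filename1: $!";
print $fh1 "Region; Usage_Report; Weekly; \n";
close $fh1;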
Thank you a lot for your help.
I have a problem with my code. I have 1 GB of records, which I have to sort by date and time. The records look like:
TYP_journal article|KEY_1926000001|AED_|TIT_A Late Eighteenth-Century Purist|TPA_|GLO_Pronouncements of George Campbell and his contemporaries which time has set aside.|AUT_Bryan, W. F.|AUS_|AFF_|RES_|IED_|TOC_|FJN_Studies in Philology|ISN_0039-3738|ESN_|PLA_Chapel Hill, NC|URL_|DAT_1926|VOL_23|ISS_|EXT_358-370|CPP_|FSN_|ISN_|PLA_|SNO_|PUB_|IBZ_|PLA_|PYR_|PAG_|DAN_|DGI_|DGY_|OFP_|OFU_|FSS_|PDF_|LIB_|INO_|FAU_|INH_|IUR_|INU_|CDT_9/15/2003 3:12:28 PM|MDT_5/16/2017 9:18:40 AM|
I sort these records using the MDT_ field (e.g. MDT_5/16/2017 9:18:40 AM).
I used the technique below:
First I split the file by whether a record has an MDT_ date or not (creating two files, one with MDT_ and one without).
Code for the MDT_ data:
open read_file, '<:encoding(UTF-8)', "$current_in/$file_name" or die "cannot open $current_in/$file_name: $!";
my @Dt_ModifiedDate = grep { $_ =~ /MDT_([0-9]+)\/([0-9]+)\/([0-9]+) ([0-9]+):([0-9]+):([0-9]+) ([A-Z]+)/i} <read_file>;
my $doc_MD = new IO::File(">$current_ou/output/$file_name_with_out_ext.ModifiedDate");
$doc_MD->binmode(':utf8');
print $doc_MD @Dt_ModifiedDate;
$doc_MD->close;
close (read_file);
Code for the non-MDT_ data:
open read_file, '<:encoding(UTF-8)', "$current_in/$file_name" or die "cannot open $current_in/$file_name: $!";
my @un_ModifiedDate = grep { $_ !~ /MDT_([0-9]+)\/([0-9]+)\/([0-9]+) ([0-9]+):([0-9]+):([0-9]+) ([A-Z]+)/} <read_file>;
my $doc_UMD = new IO::File(">$current_ou/output/$file_name_with_out_ext.unModifiedDate");
$doc_UMD->binmode(':utf8');
print $doc_UMD @un_ModifiedDate;
$doc_UMD->close;
close (read_file);
From the file containing MDT_, I collect all the dates and times, sort them, and then remove duplicates:
@modi_date = map $_->[0],
sort { uc($a->[1]) cmp uc($b->[1]) } map { [ $_, toISO8601($_) ] } @modi_date;
@modi_date = reverse (@modi_date);
@modi_date = uniq (@modi_date);
According to the sorted dates and times, I grep all matching records from the MDT_ file and finally create the final file:
my $doc1 = new IO::File(">$current_ou/output/$file_name_with_out_ext.sorted_data");
$doc1->binmode(':utf8');
foreach my $changes (@modi_date)
{
chomp($changes);
$Count_pro++;
@ab = grep (/$changes/, @all_data_with_time);
print $doc1 ("@ab\n");
$progress_bar->update($Count_pro);
}
$doc1->close;
But this process takes a very long time. Is there any way to do it in less time?
As you pointed out, doing everything in memory is not an option on your machine. However, I do not see why you first sort the dates and then grep all records with each date, instead of sorting all of those records on the date directly.
I also suspect that if you went through the original file line by line, rather than in one huge map-sort-grep pipeline, you might save some memory, but I'll leave that up to you to try; it would save you creating the intermediate files and then re-parsing them.
I would suggest doing steps 2 and 3 in one go:
Skip building @modi_date (which happens somewhere not visible to us :/).
use strict;
use warnings;
use DateTime::Format::Strptime;

my $mdt_fn = 'with_mdt.txt'; # <- whatever name you gave that file?
open ( my $fh, '<:encoding(UTF-8)', $mdt_fn )
or die "could not open file '$mdt_fn' to read: $!";
my $dt_parser = DateTime::Format::Strptime->new(
pattern => '%m/%d/%Y %r',
);
# get all records from file. To ensure we only need to parse the line once,
# store the datetime in a hashref.
my @records;
while ( my $line = <$fh> ){
push @records, {
dt => _dt_from_record($line),
record => $line,
};
}
# If you wanted to CMP rather than doing datetime comparison,
# adapt _dt_from_record and use 'cmp' instead of '<=>'
@records = sort { $a->{dt} <=> $b->{dt} } @records;
open ( my $out_fh, '> :encoding(UTF-8)', 'sorted.txt') or
die "could not open file to write to: $!";
# Or reverse first if you want latest to oldest
print $out_fh $_->{record} for @records; # lines read from the file still end in "\n"
close $out_fh;
# I prefer using DateTime for this.
# Using a parser will alert me if some date was set, but cannot be parsed.
# If you want to spare yourself some additional time,
# why not store the parsed date in the file. However, I doubt this takes long.
sub _dt_from_record {
my $record = shift;
$record =~ /MDT_([^\|]+)/;
return $dt_parser->parse_datetime($1);
}
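A note on the design: precomputing the DateTime in the hashref is a decorate-sort-undecorate pattern, so each date is parsed once rather than once per comparison. If building DateTime objects turns out to be the bottleneck, sorting on the ISO-8601 string is equivalent here, because that format compares chronologically with cmp. A sketch under that assumption, reusing $dt_parser from above (@raw_lines is a hypothetical array holding the raw records):
# Decorate each line with its sortable ISO-8601 key, sort, undecorate.
my @sorted =
    map  { $_->[1] }
    sort { $a->[0] cmp $b->[0] }
    map  { [ $dt_parser->parse_datetime( (/MDT_([^\|]+)/)[0] )->iso8601, $_ ] }
    @raw_lines; # assumes every line matches /MDT_/; an unmatched line would die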
Finally I got it done.
The complete code is:
use warnings;
use strict;
use 5.010;
use Cwd;
binmode STDOUT, ":utf8";
use Date::Simple ('date', 'today');
use Time::Simple;
use Encode;
use Time::Piece;
use Win32::Console::ANSI;
use Term::ANSIScreen qw/:color /;
use File::Copy;
BEGIN {our $start_run = time();
my $Start = localtime;
print colored ['bold green'], ("\nstart time :- $Start\n");
}
## variables
my $current_dir = getcwd();
my $current_in = $ARGV[0];
my $current_ou = $ARGV[1];
my @un_ext_file;
my @un_ext_file1;
my $current_data =today();
my $time = Time::Simple->new();
my $hour = $time->hours;
my $minute = $time->minutes;
my $second = $time->seconds;
my $current_time = "$hour"."-"."$minute"."-"."$second";
my $ren_folder = "output_"."$current_data"."_"."$current_time";
##check for output name DIR
opendir(DIR1, $current_ou);
my @current_ou_folder = readdir(DIR1);
closedir(DIR1);
foreach my $entry (@current_ou_folder)
{
if ($entry eq "output")
{
move "$current_ou/output" , "$current_ou/$ren_folder";
mkdir "$current_ou/output";
}
else
{
mkdir "$current_ou/output";
}
}
opendir(DIR, $current_in);
my @files_and_folder = readdir(DIR);
closedir(DIR);
foreach my $entry (@files_and_folder)
{
next if $entry eq '.' or $entry eq '..';
next if -d $entry;
push(@un_ext_file1, $entry);
}
##### check duplicate file name
my %seen;
my @file_test;
foreach my $file_name (@un_ext_file1)
{
if ($file_name =~ /(.*)\.([a-z]+)$/)
{
push (@file_test, $1);
}
else
{
push (@file_test, $file_name);
}
}
foreach my $string (@file_test)
{
next unless $seen{$string}++;
print "'$string' is duplicated.\n";
}
##collect all file from array
foreach my $file_name (@un_ext_file1)
{
my $REC_counter=0;
if ($file_name =~ /(.*)\.([a-z]+)$/) #####work for all extension
{
my $file_name_with_out_ext = $1;
my @modi_date_not_found;
eval{
#####read source file
#####First short file date wise (old date appear first then new date apper in last)
##### To get modifiedDate from the file
open read_file, '<:encoding(UTF-8)', "$current_in/$file_name" or die "cannot open $current_in/$file_name: $!";
my @Dt_ModifiedDate = grep { $_ =~ /MDT_([0-9]+)\/([0-9]+)\/([0-9]+) ([0-9]+):([0-9]+):([0-9]+) ([A-Z]+)/i} <read_file>;
my $doc_MD = new IO::File(">$current_ou/output/$file_name_with_out_ext.ModifiedDate");
$doc_MD->binmode(':utf8');
print $doc_MD @Dt_ModifiedDate;
$doc_MD->close;
close (read_file);
@Dt_ModifiedDate = (); ##### free after use
print colored ['bold green'], ("\n\tAll ModifiedDate data Filtered\n\n");
##### To get un-modifiedDate from the file
open read_file, '<:encoding(UTF-8)', "$current_in/$file_name" or die "cannot open $current_in/$file_name: $!";
my @un_ModifiedDate = grep { $_ !~ /MDT_([0-9]+)\/([0-9]+)\/([0-9]+) ([0-9]+):([0-9]+):([0-9]+) ([A-Z]+)/} <read_file>;
my $doc_UMD = new IO::File(">$current_ou/output/$file_name_with_out_ext.unModifiedDate");
$doc_UMD->binmode(':utf8');
print $doc_UMD @un_ModifiedDate;
$doc_UMD->close;
close (read_file);
@un_ModifiedDate = (); ##### free after use
print colored ['bold green'], ("\n\tAll unModifiedDate data Filtered\n\n\n\n");
##### Read ModifiedDate
open read_file_ModifiedDate, '<:encoding(UTF-8)', "$current_ou/output/$file_name_with_out_ext.ModifiedDate" or die "cannot open file: $!";
my @all_ModifiedDate = <read_file_ModifiedDate>;
close(read_file_ModifiedDate);
##### write in sotred_data file ModifiedDate after sorting all data.
my $doc1 = new IO::File(">$current_ou/output/$file_name_with_out_ext.sorted_data");
$doc1->binmode(':utf8');
print $doc1 sort { (toISO8601($a)) cmp (toISO8601($b)) } @all_ModifiedDate;
$doc1->close;
##### Read sorted_data and do in reverse order and then read unModifiedDate data and write in final file.
open read_file_ModifiedDate, '<:encoding(UTF-8)', "$current_ou/output/$file_name_with_out_ext.sorted_data" or die "cannot open file: $!";
my @all_sorted_data = <read_file_ModifiedDate>;
close(read_file_ModifiedDate);
@all_sorted_data = reverse (@all_sorted_data);
open read_file_ModifiedDate, '<:encoding(UTF-8)', "$current_ou/output/$file_name_with_out_ext.unModifiedDate" or die "cannot open file: $!";
my @all_unModifiedDate = <read_file_ModifiedDate>;
close(read_file_ModifiedDate);
my $doc_final = new IO::File(">$current_ou/output/$file_name_with_out_ext.txt"); # use the saved name; $1 may have been clobbered by the greps above
$doc_final->binmode(':utf8');
print $doc_final @all_sorted_data;
print $doc_final @all_unModifiedDate;
$doc_final->close;
unlink("$current_ou/output/$file_name_with_out_ext.ModifiedDate");
unlink("$current_ou/output/$file_name_with_out_ext.sorted_data");
unlink("$current_ou/output/$file_name_with_out_ext.unModifiedDate");
}
}
}
#####Process Complete.
say "\n\n---------------------------------------------";
print colored ['bold green'], ("\tProcess Completed\n");
say "---------------------------------------------\n";
get_time();
sub toISO8601
{
my $record = shift;
$record =~ /MDT_([^\|]+)/;
return(Time::Piece->strptime($1, '%m/%d/%Y %I:%M:%S %p')->datetime);
}
sub get_time
{
my $end_run = time();
my $run_time = $end_run - our $start_run;
#my $days = int($sec/(24*60*60));
my $hours = ($run_time/(60*60))%24;
my $mins =($run_time/60)%60;
my $secs = $run_time%60;
print "\nJob took";
print colored ['bold green'], (" $hours:$mins:$secs ");
print "to complete this process\n";
my $End = localtime;
print colored ['bold green'], ("\nEnd time :- $End\n");
}
The whole process now completes within 20 minutes.
I am especially very thankful to @bytepusher.
I want to print a random English word from a dictionary file in a Unix terminal using Perl: select a random entry line and print it together with the 2 lines that follow it.
But my code doesn't accomplish this.
Please help me improve it.
An example of the output I wish:
#inspire: ....
ghk
lko...
Dictionary file:
#inspiration: mean....
abc def...
ghk lmn
...
#inspire: ....
ghk
lko...
#people: ...
...
The complete dictionary file is here: anhviet109K.txt. It's about 14 MB.
My code:
use strict;
use warnings;
use File::Copy qw(copy move);
my $files = 'anhviet109K.txt';
my $fh;
my $linewanted = 16 + int( rand( 513796 - 16 ) );
# 513796: number of lines of file dic.txt
open( $fh, "<", $files ) or die "cannot open $files: $!";
my $del = " {2,}";
my $temp = 0;
my $count = 0;
while ( my $line = <$fh> ) {
if ( ( $line =~ "#" ) && ( $. > $linewanted ) ) {
$count = 4;
}
else {
next;
}
if ( $count > 0 ) {
print $line;
$count--;
}
else {
last;
}
}
close $fh;
Something like this, perhaps?
Your data has helped me to exclude the header entries in your dictionary file.
This program finds the locations of all of the entries (lines beginning with #) in the file, then chooses one at random and prints it.
Good luck learning English!
use strict;
use warnings 'all';
use Fcntl ':seek';
use constant FILE => 'anhviet109K.txt';
open my $fh, '<', FILE or die qq{Unable to open "@{[FILE]}" for input: $!};
my @seek; # Locations of all the definitions
my $addr = tell $fh;
while ( <$fh> ) {
push @seek, $addr if /^\#(?!00-)/;
$addr = tell $fh;
}
my $choice = $seek[rand @seek];
seek $fh, $choice, SEEK_SET;
print scalar <$fh>;
while ( <$fh> ) {
last if /^\#/;
print;
}
Output:
#finesse /fi'nes/
* danh từ
- sự khéo léo, sự phân biệt tế nhị
- mưu mẹo, mánh khoé
* động từ
- dùng mưu đoạt (cái gì); dùng mưu đẩy (ai) làm gì; dùng mưu, dùng kế
=to finesse something away+ dùng mưu đoạt cái gì
A single pass approach:
use strict;
use warnings;
use autodie;
open my $fh, '<:utf8', 'anhviet109K.txt';
my $definition = '';
my $count;
my $select;
while (my $line = <$fh>) {
if ($line =~ /^#(?!00-)/) {
++$count;
$select = rand($count) < 1;
if ($select) {
$definition = $line;
}
}
elsif ($select) {
$definition .= $line;
}
}
# remove blank line that some entries have
$definition =~ s/^\s+\z//m;
binmode STDOUT, ':utf8';
print $definition;
This iterative random selection always selects the first item, has a 1/2 chance of replacing it with the second item, a 1/3 chance for the third, and so on.
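The same one-in-$n replacement trick can be isolated into a tiny one-pass "pick a random line" sketch (the filename is illustrative):
use strict;
use warnings;

my $file = 'anhviet109K.txt';
open my $fh, '<', $file or die "cannot open $file: $!";
my ( $picked, $n ) = ( undef, 0 );
while ( my $line = <$fh> ) {
    ++$n;
    # Keep this line with probability 1/$n; after the whole file has been
    # read, every line is equally likely to be the survivor.
    $picked = $line if rand($n) < 1;
}
close $fh;
print $picked if defined $picked;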
I'm in the process of learning how to use Perl for genomics applications. I am trying to clean up paired-end reads (1 forward, 1 reverse). These are stored in 2 files, but the lines match up. What I'm having trouble with is getting the relevant subroutines to read from the second file (the warnings I get are for uninitialized values).
These files are in four-line FASTQ blocks, where the first line is a run ID, the 2nd is a sequence, the 3rd is a "+", and the fourth holds quality values for the sequence in line 2.
I had no real trouble with this code when it was applied to only one file, but I think I'm misunderstanding how to handle multiple files.
Any guidance is much appreciated!
The warning in this scenario is: Use of uninitialized value $thisline in subtraction (-) at ./pairedendtrim.pl line 137, line 4.
#!/usr/bin/perl
#pairedendtrim.pl by AHU
use strict;
use warnings;
die "usage: readtrimmer.pl <file1> <file2> <nthreshold> " unless #ARGV == 3;
my $nthreshold = "$ARGV[2]";
open( my $fastq1, "<", "$ARGV[0]" );
open( my $fastq2, "<", "$ARGV[1]" );
my #forline;
my #revline;
while ( not eof $fastq2 and not eof $fastq1 ) {
chomp $fastq1;
chomp $fastq2;
$forline[0] = <$fastq1>;
$forline[1] = <$fastq1>;
$forline[2] = <$fastq1>;
$forline[3] = <$fastq1>;
$revline[0] = <$fastq2>;
$revline[1] = <$fastq2>;
$revline[2] = <$fastq2>;
$revline[3] = <$fastq2>;
my $ncheckfor = removen( $forline[1] );
my $ncheckrev = removen( $revline[1] );
my $fortest = 0;
if ( $ncheckfor =~ /ok/ ) { $fortest = 1 }
my $revtest = 0;
if ( $ncheckrev =~ /ok/ ) { $revtest = 1 }
if ( $fortest == 1 and $revtest == 1 ) { print "READ 1 AND READ 2" }
if ( $fortest == 1 and $revtest == 0 ) { print "Read 1 only" }
if ( $fortest == 0 and $revtest == 1 ) { print "READ 2 only" }
}
sub removen {
my ($thisline) = $_;
my $ntotal = 0;
for ( my $i = 0; $i < length($thisline) - 1; $i++ ) {
my $pos = substr( $thisline, $i, 1 );
#print "$pos\n";
if ( $pos =~ /N/ ) { $ntotal++ }
}
my $nout;
if ( $ntotal <= $nthreshold ) #threshold for N
{
$nout = "ok";
} else {
$nout = "bad";
}
return ($nout);
}
The parameters to a subroutine are in @_, not $_
sub removen {
my ($thisline) = @_;
I have a few other tips for you as well:
Use autodie; any time you're doing file processing.
Assign the values in @ARGV to variables first thing. This quickly documents what they hold.
Do not chomp a file handle; it does not do anything. Instead, apply chomp to the values returned from reading.
Do not use the strings ok and bad as boolean values.
tr can be used to count the number of times a character appears in a string.
The following is a cleaned up version of your code:
#!/usr/bin/perl
#pairedendtrim.pl by AHU
use strict;
use warnings;
use autodie;
die "usage: readtrimmer.pl <file1> <file2> <nthreshold> " unless #ARGV == 3;
my ( $file1, $file2, $nthreshold ) = #ARGV;
open my $fh1, '<', $file1;
open my $fh2, '<', $file2;
while ( not eof $fh2 and not eof $fh1 ) {
chomp( my @forline = map { scalar <$fh1> } ( 1 .. 4 ) );
chomp( my @revline = map { scalar <$fh2> } ( 1 .. 4 ) );
my $ncheckfor = removen( $forline[1] );
my $ncheckrev = removen( $revline[1] );
print "READ 1 AND READ 2" if $ncheckfor and $ncheckrev;
print "Read 1 only" if $ncheckfor and !$ncheckrev;
print "READ 2 only" if !$ncheckfor and $ncheckrev;
}
sub removen {
my ($thisline) = @_;
my $ntotal = $thisline =~ tr/N/N/;
return $ntotal <= $nthreshold; #threshold for N
}
I am trying to parse a file where the header row is row 8 and rows 9-n are my data. How can I use Text::CSV to do this? I am having trouble; my code is below:
my @cols = @{$csv->getline($io, 8)};
my $row = {};
$csv->bind_columns (\@{$row}{@cols});
while($csv->getline($io, 8)){
my $ip_addr = $row->{'IP'};
}
use Text::CSV;
my $csv = Text::CSV->new( ) or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $io, "test.csv" or die "test.csv: $!";
my $array_ref = $csv->getline_all($io, 8);
my $record = "";
foreach $record (#$array_ref) {
print "$record->[0] \n";
}
close $io or die "test.csv: $!";
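If you want to address columns by the row-8 header names instead of by index, Text::CSV's column_names plus getline_hr can do that. A sketch, assuming (as in your sample) seven junk rows followed by the header row:
use strict;
use warnings;
use Text::CSV;

my $csv = Text::CSV->new( { binary => 1 } ) or die "Cannot use CSV: " . Text::CSV->error_diag();
open my $io, '<', 'test.csv' or die "test.csv: $!";
$csv->getline($io) for 1 .. 7;                 # discard rows 1-7
$csv->column_names( @{ $csv->getline($io) } ); # row 8 supplies the column names
while ( my $row = $csv->getline_hr($io) ) {    # rows 9..n come back as hashrefs
    print "$row->{IP}\n";
}
close $io;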
Are you dead-set on using bind_columns? I think I see what you're trying to do, and it's notionally very creative, but if all you want is a way to reference the column by the header name, how about something like this:
use strict;
use warnings;
use Text::CSV;
my $csv = Text::CSV->new ( { binary => 1 } );
my (%header);
open my $io, "<", '/var/tmp/foo.csv' or die $!;
while (my $row = $csv->getline ($io)) {
next unless $. > 7;
my @fields = @$row;
unless (%header) {
$header{$fields[$_]} = $_ for 0..$#fields;
next;
}
my $ip_addr = $fields[$header{'IP'}];
print "$. => $ip_addr\n";
}
close $io;
Sample Input:
Test Data,,,
Trash,,,
Test Data,,,
Trash,,,
Beans,Joe,10.224.38.189,XYZ
Beans,Joe,10.224.38.190,XYZ
Beans,Joe,10.224.38.191,XYZ
Last Name,First Name,IP,Computer
Beans,Joe,10.224.38.192,XYZ
Beans,Joe,10.224.38.193,XYZ
Beans,Joe,10.224.38.194,XYZ
Beans,Joe,10.224.38.195,XYZ
Beans,Joe,10.224.38.196,XYZ
Beans,Joe,10.224.38.197,XYZ
Output:
9 => 10.224.38.192
10 => 10.224.38.193
11 => 10.224.38.194
12 => 10.224.38.195
13 => 10.224.38.196
14 => 10.224.38.197
Why does reading from __DATA__ work and reading from the file doesn't (Loose unescaped quote)?
#!/usr/bin/env perl
use warnings; use strict; use 5.010;
use DBI;
my $table = 'klassik_CD.csv';
print qx(cat $table);
print qq{\n"data" or "Enter" : };
chomp( my $aw = <> );
if ( $aw eq 'data' ) {
$table = 'te_mp_fi_le.csv';
open my $fh, '>', $table or die $!;
while ( defined( my $row = <DATA> ) ) {
print $fh $row;
}
close $fh or die $!;
}
my $dbh = DBI->connect( "dbi:CSV:", { RaiseError => 1 } );
$dbh->{csv_tables}{$table} = { col_names => [], sep_char => ';' };
my $sth = $dbh->prepare( "SELECT * FROM $table" );
$sth->execute;
while ( defined( my $row = $sth->fetchrow_hashref ) ) {
say $row->{col1};
}
__DATA__
1;"Kammermusik fuer Blaeser";16;"DG";"eloquence";"dc129610"
2;"Requiem – Laudate Dominum Exultate, jubilate";19;"DG";"eloquence";"0a11f513"
Output: "data"
1;"Kammermusik fuer Blaeser";16;"DG";"eloquence";"dc129610"
2;"Requiem – Laudate Dominum Exultate, jubilate";19;"DG";"eloquence";"0a11f513"
"data" or "Enter" : data
Kammermusik fuer Blaeser
Requiem – Laudate Dominum Exultate, jubilate
Output: "Enter"
1;"Kammermusik fuer Blaeser";16;"DG";"eloquence";"dc129610"
2;"Requiem – Laudate Dominum Exultate, jubilate";19;"DG";"eloquence";"0a11f513"
"data" or "Enter" :
DBD::CSV::st execute failed:
Execution ERROR: Error 2034 while reading file ./klassik_CD.csv: EIF - Loose unescaped quote at /usr/local/lib/perl5/site_perl/5.10.1/DBD/CSV.pm line 220
.
[for Statement "SELECT * FROM klassik_CD.csv"] at ./zzzzzzzzzz.pl line 27.
DBD::CSV::st fetchrow_hashref failed: Attempt to fetch row without a preceeding execute () call or from a non-SELECT statement [for Statement "SELECT * FROM klassik_CD.csv"] at ./zzzzzzzzzz.pl line 28.
When I rename the file from "klassik_CD.csv" to "klassik_cd.csv" (all lowercase), it works (even though there was never a "file not found" message for the mixed-case name).
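If keeping the mixed-case filename matters, one workaround sketch (using the same csv_tables attribute the script already sets, with the per-table file mapping from the DBD::CSV docs) is to register a lowercase table name that points explicitly at the mixed-case file:
$dbh->{csv_tables}{klassik_cd} = {
    file      => 'klassik_CD.csv', # actual on-disk name, case preserved
    col_names => [],
    sep_char  => ';',
};
my $sth = $dbh->prepare('SELECT * FROM klassik_cd');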