Why does DBD::CSV complain about "Loose unescaped quote"? - perl

Why does reading from __DATA__ work and reading from the file doesn't (Loose unescaped quote)?
#!/usr/bin/env perl
# Demo script: runs "SELECT *" against a semicolon-separated CSV file through
# DBD::CSV and prints the col1 field of every row. Typing "data" at the prompt
# first copies the rows of the __DATA__ section into a scratch file and
# queries that file instead of klassik_CD.csv.
use warnings; use strict; use 5.010;
use DBI;
my $table = 'klassik_CD.csv';
# Show the raw file so it can be compared with what the query returns.
print qx(cat $table);
print qq{\n"data" or "Enter" : };
chomp( my $aw = <> );
# "data": switch to a scratch file filled from __DATA__; anything else keeps
# the original table name.
if ( $aw eq 'data' ) {
$table = 'te_mp_fi_le.csv';
open my $fh, '>', $table or die $!;
while ( defined( my $row = <DATA> ) ) {
print $fh $row;
}
close $fh or die $!;
}
# NOTE(review): DBI->connect takes (dsn, user, password, \%attr). Here the
# attribute hash sits in the user position, so RaiseError is presumably not in
# effect -- confirm against the DBI documentation.
my $dbh = DBI->connect( "dbi:CSV:", { RaiseError => 1 } );
# Per-table settings: ';' is the field separator. col_names is an empty list;
# presumably DBD::CSV then generates the column names (the loop below reads
# "col1") -- TODO confirm.
$dbh->{csv_tables}{$table} = { col_names => [], sep_char => ';' };
my $sth = $dbh->prepare( "SELECT * FROM $table" );
$sth->execute;
# Print the col1 field of each fetched row.
while ( defined( my $row = $sth->fetchrow_hashref ) ) {
say $row->{col1};
}
__DATA__
1;"Kammermusik fuer Blaeser";16;"DG";"eloquence";"dc129610"
2;"Requiem – Laudate Dominum Exultate, jubilate";19;"DG";"eloquence";"0a11f513"
Output: "data"
1;"Kammermusik fuer Blaeser";16;"DG";"eloquence";"dc129610"
2;"Requiem – Laudate Dominum Exultate, jubilate";19;"DG";"eloquence";"0a11f513"
"data" or "Enter" : data
Kammermusik fuer Blaeser
Requiem – Laudate Dominum Exultate, jubilate
Output: "Enter"
1;"Kammermusik fuer Blaeser";16;"DG";"eloquence";"dc129610"
2;"Requiem – Laudate Dominum Exultate, jubilate";19;"DG";"eloquence";"0a11f513"
"data" or "Enter" :
DBD::CSV::st execute failed:
Execution ERROR: Error 2034 while reading file ./klassik_CD.csv: EIF - Loose unescaped quote at /usr/local/lib/perl5/site_perl/5.10.1/DBD/CSV.pm line 220
.
[for Statement "SELECT * FROM klassik_CD.csv"] at ./zzzzzzzzzz.pl line 27.
DBD::CSV::st fetchrow_hashref failed: Attempt to fetch row without a preceeding execute () call or from a non-SELECT statement [for Statement "SELECT * FROM klassik_CD.csv"] at ./zzzzzzzzzz.pl line 28.

When I rename the file from "klassik_CD.csv" to "klassik_cd.csv" (all lowercase) it works (though there was no such message as "file not found" ).

Related

Program run under cron creates file in wrong path

I am trying to execute a Perl script using crontab.
Manually, the script works fine, but when I use cron, I get an error
/home/dev/test.csv : not readable
/home/dev/test.csv is a file generated by the script, but it is created as /home/test.csv and
I don't know how or why.
This is my crontab:
*/3 * * * * /home/dev/metrique.pl &> /home/dev/output.txt
this is my code :
#!/sw/freetools/perl/5.8.8/Linux/rh50/x86_64/bin/perl
# Monthly iLico metrics report: queries the iLico log tables for the current
# month, writes five CSV summaries and mails them as attachments.
#
# NOTE: the report files are opened with relative names, so they are created
# in the current working directory of the process, while the attachments at
# the end are read from /home/dev. Both only agree when the script is started
# from /home/dev.
#use strict ;
#use warnings ;
use DBI ;
use DateTime ;
use Text::CSV;
use MIME::Lite;
my $Month = DateTime->now->subtract(months=>0)->truncate(to=>'month') ->strftime('%B') ;
my $Date = DateTime->now->subtract(months=>0)->truncate(to=>'month') ->strftime('%Y-%m') ;
# Wrap the YYYY-MM prefix in SQL wildcards for the LIKE filters below.
$Date ="%".$Date."%" ;
my %info = (db => "ilico", host => "gnx5910.gnb.st.com", user => "ilicousr", pass => "" );
my $dbh = DBI->connect("DBI:mysql:$info{db};$info{host}", $info{user}, $info{pass});
my @record ;
my %Report;
my @other;
my @region = qw{EMEA AME ASIA INDIA Global-WAN};
my @scope = qw{wan lan specific};
my $total_weekly = 0;
my $total_usage = 0;
my $weekly = '2';
my $usage = '1';
my @top_user ;
my @array ;
my @user ;
my %hash = ();
my %sum = ();
my %LOGIN_W = ();
my %Groupe = ();
my %hash1 = ();
my %Nom_Complet = ();
my %NUMBER = ();
# Output files (relative names, see NOTE at the top); $filename5 is the only
# input file.
my $filename1="NBgenerated_Reports.csv";
my $filename2="Report_Scope.csv";
my $filename3 ="Top_10_Features.csv";
my $filename4 ="Top_10_Users.csv";
my $filename5 ="/sw/st/itcad/setup/shared_data/ldp_om.csv";
my $filename6 ="Report_Groupe.csv";
open(my $fh1, ">", $filename1) or die "cannot open < $filename1: $!";
open(my $fh2, ">", $filename2) or die "cannot open < $filename2: $!";
open(my $fh3, ">", $filename3) or die "cannot open < $filename3: $!";
open(my $fh4, ">", $filename4) or die "cannot open < $filename4: $!";
open(my $fh5, "<", $filename5) or die "cannot open < $filename5: $!";
open(my $fh6, ">", $filename6) or die "cannot open < $filename6: $!";
print $fh1 "Region; Usage_Report; Weekly; \n";
print $fh2 "Scope; NB; \n";
print $fh3 "Feature; NB; \n";
print $fh4 "User; NB_Report ;Groupe \n";
print $fh6 "Groupe; NB_Report \n";
#usage & weekly
my $sql = qq/SELECT COUNT( `Region`.`RegID` ) FROM `iLico_Log`, `Region` WHERE `iLico_Log`.`Date` LIKE ? AND `Region`.`RegID` = `iLico_Log`.`RegID` AND `iLico_Log`.`Type` = ?
AND `Region`.`RegName` LIKE ? / ;
# One count per (region, report type) pair.
foreach my $reg (@region){
    foreach my $type ($weekly, $usage){
        my $sth = $dbh->prepare($sql) or die ("unable to prepare");
        $sth->execute(($Date, $type, $reg)) ;
        @record = $sth -> fetchrow_array();
        $Report{$reg}{$type}=$record[0];
    }
}
foreach my $reg (keys %Report) {
    $total_usage += $_ for($Report{$reg}{$usage});
    $total_weekly += $_ for($Report{$reg}{$weekly});
    print $fh1 "$reg ; $Report{$reg}{$usage}; $Report{$reg}{$weekly} \n";
}
print $fh1 "total; $total_usage; $total_weekly; \n";
#scope
my $SCOPE = qq/SELECT COUNT(logID ) FROM `iLico_Log` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`scope`= ?/;
foreach my $sc (@scope){
    my $sth = $dbh->prepare($SCOPE) or die ("unable to prepare");
    $sth->execute($Date, $sc) ;
    my @record = $sth -> fetchrow_array();
    print $fh2 "$sc; @record; \n";
}
#Top 10 features
my $TopFeatures = qq/SELECT `Feature`.`FeatName` , COUNT( * ) NB FROM `iLico_Log`, `Feature` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`FeatID` = `Feature`.`FeatID` GROUP BY `Feature`.`FeatID` ORDER BY NB DESC LIMIT 10 /;
my $sth = $dbh->prepare($TopFeatures) or die ("unable to prepare");
$sth->execute($Date) ;
while( @record = $sth -> fetchrow_array())
{
    print $fh3 "$record[0]; $record[1]; \n";
}
#other features number
my $Other = qq/SELECT COUNT(DISTINCT `iLico_Log`.`FeatID`) NB FROM `iLico_Log`, `Feature` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`FeatID` = `Feature`.`FeatID`/;
$sth = $dbh->prepare($Other) or die ("unable to prepare");
$sth->execute($Date) ;
@record = $sth -> fetchrow_array();
# Distinct features used this month minus the ten already listed.
$other[0] = $record[0] - 10 ;
print $fh3 "Other_features_number; @other \n";
#total usage of all and other features
my $TotalUsage =qq/SELECT COUNT( * ) SU FROM `iLico_Log` , `Feature` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`FeatID` = `Feature`.`FeatID`/;
my $SUMTopFeatures = qq/select sum(NB) from (SELECT `Feature`.`FeatName` , COUNT( * ) NB FROM `iLico_Log`, `Feature` WHERE `iLico_Log`.`Date` LIKE ? AND `iLico_Log`.`FeatID` = `Feature`.`FeatID` GROUP BY `Feature`.`FeatID` ORDER BY NB DESC LIMIT 10) AS subquery /;
$sth = $dbh->prepare($TotalUsage) or die ("unable to prepare");
my $sth1 = $dbh->prepare($SUMTopFeatures) or die ("unable to prepare");
$sth->execute($Date) ;
$sth1->execute($Date) ;
@record = $sth -> fetchrow_array();
my @sum = $sth1 -> fetchrow_array();
# Total usage minus the usage of the top ten features.
$other[0] = $record[0] - $sum[0] ;
print $fh3 "Other_total_usage; @other";
#select login windows and groupe from file ldp_om.csv to be used in top_10_user and nomber Report/Groupe
while (<$fh5>) {
    chomp;
    my ($mail, $uid, $site, $grp, $dvs, $cnt, $ccost, $mng, $typ, $phone, $first, $last, $login, $cn) = split ';', lc($_), 14;
    if (! exists $LOGIN_W{$login}) {
        $LOGIN_W{$login} = $grp;
    }
    # NOTE(review): %hash is never filled, so this test is always true and
    # the last record seen for a login wins -- confirm that is intended.
    if (! exists $hash{$login}) {
        $Groupe{$login} = $grp;
        $Nom_Complet{$login} = $cn;
    }
}
#top 10 user / Groups
my $TopUsers = qq/select ilicoUserLogin, COUNT(*) NB, Display from ilico_log I where Date like ? GROUP BY I.ilicoUserLogin ORDER BY NB DESC LIMIT 10/;
$sth = $dbh->prepare($TopUsers) or die ("unable to prepare");
$sth->execute($Date) ;
while( @top_user = $sth -> fetchrow_array())
{
    $top_user[0] =~ s/\s+/ /g;
    push (@array, lc($top_user[0]));
    my $login = lc($top_user[0]);
    $NUMBER{$login} = $top_user[1];
}
foreach my $login ( @array ){
    $hash1{$login} = $Groupe{$login};
}
foreach my $login (sort {$NUMBER{$b} <=> $NUMBER{$a}} keys %hash1) {
    my $grpe = uc($hash1{$login}) ;
    my $name = ucfirst($Nom_Complet{$login});
    print $fh4 "$name ; $NUMBER{$login} ; $grpe ; \n";
}
#Report/Groupe
my $Groupe = qq/select ilicoUserLogin, Count(*) NB from ilico_log I where Date like ? GROUP BY I.ilicoUserLogin ORDER BY NB DESC /;
$sth = $dbh->prepare($Groupe) or die ("unable to prepare");
$sth->execute($Date) ;
while( @user = $sth -> fetchrow_array())
{
    $user[0] =~ s/\s+/ /g;
    my $login = lc($user[0]);
    # NOTE(review): "my $grp" is a fresh, undefined variable, so the
    # per-login counts are stored under the empty-string key of %LOGIN_W.
    $LOGIN_W{my $grp}{$login} = $user[1];
}
foreach my $login ( keys %LOGIN_W) {
    if (defined( $login ) and $login ne '')
    {
        # Add this login's count (read back from the empty-string key) to
        # the total of its group.
        $sum{$LOGIN_W{$login}} += $LOGIN_W{my $var}{$login} ;
    }
}
for my $key (sort {$sum{$b} <=> $sum{$a}} keys %sum) {
    if ($sum{$key})
    {
        my $KEYS = uc($key);
        print $fh6 "$KEYS; $sum{$key}; \n";
    }
}
close $fh1;
close $fh2;
close $fh3;
close $fh4;
close $fh5;
close $fh6;
# Mail the reports. The attachment paths are absolute (/home/dev/...).
my $msg = MIME::Lite->new (
    From => 'maha.mastouri@st.com',
    To => 'maha.mastouri@st.com',
    # Cc => 'maha.mastouri@st.com',
    Subject => "iLico Mertique $Month",
    Type => 'text/plain' ,
    Path => '/home/dev/text'
);
$msg->attach( Type => 'TEXT',
    Path => '/home/dev/NBgenerated_Reports.csv',
    Disposition => 'attachment',
    Filename => 'NB_generated_Reports.csv'
);
$msg->attach( Type => 'TEXT',
    Path => '/home/dev/Top_10_Features.csv',
    Disposition => 'attachment',
    Filename => 'Top_10_Features.csv'
);
$msg->attach( Type => 'TEXT',
    Path => '/home/dev/Report_Scope.csv',
    Disposition => 'attachment',
    Filename => 'Report_Scope.csv'
);
$msg->attach( Type => 'TEXT',
    Path => '/home/dev/Top_10_Users.csv',
    Disposition => 'attachment',
    Filename => 'Top_10_Users.csv'
);
$msg->attach( Type => 'TEXT',
    Path => '/home/dev/Report_Groupe.csv',
    Disposition => 'attachment',
    Filename => 'Report_Groupe.csv'
);
$msg->send();
A cron context is very different from a login shell. It has almost no environment variables by default. It appears to me that your program depends on $ENV{USER} to build its output (or input) path. That variable is simply going to be missing under cron: crontabs are executed by the "cron" daemon, not by your login shell.
You can try to print the whole %ENV to somewhere like "/tmp/env.txt" just to see that it's basically an empty hash. It's the best if you can change the program not to depend on env var. You may also try to add them back right before the schedule line:
USER=dev
*/3 * * * * /home/dev/metrique.pl &> /home/dev/output.txt
I must also notify you that after doing this, the env var USER becomes present for all the schedules below these 2 lines. Those env vars can also be inspected by ps e.
If an env var is required just to decide an input path, then it's as easy as getting the input path from @ARGV instead.
You should run your .profile (or .bash_profile for bash) before executing the commands from cron, so put it before your command in the crontab as shown. Similarly, if there are any other profile scripts that you run on login and that the Perl script relies on, those have to be included as well.
*/3 * * * * . $HOME/.profile; /home/dev/metrique.pl &> /home/dev/output.txt
I solved the problem, crontab execute the script in the home "/home/httpldev/" (default), so I changed the execution path by following;
0 9 1 * * cd /home/httpldev/iLicoMetrics/ && /home/httpldev/iLicoMetrics/metrique.pl &> /dev/null .
Thank you a lot for your help.

rearrange data from one column to a row

I have the below data and I need to make the second column as the header. Any help is appreciated.
Data:
IBM,Voltality,7,73894756.93897434897
IBM,Market,100,983874.34324
GOOG,Sanity,15,8932748
GOOG,Rate Jump,25,873476378.234234
MBLY,Market,340,23423423432.6783
Output:
PRODUCT|Market|Rate Jump|Sanity|Voltality
IBM|100,983874.34324|||7,73894756.93897434897
GOOG||25,873476378.234234|15,8932748|||
MBLY|340,23423423432.6783|||
Code (incomplete / not sure how to get to the end):
#!/usr/bin/perl
use strict;
use Getopt::Long;
use warnings;
use Data::Dumper;

# Work-in-progress pivot: reads "product,reason,count,latency" records from
# the files named on the command line (or STDIN), indexes them by
# "product,reason" and dumps what was collected.
my $valsep = ',';
my ( %type, %keys, %ccy, %cnt, %avg );

while ( defined( my $record = <> ) ) {
    chomp $record;
    my ( $product, $reason, $count, $lat ) = split /,/, $record;
    my $key = "$product,$reason";

    # Remember each distinct reason once.
    $type{$reason} = $reason unless exists $type{$reason};

    $ccy{$key} = $product;
    $cnt{$key} = $count;
    $avg{$key} = $lat;
}
close(INPUT);

print Dumper( \%ccy );
print Dumper( \%type );

my ( %pair, %details );
my @reasons = sort keys %type;
for my $rows ( sort keys %ccy ) {
    print "the key is : $rows and $ccy{$rows}\n";
    print "The type is : $_ and $type{$_}\n" for @reasons;
}
You just need to keep track of your columns and row data when parsing the data structure.
The following demonstrates:
#!/usr/bin/perl
use strict;
use warnings;

# Pivot "company,column,values" records into one row per company with one
# '|'-separated field per distinct column name. Columns are discovered from
# the data and printed in sorted order; missing cells are left empty.
my $fh = \*DATA;
my %columns;
my %rows;
while (<$fh>) {
    chomp;
    next unless /\S/;    # ignore blank lines instead of creating empty keys

    # Only split into three parts: the value itself contains a comma.
    my ( $company, $col, $vals ) = split ',', $_, 3;

    # Track Columns for later labeling
    $columns{$col}++;
    $rows{$company}{$col} = $vals;
}
my @columns = sort keys %columns;

# Header
print join( '|', 'PRODUCT', @columns ), "\n";
for my $company ( sort keys %rows ) {
    # Hash slice in column order; undefined cells become empty strings.
    print join( '|', $company, map { $_ // '' } @{ $rows{$company} }{@columns} ), "\n";
}
__DATA__
IBM,Voltality,7,73894756.93897434897
IBM,Market,100,983874.34324
GOOG,Sanity,15,8932748
GOOG,Rate Jump,25,873476378.234234
MBLY,Market,340,23423423432.6783
Outputs:
PRODUCT|Market|Rate Jump|Sanity|Voltality
GOOG||25,873476378.234234|15,8932748|
IBM|100,983874.34324|||7,73894756.93897434897
MBLY|340,23423423432.6783|||
The following code will do the job; rather than using several hashes, I've put all the data in a hash of hashes. I've put comments in the script to explain what is happening in case you are not sure. You can, of course, delete them in your script.
#!/usr/bin/perl
use warnings;
use strict;

# Pivot "product,property,values" records into one '|'-separated row per
# product, written to output.txt with a fixed column order.
my %market;
while (<DATA>) {
    next unless /\w/;
    # remove line endings
    chomp;
    # split line by commas -- only split into three parts
    my @col = split ",", $_, 3;
    # save the data as $market{col0}{col1} = col2
    $market{$col[0]}{$col[1]} = $col[2];
}
# create an output file
my $outfile = 'output.txt';
open( my $fh, ">", $outfile ) or die "Could not open $outfile: $!";
# The headers double as lookup keys, so they must be spelled exactly like the
# second field of the input (the sample data uses 'Voltality'); otherwise the
# column is silently printed empty.
my @headers = ('Market','Rate Jump','Sanity','Voltality');
# print out the header line, joined by |
print { $fh } join('|', 'PRODUCT', @headers) . "\n";
# for each product in the market data
for my $p (sort keys %market) {
    # print the product name
    print { $fh } join('|', $p,
        # go through the headers using map (map acts like a "for" loop)
        # if the relevant property exists in the market data, print it;
        # if not, print nothing
        map { $market{$p}{$_} // '' } @headers) . "\n";
}
# buffered write errors only surface when the handle is closed
close($fh) or die "Could not close $outfile: $!";
# this is the input data. You might be reading yours in from a file
__DATA__
IBM,Voltality,7,73894756.93897434897
IBM,Market,100,983874.34324
GOOG,Sanity,15,8932748
GOOG,Rate Jump,25,873476378.234234
MBLY,Market,340,23423423432.6783
Output:
PRODUCT|Market|Rate Jump|Sanity|Volatility
GOOG||25,873476378.234234|15,8932748|
IBM|100,983874.34324|||7,73894756.93897434897
MBLY|340,23423423432.6783|||

perl DBI comma in query name

I am using Perl to talk to an SQLite database, and am getting an error. I think it is because some of the chemical names (stored in the variable $drug) have commas in them (they also have many other 'strange characters').
Any help would be appreciated, thanks.
**error message:**
- read in 57to find CIDs for
- Opened database successfully
- retrieving results for cyclo(L-Val-L-Pro)
- retrieving results for Sinapic_acid
- retrieving results for NHQ
- DBD::SQLite::db prepare failed: near ",": syntax error at get_drugName2IDinfo.sqlite.pl line 33.
- DBD::SQLite::db prepare failed: near ",": syntax error at get_drugName2IDinfo.sqlite.pl line 33.
line 33:
my $stmt = qq(SELECT * from chem_aliases WHERE alias LIKE '$drug%');
example drug names:
(2R,3R,4S,6R)-2-((5-hydroxy-2,2-dimethyl-3,4-dihydro-2H-benzo[h]chromen-6-yl)oxy)-6-(hydroxymethyl)tetrahydro-2H-pyran-3,4,5-triol
partial script:
# Read the drug names to look up (one per line from IN) as keys of %HoDrugs,
# then write every chem_aliases row whose alias starts with a name to a
# tab-separated output file.
my %HoDrugs;
while (my $line=<IN>){
    chomp $line;
    $HoDrugs{$line}=1;
}
close(IN);
print "read in\t".scalar(keys %HoDrugs)."to find CIDs for\n";
##
my $driver = "SQLite";
my $database = "/Users/alisonwaller/Documents/Typas/ext_data/STITCHv3.1/STITCHv3.1.sqlite.db";
my $dsn = "DBI:$driver:dbname=$database";
my $userid = "";
my $password = "";
my $dbh = DBI->connect($dsn, $userid, $password, { RaiseError => 1 })
    or die $DBI::errstr;
print "Opened database successfully\n";
###
my $outfile="$in_drugNms.sq.plsCIDs.tab";
open (OUT,">",$outfile);
foreach my $drug (keys %HoDrugs){
    # NOTE(review): $drug is interpolated straight into the SQL text, so a
    # quote character inside a name ends the string literal early and the
    # rest of the name is parsed as SQL. A '?' placeholder bound at execute
    # time avoids this.
    my $stmt = qq(SELECT * from chem_aliases WHERE alias LIKE '$drug%');
    my $sth = $dbh->prepare( $stmt );
    my $rv = $sth->execute() or die $DBI::errstr;
    if($rv < 0){
        print $DBI::errstr;
    }
    while(my @row = $sth->fetchrow_array()) {
        print "retrieving results for\t$drug\n";
        print OUT join("\t",$drug,$row[0],$row[1],$row[2]) . "\n";
    }
}
print "Operation done successfully\n";
$dbh->disconnect();
Have you tried using placeholders rather than just quoting the string yourself?
# Bind the search prefix as a parameter so the driver does the quoting;
# the trailing '%' wildcard is part of the bound value, not of the SQL text.
my $sth = $dbh->prepare('SELECT * from chem_aliases WHERE alias LIKE ?');
my $rv  = $sth->execute("$drug%") or die $DBI::errstr;
You could always try to use $drug =~ s/[[:punct:]]//g; before performing the query to try to remove punctuation characters?
If you don't want that, maybe replace them with spaces? $drug =~ s/,/ /g;

CSV import to MySQL

Hi, I keep getting an error when trying to run the following Perl script to import a CSV file into an existing MySQL database table. Every time I run it I get the message "Died at /home/perl/dep_import_2.pl line 10."
Any help would be appreciated
Thanks
#!/usr/bin/perl
# Reads a CSV file and issues one INSERT into the MySQL table "dep" per line.
use DBI;
use DBD::mysql;
use warnings "all";
# Exactly one command-line argument (the file name) is required.
if ($#ARGV != 0) {
    print "Usage: dep_import_2.pl filename\n";
    die;
}
$filename = $ARGV[0];
# MySQL CONFIG VARIABLES
$host = "localhost";
$user = "standard";
$pw = "standard";
$database = "data_track";
$dsn = "DBI:mysql:database=" . $database . ";host=" . $host;
$dbh = DBI->connect($dsn, $user, $pw)
    or die "Can't connect to the DB: $DBI::errstr\n";
print "Connected to DB!\n";
# NOTE(review): in the three-argument form the second argument of open is the
# mode; here it is the hard-coded path, with $filename as the third argument.
open FILE, "/home/dep/new_study_forms_2.csv", $filename or die $!;
# Discard the first two lines of the file.
$_ = <FILE>;
$_ = <FILE>;
while (<FILE>) {
    @f = split(/,/, $_);
    # NOTE(review): there is no comma between '$f[3]' and '$f[4]' in the
    # VALUES list below.
    $sql = "INSERT INTO dep (date, subject, weight, size, time, hi_pre, hi_post, hi_afternoon, hi_test, actical_on, actical_off, saggital_1, saggital_2, crown_heel1, crown_heel2, crown_rump1, crown_rump2, scan, record_number, tap, sample, dye, left_chip, right_chip) VALUES('$f[0]', '$f[1]', '$f[2]', '$f[3]' '$f[4]', '$f[5]', '$f[6]', '$f[7]', '$f[8]', '$f[9]', '$f[10]', '$f[11]', '$f[12]', '$f[13]', '$f[14]', '$f[15]', '$f[16]', '$f[17]', '$f[18]', '$f[19]', '$f[20]', '$f[21]', '$f[22]', '$f[23]')";
    print "$sql\n";
    my $query = $dbh->do($sql);
}
There are a few issues with your code. First, and most importantly, you are not using
use strict;
use warnings;
This is bad because you will not get information about errors in your code without them.
As others have pointed out, the reason the script dies is because $#ARGV is not zero. Meaning that you have either passed too few or too many arguments to the script. The arguments to the script must be exactly one, like the usage statement says.
However, that would not solve your problem, because your open statement below is screwed up. My guess is that you tried to add your file name directly. This line:
open FILE, "/home/dep/new_study_forms_2.csv", $filename or die $!;
It will probably give you the error unknown open() mode .... It should probably be
open FILE, "<", $filename or die $!;
And then you pass /home/dep/new_study_forms_2.csv to the script on the command line, assuming that is the correct file to use.
Also, in your query string, you should not interpolate variables, you should use placeholders, as is described in the documentation for DBI. The placeholders will take care of the quoting for you and avoid any data corruption. To make your query line a bit simpler, you can do something like:
# Build one '?' placeholder per field of @f and let DBI quote the values.
# "x @f" repeats the one-element list ("?") as many times as @f has elements.
my $sth = $dbh->prepare(
"INSERT INTO dep (date, subject, weight, size, time, hi_pre, hi_post,
hi_afternoon, hi_test, actical_on, actical_off, saggital_1, saggital_2,
crown_heel1, crown_heel2, crown_rump1, crown_rump2, scan, record_number,
tap, sample, dye, left_chip, right_chip)
VALUES(" . join(",", ("?") x @f) . ")");
$sth->execute(@f);
Here's a script which uses Text::CSV_XS to properly parse CSV. It assumes that the first row contains column names, and then loads the CSV in batches, committing after every 100 inserts. Every parameter (user, password, database) is configurable via command-line options. The usage text is an in-line POD document.
#!/usr/bin/env perl
use strict;
use warnings qw(all);
use DBI;
use Getopt::Long;
use Pod::Usage;
use Text::CSV_XS;

=pod

=head1 SYNOPSIS

    dep_import_2.pl --filename=file.csv --host=localhost --user=standard --pw=standard --database=data_track

=head1 DESCRIPTION

Loads a CSV file into the specified MySQL database.

=cut

my $host = 'localhost';
my $user = 'standard';
my $pw = 'standard';
my $database = 'data_track';
my $commit = 100;
GetOptions(
    'help' => \my $help,
    'filename=s' => \my $filename,
    'host=s' => \$host,
    'user=s' => \$user,
    'pw=s' => \$pw,
    'database=s' => \$database,
    'commit=i' => \$commit,
) or pod2usage(q(-verbose) => 1);
pod2usage(q(-verbose) => 2) if $help;

# --filename has no default, and a batch size below 1 would make the modulus
# in the insert loop fail, so reject both up front.
pod2usage(-message => 'Missing required option --filename', -verbose => 1)
    unless defined $filename;
pod2usage(-message => '--commit must be at least 1', -verbose => 1)
    if $commit < 1;

# AutoCommit is switched off so the explicit commits below really delimit
# batches; with the DBI default (AutoCommit on) commit() is ineffective.
my $dbh = DBI->connect(
    "DBI:mysql:database=$database;host=$host", $user => $pw,
    { RaiseError => 1, AutoCommit => 0 },
) or die "Can't connect to the DB: $DBI::errstr";

# binary => 1 allows non-ASCII characters and embedded newlines in fields.
my $csv = Text::CSV_XS->new({ binary => 1 })
    or die "Text::CSV error: " . Text::CSV_XS->error_diag;
open(my $fh, '<:encoding(UTF-8)', $filename)
    or die "Can't open $filename: $!";

# The first row supplies the column names.
my $header = $csv->getline($fh)
    or die "Can't read a header row from $filename: " . $csv->error_diag;
my @cols = @{$header};
$csv->column_names(\@cols);

# Column names come from the file, so quote them as identifiers; the values
# are bound through placeholders.
my $column_list = join ',', map { $dbh->quote_identifier($_) } @cols;
my $placeholders = join ',', ('?') x scalar @cols;
my $query = "INSERT INTO dep ($column_list) VALUES ($placeholders)";
my $sth = $dbh->prepare($query);

my $i = 0;
while (my $row = $csv->getline_hr($fh)) {
    # Hash slice keeps the values in header order.
    $sth->execute(@{$row}{@cols});
    $dbh->commit if ((++$i % $commit) == 0);
}
# Commit the last, possibly partial, batch.
$dbh->commit;
$dbh->disconnect;
# getline_hr also returns false on a parse error; in void context error_diag
# prints the diagnostic to STDERR.
$csv->eof or $csv->error_diag;
close $fh;

Unable to read the count of some words from file of size ~2GB with Perl

I have written a Perl program which will match certain words in a log file and store the results in a database. The problem is this program works fine with a small file but doesn't work with file size ~2GB. Is it size or program need to be changed?
# Counts "server start", "server interim-update" and "server stop" lines in
# /root/temp.log and inserts the three totals into the radcount table.
use POSIX qw(strftime);
# load module
use DBI;
open( FILE, "/root/temp.log" ) or die "Unable to open logfile:$!\n";
$count_start = 0;
$count_interim = 0;
$count_stop = 0;
while (<FILE>) {
    # Reads every remaining line of the file into @test at once.
    @test = <FILE>;
    foreach $line (@test) {
        if ( $line =~ m/server start/ ) {
            #print "yes\n";
            $count_start++;
        }
        elsif ( $line =~ m/server interim-update/ ) {
            $count_stop++;
        }
        elsif ( $line =~ m/server stop/ ) {
            $count_interim++;
        }
    }
    print "$count_start\n";
    print "$count_stop\n";
    print "$count_interim\n";
    $now_string = strftime "%b %e %H:%M:%S", localtime;
    print $now_string;
    # connect
    my $dbh = DBI->connect( "DBI:Pg:dbname=postgres;host=localhost",
        "postgres", "postgres", { 'RaiseError' => 1 } );
    # execute INSERT query
    my $rows = $dbh->do(
        "insert into radcount (acc,bcc,dcc) Values ('$count_start','$count_stop','$count_interim')"
    );
    print "$rows row(s) affected\n";
    # clean up
    $dbh->disconnect();
}
close(LOG);
There's a few things here - first off I'd recommend changing to three arg open for your file handle - reasoning here
open( my $fileHandle, '<', '/root/temp.log' ) or die "blah" ;
Secondly you're reading the whole file into an array - with a large file this will eat a lot of ram. Instead read it line by line and process it:
while(<$fileHandle>){
#contents of your foreach loop
}
I have a few comments about your program.
Always use strict and use warnings at the start of your program, and declare variables using my at their point of first use
Always use lexical filehandles and the three-parameter form of open, and always check the status of an open call
You are opening the file using filehandle FILE, but closing LOG
Your while statement reads the first line of the file and throws it away
@test = <FILE> attempts to read all of the rest of the file into the array. This is what is causing your problem
You should connect to the database once and use the same database handle for the rest of the code
You should prepare your statement with placeholders and pass the actual values with execute
You are incrementing $count_stop for an interim-update record and $count_interim for a stop record
The core module Time::Piece provides a strftime method without the bloat of POSIX
Here is a modification of your program to show these ideas. I have not set up a log file and database to test it but it looks fine to me and does compile.
use strict;
use warnings;
use Time::Piece;
use DBI;

# Count "server start", "server interim-update" and "server stop" lines in
# the log one line at a time, print the totals with a timestamp, and store
# them in the radcount table.
open my $log, '<', '/root/temp.log' or die "Unable to open log file: $!";
my ($count_start, $count_interim, $count_stop) = (0, 0, 0);
while (<$log>) {
    if ( /server start/ ) {
        $count_start++;
    }
    elsif ( /server interim-update/ ) {
        $count_interim++;
    }
    elsif ( /server stop/ ) {
        $count_stop++;
    }
}
close $log;
print <<END;
Start: $count_start
Interim: $count_interim
Stop: $count_stop
END
print localtime->strftime("%b %e %H:%M:%S"), "\n";
my $dbh = DBI->connect(
    "DBI:Pg:dbname=postgres;host=localhost", "postgres", "postgres",
    { 'RaiseError' => 1 } );
# Values are bound through placeholders, in the column order acc, bcc, dcc.
my $insert = $dbh->prepare('INSERT INTO radcount (acc, bcc, dcc) VALUES (?, ?, ?)');
my $rows = $insert->execute($count_start, $count_stop, $count_interim);
printf "%d %s affected\n", $rows, $rows == 1 ? 'row' : 'rows';
# clean up
$dbh->disconnect;