Perl script to convert xls to csv - perl

This script coverts xls to csv ok.
The challenge is that it does not convert blank cell in the xls to blanks in csv file.
Any help is appreciated: UPDATED SCRIPT
#!/usr/bin/perl
use strict;
use Spreadsheet::ParseExcel;
use Text::CSV;
my $sourcename = shift #ARGV or die "invocation: $0 <source file>\n";
my $source_excel = new Spreadsheet::ParseExcel;
my $source_book = $source_excel->Parse($sourcename)
or die "Could not open source Excel file $sourcename: $!";
my $storage_book;
foreach my $source_sheet_number (0 .. $source_book->{SheetCount}-1) {
my $source_sheet = $source_book->{Worksheet}[$source_sheet_number];
print "--------- SHEET:", $source_sheet->{Name}, "\n";
next unless defined $source_sheet->{MaxRow};
next unless $source_sheet->{MinRow} <= $source_sheet->{MaxRow};
next unless defined $source_sheet->{MaxCol};
next unless $source_sheet->{MinCol} <= $source_sheet->{MaxCol};
foreach my $row_index ($source_sheet->{MinRow} .. $source_sheet->{MaxRow}) {
foreach my $col_index ($source_sheet->{MinCol} .. $source_sheet->{MaxCol}) {
my $source_cell = $source_sheet->{Cells}[$row_index][$col_index];
if ($source_cell && $source_cell->Value) {
#print "( $row_index , $col_index ) =>", $source_cell->Value, "\t;";
print $source_cell->Value, ";";
}
else
{
print ";"
}
}
}
}
sample excel
EFG KDD ABS JME
FGO POP JET
converted as:
EFG;KDD;ABS;JME;
FGO;POP;JET;
but it should be:
EFG;KDD;ABS;JME;
FGO;;POP;JET;

You have to check if the value of the cell is initialized, not the cell it self.
Change:
if ($source_cell) {
#print "( $row_index , $col_index ) =>", $source_cell->Value, "\t;";
print $source_cell->Value, ";";
}
To:
if ($source_cell && $source_cell->Value) {
#print "( $row_index , $col_index ) =>", $source_cell->Value, "\t;";
print $source_cell->Value, ";";
} else {
print ";";
}
should work.
UPDATE:
foreach my $row_index ($source_sheet->{MinRow} .. $source_sheet->{MaxRow}) {
foreach my $col_index ($source_sheet->{MinCol} .. $source_sheet->{MaxCol}) {
my $source_cell = $source_sheet->{Cells}[$row_index][$col_index];
if ($source_cell && $source_cell->Value) {
print $source_cell->Value.";";
} else {
print ";";
}
}
print "\n";
}
}

Related

Unable to print after while loop in perl

BEGIN {
use FindBin;
$scriptsDir = $FindBin::RealBin;
}
sub print_log {
($log, $msg) = ($_[0], $_[1]);
print $log $msg;
}
$opt_rh_gsr = "path_to_file";
open(FO, "$opt_rh_gsr") || die "-F-: Can not open file \n";
while(<FO>) {
if(/vdd_nets/) {
$vdd_net = 1;
$vdd_string = "VDD_NETS \{ \n";
}
if(/gnd_nets/) {
$gnd_net = 1;
}
if(($gnd_net == 1)) {
chomp();
$new_line = $_;
#split_new_line = split(":", $new_line);
}
if(($gnd_net == 1) && /\}/) {
$gnd_net = 0;
$gnd_string .= "\} \n";
exit;
}
if($vdd_net) {
if(/^\s*\S+\s+\S+\s+{/) {
$paren++;
}
if (0 != $paren && /^\s*(\w+)\s*$/) {
$vdd_nets{$1} = $parenvolt;
next;
}
if(/^\s*}\s*$/ || /^\s+$/) {
if (0 == $paren) {
$vdd_net = 0; next;
}
else {
$paren--; next;
}
}
chomp();
if(/\s*\}\s*$/ && ($vdd_net == 1)){
s/\'//g;
$vdd_net = 0;
#_ = split(":");
$vdd_string .= "$_[0] $_[1] \n";
$vdd_string .= "\} \n";
next;
}
if($gnd_net) {
if(/^\s*\}\s+$/ || /^\s+$/) {
$gnd_net = 0;
next;
}
#chomp();
if(/\s*\}\s*$/ && ($gnd_net == 1)){
s/\'//g;
$gnd_net = 0;
}
#_ = split();
$GNDNET = $_[0];
if ($_[0] =~ /^\w+$/) {
$groundnets{$_[0]} = 1;
}
}
}
}
print " done reading \n";
close(FO);
print "closed file \n";
The above is not printing the last 2 print statement (before and after the close of file handle). I tried print STDOUT, that didn't work. I also tried to flush, that didn't work either.
The script is exiting after executing, so it is not stuck in a infinite loop anywhere. I tries using perl5.6 and 5.8, but both of them have the same problem.
To exit a loop, you should use the keyword last instead of exit (which exits the whole program). This if:
if(($gnd_net == 1) && /\}/) {
$gnd_net = 0;
$gnd_string .= "\} \n";
print "exiting loop $gnd_string \n";
exit;
}
Should thus be:
if(($gnd_net == 1) && /\}/) {
$gnd_net = 0;
$gnd_string .= "\} \n";
print "exiting loop $gnd_string \n";
last;
}
(unless you actually wanted to exit the program, in which case the print should rather have been print "exiting program...")
A few tips:
Always add use strict and use warnings at the beginning of your scripts. It will catch many mistakes and save you a lot of time.
Use 3-operand open to open files (ie, open FILEHANDLE,MODE,EXPR instead of open FILEHANDLE,EXPR), and lexical filehandles (ie, $FO instead of FO). Your open should thus have been: open my $FO, '<', $opt_rh_gsr instead of open(FO, "$opt_rh_gsr").
Adding || die "-F-: Can not open file \n" after open is a good idea, but 1) you should do or die instead of || die (in this specific case it doesn't matter, but with or rather than ||, you can omit the parenthesis around open's arguments), and 2) you should add the name of the file you were trying to open (in that case, you'd print die "-F-: Can not open file '$opt_rh_gsr'). 3) add $! to the die to have the error message (die "-F-: Can not open file '$opt_rh_gsr': $!). And 4), as suggested by TLP, don't add a newline at the end of a die string.
sub print_log { ($log, $msg) = ($_[0], $_[1]); ... could have been sub print_log { ($log, $msg) = #_;; it's a bit more idiomatic and concise.
Indent properly your code. It's possible that indentation was lost in the copy-paste, but, if it's not the case, then you should indent better your code. This will save you a lot of time when writing/reading your code, and will save other people even more time when they'll read your code. Most IDEs have indentation features that can help you indent the code.

Searching string in a multiline file using perl

I'm trying to find a match in a multi-line string using this script.
It works only when there's one row in the destination file.
I would like to know if there's any substitution for $_ in order to search a multi-line text?
#!/usr/bin/perl
my $time=`date +%D_%H:%M`;
chomp($time);
my $last_location=`cat /file.txt`;
chomp($last_location);
open (ERRORLOG, ">>/errors.log") || die "failed to open errorlog file \n$!\n\a";
open (MESSAGES, "</logfile") || die "failed to open alarms file \n$!\n\a";
seek(MESSAGES, 0, 2) || die "Couldn't seek to pos: 0 at end of file $!\n";
$end_position = tell(MESSAGES);
if ($end_position < $last_location) {
$last_location=0;
}
if ($end_position > $last_location) {
seek(MESSAGES, $last_location, 0) || die "Couldn't seek to pos: $last_location $! \n";
$num_of_messages_sent=0;
while (<MESSAGES>) {
chomp;
$line_to_check $_;
if ($line_to_check =~ /some text/ ) {
print ERRORLOG "$time: $line_to_check \n";
if ($num_of_messages_sent < 4) {
do something;
}
if ($num_of_messages_sent == 4) {
do something;
}
#increase counter
$num_of_messages_sent = $num_of_messages_sent + 1;
}
}
$last_location = tell(MESSAGES);
# print "last: $last_location , end: $end_position \n";
`echo $last_location >/file_last_location.txt`;
}
close (ERRORLOG);
close (MESSAGES);
Looks better this way:
while (my $line = <MESSAGES>) {
chomp($line);
print "line : $line\n";
if ($line =~ m!your_regexp_here!i){
print ERRORLOG "$time: $line_to_check \n";
$num_of_messages_sent++;
print "\tMATCH\tline: $line\n";
if ($num_of_messages_sent < 4){
print "Found $num_of_messages_sent matches\n";
}
}
}

Detect empty xml element tag

I have to parse an xml document , decode base64 values from specific elements and printout those fields and the corresponding decoded values. Some of the elements don't have value I want to print the name of the element and "no value" string or simply \n, but somehow I cannot match empty string '' or undefined value.
Example input file:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet href="ums_v1.xsl" type="text/xsl"?>
<ums>
<datatype>Report</datatype>
<reference><![CDATA[NDkxNzYwNDAyNjMwODAy]]></reference>
<sequence></sequence>
<calling-party><![CDATA[NDkxNzYwNDAyNjMwOA==]]></calling-party>
<IP></IP>
<called-party><![CDATA[NDk4OTI0NDI0Mzc0]]></called-party>
<start>26/02/14 09:28:55</start>
<settings></settings>
<direction><![CDATA[Z2VzZW5kZXQ=]]></direction>
<result><![CDATA[ZXJmb2xncmVpY2g=]]></result>
<fax-tif></fax-tif>
<fax-jpg></fax-jpg>
<fax-png></fax-png>
<audio-wav></audio-wav>
<audio-mp3></audio-mp3>
<sms></sms>
<mms></mms>
</ums>
My program:
#!/usr/bin/perl -w
use XML::Parser;
use MIME::Base64;
use feature qw/switch/;
my $message; # Hashref containing infos on a message
while ($file = shift(#ARGV)) {
print "========================================================\n";
print "file: $file \n";
print "========================================================\n";
my $parser = new XML::Parser ( Handlers => { # Creates parser object
Start => \&hdl_start,
End => \&hdl_end,
Char => \&hdl_char,
});
$parser->parsefile($file);
print "========================================================\n";
}
sub hdl_start{
my ($p, $elt, %atts) = #_;
$atts{'_str'} = '';
given ($elt) {
when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/)) {
$message = \%atts;
}
when (/^audio-mp3/) {
$message = \%atts;
}
when (/^audio-mp3/) {
$message = \%atts;
}
when (/^mms/) {
$message = \%atts;
}
}
}
sub hdl_end{
my ($p, $elt) = #_;
given ($elt) {
when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/)) {
print "$elt : " ;
format_message($message) if $message && $message->{'_str'} =~ /\S/;
}
when (/^audio-mp3/) {
print "audio content in $file\.mp3\n" ;
format_mp3($message) if $message && $message->{'_str'} =~ /\S/;
}
when (/^audio-wav/) {
print "audio content in $file\.wav\n" ;
format_wav($message) if $message && $message->{'_str'} =~ /\S/;
# print "$Audiowav->{'_str'}";
}
when (/^mms/) {
print "mms content in $file\.mms, depending on the mms content further processing may be needed\n" ;
format_mms($message) if $message && $message->{'_str'} =~ /\S/;
}
}
}
sub hdl_char {
my ($p, $str) = #_;
$message->{'_str'} .=$str;
}
sub hdl_def { } # We just throw everything else
sub format_message { # Helper sub to nicely format what we got from the XML and decode base64 values of the needed attributes
my $atts = shift;
$atts->{'_str'} =~ s/\n//g;
if (!defined($atts->{'_str'})) { print "\n"}
my $decoded = decode_base64($atts->{'_str'});
print " $decoded\n";
# if (!$decoded) {print "\n"}
undef $message;
}
sub format_mp3 { # decode base64 content and save to mp3 file - for the audio-mp3 tag
my $atts = shift;
open FILE, "> ./$file.mp3" or die $!;
$atts->{'_str'} =~ s/\n//g;
# print "mp3: $atts $atts->{'_str'}\n";
my $decoded = decode_base64($atts->{'_str'});
print FILE $decoded;
close FILE;
undef $message;
}
sub format_wav { # decode base64 content and save to wav file - for the audio-wav tag
my $atts = shift;
open FILE, "> ./$file.wav" or die $!;
$atts->{'_str'} =~ s/\n//g;
print "wav: $atts $atts->{'_str'}\n";
my $decoded = decode_base64($atts->{'_str'});
print "$decoded\n";
print FILE $decoded;
close FILE;
undef $message;
}
sub format_mms { # decode mms base64 content and save to file - depending on the content further processing may be needed
my $atts = shift;
open FILE, "> ./$file.wav" or die $!;
$atts->{'_str'} =~ s/\n//g;
print "wav: $atts $atts->{'_str'}\n";
my $decoded = decode_base64($atts->{'_str'});
print "$decoded\n";
print FILE $decoded;
close FILE;
undef $message;
}
I've tried in format_message subroutine different matches, I've tried also in the hdl_end - any idea?
thanks in advance
Using XML::LibXML:
use strict;
use warnings;
use XML::LibXML;
my $string = do { local $/; <DATA> };
my $dom = XML::LibXML->load_xml(string => $string);
for my $node ($dom->findnodes(q{//*})) {
if ($node->textContent() eq '') {
print $node->nodeName, "\n";
}
}
__DATA__
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet href="ums_v1.xsl" type="text/xsl"?>
<ums>
<datatype>Report</datatype>
<reference><![CDATA[NDkxNzYwNDAyNjMwODAy]]></reference>
<sequence></sequence>
<calling-party><![CDATA[NDkxNzYwNDAyNjMwOA==]]></calling-party>
<IP></IP>
<called-party><![CDATA[NDk4OTI0NDI0Mzc0]]></called-party>
<start>26/02/14 09:28:55</start>
<settings></settings>
<direction><![CDATA[Z2VzZW5kZXQ=]]></direction>
<result><![CDATA[ZXJmb2xncmVpY2g=]]></result>
<fax-tif></fax-tif>
<fax-jpg></fax-jpg>
<fax-png></fax-png>
<audio-wav></audio-wav>
<audio-mp3></audio-mp3>
<sms></sms>
<mms></mms>
</ums>
Outputs:
sequence
IP
settings
fax-tif
fax-jpg
fax-png
audio-wav
audio-mp3
sms
mms
Solution is to test the empty tag in the end handler, like:
sub hdl_end{
my ($p, $elt) = #_;
given ($elt) {
when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/)) {
print "$elt : " ;
format_message($message) if $message && $message->{'_str'} =~ /\S/;
print "no value\n" if $message && $message->{'_str'} !~ /\S/;
}
when (/^audio-mp3/) {
print "audio content in $file\.mp3\n" if $message && $message->{'_str'} =~ /\S/;
format_mp3($message) if $message && $message->{'_str'} =~ /\S/;
}
when (/^audio-wav/) {
print "audio content in $file\.wav\n" if $message && $message->{'_str'} =~ /\S/ ;
format_wav($message) if $message && $message->{'_str'} =~ /\S/;
}
when (/^mms/) {
print "mms content in $file\.mms, depending on the mms content further processing may be needed\n" if $message && $message->{'_str'} =~ /\S/;
format_mms($message) if $message && $message->{'_str'} =~ /\S/;
}
}
}
So the if $message->{'_str'} !~ /\S/ is what I needed ... Thank you all for the effort!

Generate Excel output using Win32::OLE in Perl

I am a beginner in Perl and have tried playing around with Perl much to understand its ways and working! I have a basic knowledge of arays, hashes and related topics. I have to develop a script for a topic and i am quite unsure how to go about it. I desperately need help and am very grateful to anyone who can explain the 'how to do' part!
I have a code with 3 parts in it which does the same thing thrice for 3 different lets say components. Basic idea is, it takes all the components marked 'A' from an excel file, iterates through the excel file, adds up its corresponding RAM and ROM values and prints out the output without duplicate entries. The 2nd and 3rd part are the same but for components 'B' and 'C'. So far i am able to print out the output of all 3 parts in a text file. But now i want all three outputs in an excel workbook as 3 separate worksheets!
I am not particularly sure how to go about it. Any ideas are really welcome!!!
PS: Please forgive me if i have not typed the code right in the forum! This is my first post!!
Here is how my code looks so far:
# This Test script was created to try out the possible methods to extract all the Names from the
# excel report without duplicate entries and find their corresponding RAM/ROM size sum
# -excel D:\Abc\Test.xlsx -out D:\Abc\Output
sub usage($)
{
return shift(#_) . <<"END_USAGE";
Usage: $0 -excel Specify the file path.
-out outputdirectory Specify output directiory
END_USAGE
}
use Getopt::Long;
use Win32::OLE;
use List::Util qw(sum);
use Data::Dumper qw(Dumper);
my $output_path = ();
my $excel_path = ();
my $no_rows = ();
my $lastCol = ();
GetOptions("excel=s" => \$excel_path,
"out=s" => \$output_path,
"h|help" => \$help,
);
#help message
die usage("") if ($help);
system(cls);
print "\n*******************************************************************\n";
print "Component Overview \n";
print "*******************************************************************\n";
print "Please wait, Processing may take couple of minutes... \n";
##File handler for the script file.
$log_path = $output_path."\\log.txt";
$output_file_path = $output_path."\\TestExcel.xlsx";
open LogFile,">",$log_path or die "Cannot create the log file:$log_path !!!";
print LogFile "Start time :".localtime()."\n";
# Start Excel and make it visible
my $xlApp = Win32::OLE->GetActiveObject('Excel.Application') || Win32::OLE->new('Excel.Application', 'Quit');
$xlApp->{Visible} = 0;
#Opening the work book
my $workBook = $xlApp->Workbooks->Open($excel_path);
#print "X: " . $workBook . " - " . $excel_path . "\n";
my $excelSheet = $workBook->Worksheets("Report");
$excelSheet->Activate();
print "Reading the file...\n";
&ReadExcel();
print LogFile "Completed time :".localtime()."\n";
print "\nCompleted.Please close this window...\n" ;
print "*******************************************************************\n";
# Sub routine to parse the cosipa file
sub ReadExcel()
{
my $row_index;
#Findings the number of valid rows
$no_rows = $excelSheet->UsedRange->Rows->{'Count'};
$lastCol = $excelSheet->UsedRange->Columns->{'Count'};
$row_index = findRowindex();
my #comp_array = ();
# Name => ResourceType => size
my $resultData = {};
for(my $index=($row_index+1);$index<=$no_rows;$index++)
{
my $X = $excelSheet->Cells($index,6)->Value();
my $Y = $excelSheet->Cells($index,7)->Value();
my $name = $excelSheet->Cells($index,9)->Value();
my $resourceType = $excelSheet->Cells($index,3)->Value();
my $size = $excelSheet->Cells($index,2)->Value();
#Name Overview
my $currNameTypeMap;
if ( ! exists $resultNameData->{ $name } ) # ->: arrow operator is used to dereference reference to arrays or hashes.
{
$resultNameData->{ $name } = {};
}
$currNameTypeMap = $resultNameData->{ $name };
$currNameTypeMap->{ $resourceType } += $size;
# Y Overview
my $currYTypeMap;
if ( ! exists $resultYData->{ $Y } ) # ->: arrow operator is used to dereference reference to arrays or hashes.
{
$resultYData->{ $cluster } = {};
}
$currYTypeMap = $resultYData->{ $Y };
$currYTypeMap->{ $resourceType } += $size;
# X Overview
my $currXTypeMap;
if ( ! exists $resultXData->{ $X } ) # ->: arrow operator is used to dereference reference to arrays or hashes.
{
$resultXData->{ $X } = {};
}
$currXTypeMap = $resultXData->{ $X };
$currXTypeMap->{ $resourceType } += $size;
}
my #uniqNameArr = sort keys %$resultNameData;
my #uniqYArr = sort keys %$resultYData;
my #uniqXArr = sort keys %$resultXData;
for my $currName ( #uniqNameArr )
{
print $currName . "\n". " RAM: " . $resultNameData->{ $currName }-> { "RAM" } . ", ROM: " . $resultNameData->{ $currName }-> { "ROM" } . "\n";
#print Dumper %$resultData;
}
print "----------------------------------------------------------------------- \n";
for my $currY ( #uniqYArr )
{
print $currY. "\n". " RAM: " . $resultYData->{ $currY }-> { "RAM" } . ", ROM: " . $resultYData->{ $currY }-> { "ROM" } . "\n";
}
print "------------------------------------------------------------------------ \n";
for my $currX ( #uniqXArr )
{
print $currX . "\n". " RAM: " . $resultXData->{ $currX }-> { "RAM" } . ", ROM: " . $resultXData->{ $currX }-> { "ROM" } . "\n";
}
}
#Sub routine to find the starting row index
sub findRowindex()
{
my $ret = ();
for(my $index=1;$index<$no_rows;$index++)
{
if(defined($excelSheet->Cells($index,1)))
{
my $cel_value = $excelSheet->Cells($index,1)->Value();
if($cel_value =~ m/^Name$/i)
{
$ret = $index;
last;
}
}
}
return $ret;
}
#Trim function
sub trim {
(my $s = $_[0]) =~ s/^\s+|\s+$//g;
return $s;
}
A workaround: You could use Excel::Writer::XLSX to create Excel files, it is working fine and quite robust. Here is how you could convert a tab separated file to Excel.
Reading excel: Spreadsheet::XLSX
use Text::Iconv;
my $converter = Text::Iconv -> new ("utf-8", "windows-1251");
use Spreadsheet::XLSX;
my $excel = Spreadsheet::XLSX -> new ('test.xlsx', $converter);
foreach my $sheet (#{$excel -> {Worksheet}}) {
printf("Sheet: %s\n", $sheet->{Name});
$sheet -> {MaxRow} ||= $sheet -> {MinRow};
foreach my $row ($sheet -> {MinRow} .. $sheet -> {MaxRow}) {
$sheet -> {MaxCol} ||= $sheet -> {MinCol};
foreach my $col ($sheet -> {MinCol} .. $sheet -> {MaxCol}) {
my $cell = $sheet -> {Cells} [$row] [$col];
if ($cell) {
printf("( %s , %s ) => %s\n", $row, $col, $cell -> {Val});
}
}
}
}
Writing excel: Excel::Writer::XLSX
my $workbook = Excel::Writer::XLSX->new( $xls_filename );
my $worksheet = $workbook->add_worksheet('data');
# Create a format for the headings
my $header_format = $workbook->add_format();
$header_format->set_bold();
$header_format->set_size( 18 );
$header_format->set_color( 'black' );
$header_format->set_align( 'center' );
my $row=0;
while (my $line = <$fh>){
chomp($line);
my #cols = split(/\t/,$line);
for(my $col=0;$col<#cols;$col++){
if ($row == 0 ){
$worksheet->write_string( $row, $col, $cols[$col],$header_format );
} else {
$worksheet->write_string( $row, $col, $cols[$col] );
}
}
$row++;
}
close($fh);
I hope this helps you.
Regards,

perl: persist set of strings with commit support

I have a set of strings that is modified inside a loop of 25k iterations. It's empty at the beginning, but 0-200 strings are randomly added or removed from it in each cycle. At the end, the set contains about 80k strings.
I want to make it resumable. The set should be saved to disk after each cycle and be loaded on resume.
What library can I use? The amount of raw data is ~16M, but the changes are usually small. I don't want it to rewrite the whole store on each iteration.
Since the strings are paths, I'm thinking of storing them in a log file like this:
+a
+b
commit
-b
+d
commit
In the beginning the file is loaded into a hash and then compacted. If there's no commit line in the end, the last block is not taken into account.
The Storable package brings persistence to your Perl data structures (SCALAR, ARRAY, HASH or REF objects), i.e. anything that can be conveniently stored to disk and retrieved at a later time.
I've decided to put away the heavy artillery and write something simple:
package LoL::IMadeADb;
sub new {
my $self;
( my $class, $self->{dbname} ) = #_;
# open for read, then write. create if not exist
#msg "open $self->{dbname}";
open(my $fd, "+>>", $self->{dbname}) or die "cannot open < $self->{dbname}: $!";
seek($fd, 0, 0);
$self->{fd} = $fd;
#msg "opened";
$self->{paths} = {};
my $href = $self->{paths};
$self->{nlines} = 0;
my $lastcommit = 0;
my ( $c, $rest );
while(defined($c = getc($fd)) && substr(($rest = <$fd>), -1) eq "\n") {
$self->{nlines}++;
chomp($rest);
if ($c eq "c") {
$lastcommit = tell($fd);
#msg "lastcommit: " . $lastcommit;
} elsif ($c eq "+") {
$href->{$rest} = undef;
} elsif ($c eq "-") {
delete $href->{$rest};
}
#msg "line: '" . $c . $rest . "'";
}
if ($lastcommit < tell($fd)) {
print STDERR "rolling back incomplete file: " . $self->{dbname} . "\n";
seek($fd, $lastcommit, 0);
while(defined($c = getc($fd)) && substr(($rest = <$fd>), -1) eq "\n") {
$self->{nlines}--;
chomp($rest);
if ($c eq "+") {
delete $href->{$rest};
} else {
$href->{$rest} = undef;
}
}
truncate($fd, $lastcommit) or die "cannot truncate $self->{dbname}: $!";
print STDERR "rolling back incomplete file; done\n";
}
#msg "entries = " . (keys( %{ $href })+0) . ", nlines = " . $self->{nlines} . "\n";
bless $self, $class
}
sub add {
my ( $self , $path ) = #_;
if (!exists $self->{paths}{$path}) {
$self->{paths}{$path} = undef;
print { $self->{fd} } "+" . $path . "\n";
$self->{nlines}++;
$self->{changed} = 1;
}
undef
}
sub remove {
my ( $self , $path ) = #_;
if (exists $self->{paths}{$path}) {
delete $self->{paths}{$path};
print { $self->{fd} } "-" . $path . "\n";
$self->{nlines}++;
$self->{changed} = 1;
}
undef
}
sub save {
my ( $self ) = #_;
return undef unless $self->{changed};
my $fd = $self->{fd};
my #keys = keys %{$self->{paths}};
if ( $self->{nlines} - #keys > 5000 ) {
#msg "compacting";
close($fd);
my $bkpdir = dirname($self->{dbname});
($fd, my $bkpname) = tempfile(DIR => $bkpdir , SUFFIX => ".tmp" ) or die "cannot create backup file in: $bkpdir: $!";
$self->{nlines} = 1;
for (#keys) {
print { $fd } "+" . $_ . "\n" or die "cannot write backup file: $!";
$self->{nlines}++;
}
print { $fd } "c\n";
close($fd);
move($bkpname, $self->{dbname})
or die "cannot rename " . $bkpname . " => " . $self->{dbname} . ": $!";
open($self->{fd}, ">>", $self->{dbname}) or die "cannot open < $self->{dbname}: $!";
} else {
print { $fd } "c\n";
$self->{nlines}++;
# flush:
my $previous_default = select($fd);
$| ++;
$| --;
select($previous_default);
}
$self->{changed} = 0;
#print "entries = " . (#keys+0) . ", nlines = " . $self->{nlines} . "\n";
undef
}
1;