Detect empty xml element tag - perl

I have to parse an xml document , decode base64 values from specific elements and printout those fields and the corresponding decoded values. Some of the elements don't have value I want to print the name of the element and "no value" string or simply \n, but somehow I cannot match empty string '' or undefined value.
Example input file:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet href="ums_v1.xsl" type="text/xsl"?>
<ums>
<datatype>Report</datatype>
<reference><![CDATA[NDkxNzYwNDAyNjMwODAy]]></reference>
<sequence></sequence>
<calling-party><![CDATA[NDkxNzYwNDAyNjMwOA==]]></calling-party>
<IP></IP>
<called-party><![CDATA[NDk4OTI0NDI0Mzc0]]></called-party>
<start>26/02/14 09:28:55</start>
<settings></settings>
<direction><![CDATA[Z2VzZW5kZXQ=]]></direction>
<result><![CDATA[ZXJmb2xncmVpY2g=]]></result>
<fax-tif></fax-tif>
<fax-jpg></fax-jpg>
<fax-png></fax-png>
<audio-wav></audio-wav>
<audio-mp3></audio-mp3>
<sms></sms>
<mms></mms>
</ums>
My program:
#!/usr/bin/perl -w
use XML::Parser;
use MIME::Base64;
use feature qw/switch/;
my $message; # Hashref containing infos on a message
while ($file = shift(#ARGV)) {
print "========================================================\n";
print "file: $file \n";
print "========================================================\n";
my $parser = new XML::Parser ( Handlers => { # Creates parser object
Start => \&hdl_start,
End => \&hdl_end,
Char => \&hdl_char,
});
$parser->parsefile($file);
print "========================================================\n";
}
sub hdl_start{
my ($p, $elt, %atts) = #_;
$atts{'_str'} = '';
given ($elt) {
when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/)) {
$message = \%atts;
}
when (/^audio-mp3/) {
$message = \%atts;
}
when (/^audio-mp3/) {
$message = \%atts;
}
when (/^mms/) {
$message = \%atts;
}
}
}
sub hdl_end{
my ($p, $elt) = #_;
given ($elt) {
when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/)) {
print "$elt : " ;
format_message($message) if $message && $message->{'_str'} =~ /\S/;
}
when (/^audio-mp3/) {
print "audio content in $file\.mp3\n" ;
format_mp3($message) if $message && $message->{'_str'} =~ /\S/;
}
when (/^audio-wav/) {
print "audio content in $file\.wav\n" ;
format_wav($message) if $message && $message->{'_str'} =~ /\S/;
# print "$Audiowav->{'_str'}";
}
when (/^mms/) {
print "mms content in $file\.mms, depending on the mms content further processing may be needed\n" ;
format_mms($message) if $message && $message->{'_str'} =~ /\S/;
}
}
}
sub hdl_char {
my ($p, $str) = #_;
$message->{'_str'} .=$str;
}
sub hdl_def { } # We just throw everything else
sub format_message { # Helper sub to nicely format what we got from the XML and decode base64 values of the needed attributes
my $atts = shift;
$atts->{'_str'} =~ s/\n//g;
if (!defined($atts->{'_str'})) { print "\n"}
my $decoded = decode_base64($atts->{'_str'});
print " $decoded\n";
# if (!$decoded) {print "\n"}
undef $message;
}
sub format_mp3 { # decode base64 content and save to mp3 file - for the audio-mp3 tag
my $atts = shift;
open FILE, "> ./$file.mp3" or die $!;
$atts->{'_str'} =~ s/\n//g;
# print "mp3: $atts $atts->{'_str'}\n";
my $decoded = decode_base64($atts->{'_str'});
print FILE $decoded;
close FILE;
undef $message;
}
sub format_wav { # decode base64 content and save to wav file - for the audio-wav tag
my $atts = shift;
open FILE, "> ./$file.wav" or die $!;
$atts->{'_str'} =~ s/\n//g;
print "wav: $atts $atts->{'_str'}\n";
my $decoded = decode_base64($atts->{'_str'});
print "$decoded\n";
print FILE $decoded;
close FILE;
undef $message;
}
sub format_mms { # decode mms base64 content and save to file - depending on the content further processing may be needed
my $atts = shift;
open FILE, "> ./$file.wav" or die $!;
$atts->{'_str'} =~ s/\n//g;
print "wav: $atts $atts->{'_str'}\n";
my $decoded = decode_base64($atts->{'_str'});
print "$decoded\n";
print FILE $decoded;
close FILE;
undef $message;
}
I've tried in format_message subroutine different matches, I've tried also in the hdl_end - any idea?
thanks in advance

Using XML::LibXML:
use strict;
use warnings;
use XML::LibXML;
my $string = do { local $/; <DATA> };
my $dom = XML::LibXML->load_xml(string => $string);
for my $node ($dom->findnodes(q{//*})) {
if ($node->textContent() eq '') {
print $node->nodeName, "\n";
}
}
__DATA__
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet href="ums_v1.xsl" type="text/xsl"?>
<ums>
<datatype>Report</datatype>
<reference><![CDATA[NDkxNzYwNDAyNjMwODAy]]></reference>
<sequence></sequence>
<calling-party><![CDATA[NDkxNzYwNDAyNjMwOA==]]></calling-party>
<IP></IP>
<called-party><![CDATA[NDk4OTI0NDI0Mzc0]]></called-party>
<start>26/02/14 09:28:55</start>
<settings></settings>
<direction><![CDATA[Z2VzZW5kZXQ=]]></direction>
<result><![CDATA[ZXJmb2xncmVpY2g=]]></result>
<fax-tif></fax-tif>
<fax-jpg></fax-jpg>
<fax-png></fax-png>
<audio-wav></audio-wav>
<audio-mp3></audio-mp3>
<sms></sms>
<mms></mms>
</ums>
Outputs:
sequence
IP
settings
fax-tif
fax-jpg
fax-png
audio-wav
audio-mp3
sms
mms

Solution is to test the empty tag in the end handler, like:
sub hdl_end{
my ($p, $elt) = #_;
given ($elt) {
when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/)) {
print "$elt : " ;
format_message($message) if $message && $message->{'_str'} =~ /\S/;
print "no value\n" if $message && $message->{'_str'} !~ /\S/;
}
when (/^audio-mp3/) {
print "audio content in $file\.mp3\n" if $message && $message->{'_str'} =~ /\S/;
format_mp3($message) if $message && $message->{'_str'} =~ /\S/;
}
when (/^audio-wav/) {
print "audio content in $file\.wav\n" if $message && $message->{'_str'} =~ /\S/ ;
format_wav($message) if $message && $message->{'_str'} =~ /\S/;
}
when (/^mms/) {
print "mms content in $file\.mms, depending on the mms content further processing may be needed\n" if $message && $message->{'_str'} =~ /\S/;
format_mms($message) if $message && $message->{'_str'} =~ /\S/;
}
}
}
So the if $message->{'_str'} !~ /\S/ is what I needed ... Thank you all for the effort!

Related

Config::Simple, how to change default output header?

Testing the generation on the fly of config files in ini format with the package Config::Simple generates the desired file but always with the same header section, which includes the name of the Perl package at the beginning of the file. Does Config::Simple have a method to modify this default printing? I would like to replace this name with a new one. Is it possible with some feature in the package?
Here is a toy code:
#!/usr/bin/perl
use strict;
use warnings;
use Config::Simple;
my $cfg = new Config::Simple(
syntax => 'ini'
) or die Config::Simple->error();
$cfg->param("Program.mode", "ALL");
$cfg->param("Program.strategies", "1,2,3,4,5,6,7,8,9,10");
$cfg->param("Data.name_specie", "Homo sapiens");
$cfg->write("test.cfg") or die $cfg->error();
The output:
; Config::Simple 4.58
; Mon Mar 16 12:33:55 2020
[Program]
strategies=1,2,3,4,5,6,7,8,9,10
mode=ALL
[Data]
name_specie=Homo sapiens
Just wanted to replace the ; Config::Simple 4.58 line.
Thanks in advance for your suggestions.
"Simple" modules often aren't simple because they make so many decisions for you. If you don't like those decisions, you are stuck. This particular module hasn't been updated in over a decade and has several architectural issues. If you want INI files, use a different module, such as Config:: IniFiles, Config::Tiny, or Config::INI which are maintained and flexible.
My first thought was to subclass and override the method that adds the header since it was hard-baked into one of the methods. This is onerous because the module uses a combination of "private" subroutines and package variables. I tend to try this first because I don't disturb the original package:
use strict;
use warnings;
use v5.12;
use Config::Simple;
package Local::Config::Simple {
use parent qw(Config::Simple);
# generates a writable string
sub as_string {
my $self = shift;
my $syntax = $self->{_SYNTAX} or die "'_SYNTAX' is not defined";
my $sub_syntax = $self->{_SUB_SYNTAX} || '';
my $currtime = localtime;
my $STRING = undef;
if ( $syntax eq 'ini' ) {
while ( my ($block_name, $key_values) = each %{$self->{_DATA}} ) {
unless ( $sub_syntax eq 'simple-ini' ) {
$STRING .= sprintf("[%s]\n", $block_name);
}
while ( my ($key, $value) = each %{$key_values} ) {
my $values = join (Config::Simple::WRITE_DELIM(), map { Config::Simple::quote_values($_) } #$value);
$STRING .= sprintf("%s=%s\n", $key, $values );
}
$STRING .= "\n";
}
} elsif ( $syntax eq 'http' ) {
while ( my ($key, $value) = each %{$self->{_DATA}} ) {
my $values = join (Config::Simple::WRITE_DELIM(), map { Config::Simple::quote_values($_) } #$value);
$STRING .= sprintf("%s: %s\n", $key, $values);
}
} elsif ( $syntax eq 'simple' ) {
while ( my ($key, $value) = each %{$self->{_DATA}} ) {
my $values = join (Config::Simple::WRITE_DELIM(), map { Config::Simple::quote_values($_) } #$value);
$STRING .= sprintf("%s %s\n", $key, $values);
}
}
$STRING .= "\n";
return $STRING;
}
}
my $cfg = Local::Config::Simple->new(
syntax => 'ini'
) or die Config::Simple->error();
$cfg->param("Program.mode", "ALL");
$cfg->param("Program.strategies", "1,2,3,4,5,6,7,8,9,10");
$cfg->param("Data.name_specie", "Homo sapiens");
$cfg->write("file.ini") or die $cfg->error();
That works and gives the output:
[Data]
name_specie=Homo sapiens
[Program]
mode=ALL
strategies=1,2,3,4,5,6,7,8,9,10
However, it broke several OO ideas, so I find this approach unpleasant. I can do a little bit less work by fixing the original package by redefining the original subroutine. Then the package variables and subroutines still work. Load the original module first then add your redefinitions:
use strict;
use warnings;
use v5.12;
use Config::Simple;
no warnings 'redefine';
package Config::Simple {
# generates a writable string
sub as_string {
my $self = shift;
my $syntax = $self->{_SYNTAX} or die "'_SYNTAX' is not defined";
my $sub_syntax = $self->{_SUB_SYNTAX} || '';
my $currtime = localtime;
my $STRING = undef;
if ( $syntax eq 'ini' ) {
while ( my ($block_name, $key_values) = each %{$self->{_DATA}} ) {
unless ( $sub_syntax eq 'simple-ini' ) {
$STRING .= sprintf("[%s]\n", $block_name);
}
while ( my ($key, $value) = each %{$key_values} ) {
my $values = join (WRITE_DELIM, map { quote_values($_) } #$value);
$STRING .= sprintf("%s=%s\n", $key, $values );
}
$STRING .= "\n";
}
} elsif ( $syntax eq 'http' ) {
while ( my ($key, $value) = each %{$self->{_DATA}} ) {
my $values = join (WRITE_DELIM, map { quote_values($_) } #$value);
$STRING .= sprintf("%s: %s\n", $key, $values);
}
} elsif ( $syntax eq 'simple' ) {
while ( my ($key, $value) = each %{$self->{_DATA}} ) {
my $values = join (WRITE_DELIM, map { quote_values($_) } #$value);
$STRING .= sprintf("%s %s\n", $key, $values);
}
}
$STRING .= "\n";
return $STRING;
}
}
my $cfg = Config::Simple->new(
syntax => 'ini'
) or die Config::Simple->error();
$cfg->param("Program.mode", "ALL");
$cfg->param("Program.strategies", "1,2,3,4,5,6,7,8,9,10");
$cfg->param("Data.name_specie", "Homo sapiens");
$cfg->write("file.ini") or die $cfg->error();
I write quite a bit about this in Effective Perl Programming as a way to deal with legacy code.
As a side note, you asked if there was some method in the module. You could have simply looked at the source to see what was happening and what was available. You would have seen that the header was hard-coded into as_string.

Perl script to convert xls to csv

This script coverts xls to csv ok.
The challenge is that it does not convert blank cell in the xls to blanks in csv file.
Any help is appreciated: UPDATED SCRIPT
#!/usr/bin/perl
use strict;
use Spreadsheet::ParseExcel;
use Text::CSV;
my $sourcename = shift #ARGV or die "invocation: $0 <source file>\n";
my $source_excel = new Spreadsheet::ParseExcel;
my $source_book = $source_excel->Parse($sourcename)
or die "Could not open source Excel file $sourcename: $!";
my $storage_book;
foreach my $source_sheet_number (0 .. $source_book->{SheetCount}-1) {
my $source_sheet = $source_book->{Worksheet}[$source_sheet_number];
print "--------- SHEET:", $source_sheet->{Name}, "\n";
next unless defined $source_sheet->{MaxRow};
next unless $source_sheet->{MinRow} <= $source_sheet->{MaxRow};
next unless defined $source_sheet->{MaxCol};
next unless $source_sheet->{MinCol} <= $source_sheet->{MaxCol};
foreach my $row_index ($source_sheet->{MinRow} .. $source_sheet->{MaxRow}) {
foreach my $col_index ($source_sheet->{MinCol} .. $source_sheet->{MaxCol}) {
my $source_cell = $source_sheet->{Cells}[$row_index][$col_index];
if ($source_cell && $source_cell->Value) {
#print "( $row_index , $col_index ) =>", $source_cell->Value, "\t;";
print $source_cell->Value, ";";
}
else
{
print ";"
}
}
}
}
sample excel
EFG KDD ABS JME
FGO POP JET
converted as:
EFG;KDD;ABS;JME;
FGO;POP;JET;
but it should be:
EFG;KDD;ABS;JME;
FGO;;POP;JET;
You have to check if the value of the cell is initialized, not the cell it self.
Change:
if ($source_cell) {
#print "( $row_index , $col_index ) =>", $source_cell->Value, "\t;";
print $source_cell->Value, ";";
}
To:
if ($source_cell && $source_cell->Value) {
#print "( $row_index , $col_index ) =>", $source_cell->Value, "\t;";
print $source_cell->Value, ";";
} else {
print ";";
}
should work.
UPDATE:
foreach my $row_index ($source_sheet->{MinRow} .. $source_sheet->{MaxRow}) {
foreach my $col_index ($source_sheet->{MinCol} .. $source_sheet->{MaxCol}) {
my $source_cell = $source_sheet->{Cells}[$row_index][$col_index];
if ($source_cell && $source_cell->Value) {
print $source_cell->Value.";";
} else {
print ";";
}
}
print "\n";
}
}

Perl scripting to get "Wanted Data"

I have data like below:
Source file: input.txt containing many records with a format like this:
09.05.2014 09.49.52:359 RID:routerNode1#app1:1662306081,msisdn:787887225696,sid:93889095007001,tid:1405090902095648846024000,status:2,time:20140509094952,reasonELIVRD,refund:null,status2:null
output1.txt:
09.05.2014,09.49.52,routerNode1#app1:1662306081,787887225696,93889095007001,1405090902095648846024000,2,20140509094952
output2.txt
09-05-2014,09-49-52,routerNode1#app1:1662306081,787887225696,93889095007001,1405090902095648846024000,2,20140509094952
Please help me to build the perl program to read from input file and then write to the output file.
this code read from the line, not yet read from input file. But still problem
data given is not truedata given is not truedata given is not truedata given is not true,787887225696,93889095007001,1405090902095648846024000,2,20140509094952
#!/usr/bin/perl
use warnings;
use strict;
my $str = <<'_STR_';
09.05.2014 09.49.52:359 RID:routerNode1#app1:1662306081,msisdn:787887225696,sid:93889095007001,tid:1405090902095648846024000,status:2,time:20140509094952,reason:DELIVRD,refund:null,status2:null
_STR_
my $wanted_data;
for ( split /,/, $str ) {
if (/(\d{2}\.\d{2}\.\d{4}' '\d{2}\.\d{2}\.\d{2}).+RID:(\d+)/sm) {
my ( $format_date, $rid )= ( $1, $2 );
$format_date =~ s/(\d{4})(\d{2})(\d{2})/$1-$2-$3/;
$wanted_data .= $format_date . ',' . $rid;
}
elsif (/msisdn:(.+)/) {
$wanted_data .= ',' . $1;
}
elsif (/sid:(.+)/) {
$wanted_data .= ',' . $1;
}
elsif (/tid:(.+)/) {
$wanted_data .= ',' . $1;
}
elsif (/status:(.+)/) {
$wanted_data .= ',' . $1;
}
elsif (/time:(.+)/) {
$wanted_data .= ',' . $1;
}
else{ print "data given is not true"}
}
print $wanted_data, $/;
You could try the following perl script:
#! /usr/bin/perl
use warnings;
use strict;
while (<>) {
my #out;
my #fld=split(/,/);
my ($format_date, $rid)=$fld[0]=~/(\d{2}\.\d{2}\.\d{4} \d{2}\.\d{2}\.\d{2}).+RID:(.*)$/;
badInput() if ! defined $format_date;
my #f=split(" ",$format_date);
push(#out,$f[0]=~s/\./-/gr);
push(#out,$f[1]=~s/\./-/gr);
push(#out,$rid);
my $i=1;
for (qw(msisdn sid tid status time)) {
my ($id)=$fld[$i++]=~/$_:(.*)$/;
badInput() if ! defined $id;
push(#out,$id);
}
print join(",",#out)."\n";
}
sub badInput {
die "Data given is not true!";
}

parsing abnf grammar using perl

Thanks for the inputs for question posted at link Parse::ABNF perl usage . I am still facing difficulty in resolving my problem. Request to check my problem below and provide pointers towards solution.
For now I have created the sip grammar in ABNF format in a file(named it as sip.abnf).
I have sip messages with headers in a file like below (recd_message.txt):
From: <sip:07455900064#212.136.178.216:5060;user=phone>;tag=1526438727-1338998848384-
To: "govoice-lab2-aokanlawon"<sip:441127653485#isf.fire.ipgcom.com>
Contact: <sip:07455900064#10.12.33.29:5070;transport=udp>
Allow: ACK,BYE,CANCEL,INFO,INVITE,OPTIONS,PRACK,REFER,NOTIFY
Accept: multipart/mixed,application/media_control+xml,application/sdp
I created a Perl program to use the ABNF grammar to validate the above headers messages named it as testSipHeader.pl with below content:
use strict; use warnings;
use File::Slurp;
use Parse::ABNF ;
use Data::Dumper;
my $grammar_file = shift;
my $messages = shift;
my #header_name;
my $header_status;
my $grammar = scalar read_file( $grammar_file );
$grammar =~ s/\x0d\x0a/\n/g;
$grammar =~ s/^\s+(?=[\w-]+\s*=)//mg;
$grammar = Parse::ABNF->new->parse( scalar read_file( $grammar_file ) );
if (defined $grammar) {
print "Grammar is now defined...\n";
my $header;
open ( $header , "<", $messages) or die "Could not open $messages file";
while(<$header>) {
print "Processing the message $_\n";
#header_name = split(': ', $_);
if ($header_name[0] eq "From") {
$header_status = $grammar->From($_) ;
} elsif ($header_name[0] eq "To") {
$header_status = $grammar->To($_) ;
} elsif ($header_name[0] eq "Contact") {
$header_status = $grammar->Contact($_) ;
} elsif ($header_name[0] eq "Allow") {
$header_status = $grammar->Allow($_) ;
} elsif ($header_name[0] eq "Accept") {
$header_status = $grammar->Accept($_) ;
} else {
print "Error: Unsupported header $header_name[0] received\n";
}
}
} else {
print "Error: grammar is not defined\n";
}
Perl invokation and output/error is below
$> perl -I. testSipHeader.pl sip.abnf recd_messages.txt
Grammar is now defined...
Processing the message From: <sip:07455900064#212.136.178.216:5060;user=phone>;tag=1526438727-
Can't call method "From" on unblessed reference at testSipHeader.pl line 21, <$header> line 1.
Note: Currently I have C program that generates the SIP headers, and I am trying to validate the header's content by this perl functions. And I am trying to use similar to usage as given at link Parse::RecDescent grammar not working as expected
I had to modify a little in your script to include Parse::ABNF and handle input file and after that the output received is present in path https://drive.google.com/file/d/0B8KDQDXsCKR_ZERzV3IyY1M2NW8/edit?usp=sharing
I have rewrited you script a bit, what is the output?
use Data::Dumper;
my $grammar_rules;
{
local $/=undef;
open(my $fh,'<',$grammar_file) or die $grammar_file,$!;
$grammar_rules = <$fh>;
$grammar_rules =~ s/\x0d\x0a/\n/g;
$grammar_rules =~ s/^\s+(?=[\w-]+\s*=)//mg;
}
print Dumper('rules',$grammar_rules);
my $grammar = Parse::ABNF->new->parse( $grammar_rules );
print Dumper('grammar',$grammar);
die "Error: grammar is not defined" if ! defined $grammer;
print "Grammar is now defined...\n";
open ( my $header_fh , "<", $messages) or die "Could not open $messages file, $!";
while(my $line = <$header_fh>) {
chomp($line);
print "Processing the message $line\n";
#header_name = split(': ', $line);
if ($header_name[0] eq "From") {
$header_status = $grammar->From($line) ;
} elsif ($header_name[0] eq "To") {
$header_status = $grammar->To($line) ;
} elsif ($header_name[0] eq "Contact") {
$header_status = $grammar->Contact($line) ;
} elsif ($header_name[0] eq "Allow") {
$header_status = $grammar->Allow($line) ;
} elsif ($header_name[0] eq "Accept") {
$header_status = $grammar->Accept($line) ;
} else {
print "Error: Unsupported header $header_name[0] received\n";
}
}

perl: persist set of strings with commit support

I have a set of strings that is modified inside a loop of 25k iterations. It's empty at the beginning, but 0-200 strings are randomly added or removed from it in each cycle. At the end, the set contains about 80k strings.
I want to make it resumable. The set should be saved to disk after each cycle and be loaded on resume.
What library can I use? The amount of raw data is ~16M, but the changes are usually small. I don't want it to rewrite the whole store on each iteration.
Since the strings are paths, I'm thinking of storing them in a log file like this:
+a
+b
commit
-b
+d
commit
In the beginning the file is loaded into a hash and then compacted. If there's no commit line in the end, the last block is not taken into account.
The Storable package brings persistence to your Perl data structures (SCALAR, ARRAY, HASH or REF objects), i.e. anything that can be conveniently stored to disk and retrieved at a later time.
I've decided to put away the heavy artillery and write something simple:
package LoL::IMadeADb;
sub new {
my $self;
( my $class, $self->{dbname} ) = #_;
# open for read, then write. create if not exist
#msg "open $self->{dbname}";
open(my $fd, "+>>", $self->{dbname}) or die "cannot open < $self->{dbname}: $!";
seek($fd, 0, 0);
$self->{fd} = $fd;
#msg "opened";
$self->{paths} = {};
my $href = $self->{paths};
$self->{nlines} = 0;
my $lastcommit = 0;
my ( $c, $rest );
while(defined($c = getc($fd)) && substr(($rest = <$fd>), -1) eq "\n") {
$self->{nlines}++;
chomp($rest);
if ($c eq "c") {
$lastcommit = tell($fd);
#msg "lastcommit: " . $lastcommit;
} elsif ($c eq "+") {
$href->{$rest} = undef;
} elsif ($c eq "-") {
delete $href->{$rest};
}
#msg "line: '" . $c . $rest . "'";
}
if ($lastcommit < tell($fd)) {
print STDERR "rolling back incomplete file: " . $self->{dbname} . "\n";
seek($fd, $lastcommit, 0);
while(defined($c = getc($fd)) && substr(($rest = <$fd>), -1) eq "\n") {
$self->{nlines}--;
chomp($rest);
if ($c eq "+") {
delete $href->{$rest};
} else {
$href->{$rest} = undef;
}
}
truncate($fd, $lastcommit) or die "cannot truncate $self->{dbname}: $!";
print STDERR "rolling back incomplete file; done\n";
}
#msg "entries = " . (keys( %{ $href })+0) . ", nlines = " . $self->{nlines} . "\n";
bless $self, $class
}
sub add {
my ( $self , $path ) = #_;
if (!exists $self->{paths}{$path}) {
$self->{paths}{$path} = undef;
print { $self->{fd} } "+" . $path . "\n";
$self->{nlines}++;
$self->{changed} = 1;
}
undef
}
sub remove {
my ( $self , $path ) = #_;
if (exists $self->{paths}{$path}) {
delete $self->{paths}{$path};
print { $self->{fd} } "-" . $path . "\n";
$self->{nlines}++;
$self->{changed} = 1;
}
undef
}
sub save {
my ( $self ) = #_;
return undef unless $self->{changed};
my $fd = $self->{fd};
my #keys = keys %{$self->{paths}};
if ( $self->{nlines} - #keys > 5000 ) {
#msg "compacting";
close($fd);
my $bkpdir = dirname($self->{dbname});
($fd, my $bkpname) = tempfile(DIR => $bkpdir , SUFFIX => ".tmp" ) or die "cannot create backup file in: $bkpdir: $!";
$self->{nlines} = 1;
for (#keys) {
print { $fd } "+" . $_ . "\n" or die "cannot write backup file: $!";
$self->{nlines}++;
}
print { $fd } "c\n";
close($fd);
move($bkpname, $self->{dbname})
or die "cannot rename " . $bkpname . " => " . $self->{dbname} . ": $!";
open($self->{fd}, ">>", $self->{dbname}) or die "cannot open < $self->{dbname}: $!";
} else {
print { $fd } "c\n";
$self->{nlines}++;
# flush:
my $previous_default = select($fd);
$| ++;
$| --;
select($previous_default);
}
$self->{changed} = 0;
#print "entries = " . (#keys+0) . ", nlines = " . $self->{nlines} . "\n";
undef
}
1;