Perl: How come "seek" is not working?

I'm trying to go through a bunch of text files twice, looking for two different values. However, seek $fh, 0, 0 doesn't seem to work. Why?
My code:
use strict;
use warnings;
...
read_in_data_employer();
read_in_data_union();
process_files ($FileFolder);
close $FileHandle;
...
sub process_files
{
    opendir (DIR, $FileFolder)
        or die "Unable to open $FileFolder: $!";
    my @files = grep { /.pdf.txt/ } readdir (DIR);
    closedir (DIR);
    @files = map { $FileFolder . '/' . $_ } @files;
    foreach my $file (@files)
    {
        open (my $txtfile, $file) or die "error opening $file\n";
        print "$file";
        LookForEmployer:
        {
            print $FileHandle "\t";
            while (my $line = <$txtfile>)
            {
                foreach (@InputData_Employers)
                {
                    if ($line =~ /\Q$_/i)
                    {
                        print $FileHandle "$_";
                        last LookForEmployer;
                    }
                }
            }
        }
        seek ($txtfile, 0, 0);
        LookForUnion:
        {
            print $FileHandle "\t";
            while (my $line = <$txtfile>)
            {
                print "$.\n";
                foreach (@InputData_Unions)
                {
                    if ($line =~ /\Q$_/i)
                    {
                        print $FileHandle "$_";
                        last LookForUnion;
                    }
                }
            }
        }
        close $txtfile;
    }
}
Output:
>perl "test.pl" test "employers.txt" "unions.txt" output.txt
test/611-2643-03 (801-0741).pdf.txt12
13
14
15
16
17
18
19
20
21
22
test/611-2643-05 (801-0741).pdf.txt
7
8
9
10
11
12
test/611-2732-21 (805-0083).pdf.txt
2
3
4
5
6
7
8
test/611-2799-17 (801-0152).pdf.txt
6
7
8
9
10
11
12
13
14
Thanks

Files don't have line numbers. They don't even have lines. Files just have bytes. That means you can't just ask the system "What line of the file is at this position?"
But since you're seeking to the start of the file, all you need to do is reset $. yourself.
use Fcntl qw( SEEK_SET );
seek($txtfile, 0, SEEK_SET)
or die("seek: $!\n");
$. = 0;
By the way, your program is insanely inefficient. Load the data into hashes or into a database!
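For instance, one way to cut the per-line cost while keeping the case-insensitive substring match is to precompile the employer list into a single alternation up front (a rough sketch, not necessarily the hash layout meant above; @InputData_Employers, $txtfile and $FileHandle are the variables from the question):
# Build one case-insensitive pattern from the employer list once,
# instead of looping over every employer for every line of every file.
my $employer_re = join '|', map { quotemeta } @InputData_Employers;
$employer_re    = qr/$employer_re/i;

while (my $line = <$txtfile>) {
    if ($line =~ $employer_re) {
        print $FileHandle $&;   # $& holds the employer string that matched
        last;
    }
}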

I want to write multiple files from one file without using an array, to reduce complexity

I want to write multiple files from one file (taking the latest data every time) without using an array, in order to reduce complexity. I already tried it with an array, but when the data set is large it slows the process down.
Kindly give me a hint on how to reduce the complexity of the program.
Input: read a text file from a directory.
Output:
File1.pl - 1 2 3 4 5 6
File2.pl - 6 7 8 9 10
File3.pl - 11 12 13 14 15
File4.pl - 16 17 18 19 20
I did this using an array:
use feature 'state';
open (DATA,"<","e:/today.txt");
@array = <DATA>;
$sizeofarray = scalar @array;
print "Total no. of lines in file is :$sizeofarray";
$count = 1;
while($count <= $sizeofarray)
{
    open($fh,'>',"E:/e$count.txt");
    print $fh "@array[$count-1..($count+3)]\n";
    $count += 5;
}
Store the lines in a small buffer, and at every fifth line open a file and write the buffer to it.
use warnings;
use strict;
use feature 'say';

my $infile = shift || 'e:/today.txt';
open my $fh_in, '<', $infile or die "Can't open $infile: $!";

my ($fh_out, @buf);

while (<$fh_in>) {
    push @buf, $_;
    if ($. % 5 == 0) {
        my $file = 'e' . (int $./5) . '.txt';
        open $fh_out, '>', $file or do {
            warn "Can't open $file: $!";
            next;
        };
        print $fh_out $_ for @buf;
        @buf = ();
    }
}

# Write what's left over, if any, after the last batch of five
if (@buf) {
    my $file = 'e' . ( int($./5)+1 ) . '.txt';
    open $fh_out, '>', $file or die "Can't open $file: $!";
    print $fh_out $_ for @buf;
}
Going by your code, you can try this:
use warnings;
use strict;

open (my $fh,"<","today.txt") or die "Error opening $!";

my $count = 1;
while (my $line = <$fh>)
{
    open my $wh, '>', "e$count.txt" or die "Error creating $!";
    print $wh $line;
    for (1..4) {
        if (my $v = scalar <$fh>) {
            print $wh $v;
        }
        else {
            last;
        }
    }
    $count++;
}

Perl script grep

The script is printing all of the input lines, but I want it to print only the input lines that are also present in another file.
#!/usr/bin/perl -w
open("file", "text.txt");
@todd = <file>;
close "file";

while(<>){
    if( grep( /^$_$/, @todd)){
    #if( grep @todd, /^$_$/){
        print $_;
    }
    print "\n";
}
If, for example, the file contains
1
3
4
5
7
and the input file that will be read from contains
1
2
3
4
5
6
7
8
9
I would want it to print 1, 3, 4, 5 and 7, but 1-9 are being printed instead.
UPDATE:
This is my code now, and I am getting this error:
readline() on closed filehandle todd at ./may6test.pl line 3.
#!/usr/bin/perl -w
open("todd", "<text.txt");
@files = <todd>; #file looking into
close "todd";

while( my $line = <> ){
    chomp $line;
    if ( grep( /^$line$/, @files) ) {
        print $_;
    }
    print "\n";
}
which makes no sense to me, because I have this other script that is basically doing the same thing:
#!/usr/bin/perl -w
open("file", "<text2.txt"); #
@file = <file>; #file looking into
close "file"; #

while(<>){
    $temp = $_;
    $temp =~ tr/|/\t/; #puts tab between name and id
    my ($name, $number1, $number2) = split("\t", $temp);
    if ( grep( /^$number1$/, @file) ) {
        print $_;
    }
}
print "\n";
OK, the problem here is that grep sets $_ too: inside grep( /^$_$/, @todd ), $_ refers to the element of @todd currently being tested, not to your input line, so the pattern is built from the element itself and every element matches. That's why every input line gets printed.
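For instance, a tiny standalone demonstration of that aliasing (made-up values, not the OP's script):
use strict;
use warnings;

my @todd = ("1\n", "3\n");
$_ = "999\n";                       # pretend this is the current input line from <>
my @hits = grep( /^$_$/, @todd );   # inside grep, $_ is each element of @todd, not "999\n"
print scalar(@hits), "\n";          # prints 2: every element matched itself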
At a basic level - you need to:
while ( my $line = <> ) {
    chomp $line;
    if ( grep { /^$line$/ } @todd ) {
        #do something
    }
}
But I'd suggest that you consider building a hash of your lines instead:
open( my $input, '<', "text.txt" ) or die $!;
my %in_todd = map { $_ => 1 } <$input>;
close $input;

while (<>) {
    print if $in_todd{$_};
}
Note - you might want to watch for trailing linefeeds.
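For example, a minimal way to guard against that (a sketch: chomp both the stored keys and each incoming line):
open( my $input, '<', "text.txt" ) or die $!;
my %in_todd = map { chomp; $_ => 1 } <$input>;   # strip newlines from the keys
close $input;

while (<>) {
    chomp;                           # strip the newline from the input line too
    print "$_\n" if $in_todd{$_};
}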

Perl simple matching, yet it's not matching

in blah.txt:
/a/b/c-test
in blah.pl
my @dirs;
$ws = '/a/b/c-test/blah/blah';   # <--- trying to match this

sub blah {
    my $err;
    open(my $fh, "<", "blah.txt") or $err = "catn do it\n";
    if ($err) {
        print $err;
        return;
    } else {
        while (<$fh>) {
            chomp;
            push @dirs, $_;
        }
    }
    close $fh;
    print "successful\n";
}

blah();

foreach (@dirs) {
    print "$_\n"; #/a/b/c-test
    if ($_ =~ /$ws/ ) {   # <--- didnt match it
        print "GOT IT!\n";
    } else {
        print "didnt get it\n";
    }
}
perl blah.pl
successful
/a/b/c-test
didnt get it
I am not quite sure why it is not matching.
Anyone know?
Consider,
if ($ws =~ /$_/ ) {
instead of,
if ($_ =~ /$ws/ ) {
since /a/b/c-test/blah/blah contains the string /a/b/c-test, not the other way around.
As side notes:
use at least strict and warnings
read and process the file in a while() loop instead of filling an array first (a sketch follows below)
if you must fill an array, use my @dirs = <$fh>; chomp(@dirs);
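A minimal sketch of that while()-loop style, using the file and variable names from the question (index() is used here instead of a regex so metacharacters in the stored paths can't interfere; $ws =~ /\Q$dir\E/ would work too):
use strict;
use warnings;

my $ws = '/a/b/c-test/blah/blah';

open(my $fh, "<", "blah.txt") or die "can't open blah.txt: $!";
while (my $dir = <$fh>) {
    chomp $dir;
    # does the longer path contain the prefix read from the file?
    if (index($ws, $dir) >= 0) {
        print "GOT IT: $dir\n";
    } else {
        print "didnt get it: $dir\n";
    }
}
close $fh;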

Vertical index Perl

File 1 has ranges (3-9, 2-6, etc.):
3 9
2 6
12 20
File 2 has values: column 1 is the index and column 2 is the value.
1 4
2 4
3 5
4 4
5 4
6 1
7 1
8 1
9 4
I would like to calculate the sum of values (file 2, column 2) for the ranges in file 1. E.g. if the range is 3-9, then the sum of values will be 5+4+4+1+1+1+4 = 20.
What I have tried is:
open (FILE1,"file1.txt");
open (FILE2,"file2.txt");
#file1 = <FILE1>;
#file2 = <FILE2>;
foreach (#file1)
{
#split_file2 = split("\\s",$_); //splitting the file by space
foreach (#file2)
{
#split_file2 = split("\\s",$_); //splitting the file by space
if (#split_file1[0] == #split_file2[0]) //if column0 of file1 matches with column0 of file2
{
$x += #split_file2[1]; //sum the column1 of file2
if ( #split_file2[0] == #split_file1[0] ) //until column1 of file1 = column0 of file2.
{
last;
}
}
}}
Always use use strict; use warnings;.
split /\s/ is easier to read. split ' ' is what you actually want.
Don't use global variables (e.g. for file handles).
It's useful to check if open succeeds, if only by adding or die $!.
Use meaningful names, not file1 and file2.
use strict;
use warnings;
use feature qw( say );
use List::Util qw( sum );

my $file1 = 'file1.txt';
my $file2 = 'file2.txt';

my @file2;
{
    open(my $fh, '<', $file2)
        or die "Can't open $file2: $!\n";

    while (<$fh>) {
        my ($k, $v) = split;
        $file2[$k] = $v;
    }
}

{
    open(my $fh, '<', $file1)
        or die "Can't open $file1: $!\n";

    while (<$fh>) {
        my ($start, $end) = split;
        say sum grep defined, @file2[$start .. $end];
    }
}
Another solution:
#!/usr/bin/perl
use strict;
use warnings;

my $f1 = shift;
my $f2 = shift;

open FH1, "<", $f1 or die "$!\n";
open FH2, "<", $f2 or die "$!\n";

my %data;
while (<FH1>) {
    $data{$1} = $2 if ($_ =~ m/^(\d+)\s+(\d+)$/);
}

while (<FH2>) {
    if ($_ =~ m/^(\d+)\s+(\d+)$/) {
        my $sum;
        for ($1..$2) {
            $sum += $data{$_} if defined($data{$_});
        }
        print "sum for $1-$2: $sum\n" if defined($sum);
    }
}

close FH1;
close FH2;
Call: script.pl values.txt ranges.txt

How to print lines that don't match?

The script I have written outputs all lines from file 2 that start with a number that is in file 1.
Question
How do I output all the other lines that didn't match?
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

my @res;

open(FILE, '<', "1") or die $!;
while (defined (my $line = <FILE>)) {
    chomp $line;
    push @res, $line;
}
close FILE;

open(FILE, '<', "2") or die $!;
while (defined (my $line = <FILE>)) {
    chomp $line;
    $line =~ m/(\d+)/;
    if (defined $1) {
        foreach my $a (@res) {
            if ($a == $1) {
                print $line . "\n";
            }
        }
    }
}
close FILE;
File 1
155
156
157
158
159
160
File 2
150 a
151 f
152 r
153 a
154 a
155 a
156 a
157 f
158 f
159 f
Your code is pretty close, actually: it's enough to change this
foreach my $a (@res) {
    if ($a == $1) {
        print $line . "\n";
    }
}
... to this ...
my $found;
foreach my $a (@res) {
    if ($a eq $1) { # we compare strings, not numbers, even if these strings are 'numeric'
        $found = 1;
        print $line . "\n";
        last; # no need to look further, we already found an item
    }
}
print "Not matched: $line", "\n" unless $found;
Still, there's something more to talk about. Since all these number strings in the first file are unique, it's much better to use a hash for storing them. The code will actually not change that much:
my %digits;
... # in the first file processing loop:
$digits{$line} = 1;
... # in the second file processing loop, instead of foreach:
if ($digits{$1}) {
    print $line, "\n";
} else {
    print "Not matched: $line", "\n";
}
But the point is that searching in a hash is MUCH faster than looping through an array again and again.
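If you want to see that difference for yourself, here is a rough sketch with the core Benchmark module (the list size and the value looked up are made up):
use strict;
use warnings;
use Benchmark qw( cmpthese );

my @res    = (1 .. 10_000);
my %digits = map { $_ => 1 } @res;
my $needle = 9_999;

cmpthese(-1, {
    array_grep => sub { my $hit = grep { $_ == $needle } @res },
    hash_key   => sub { my $hit = exists $digits{$needle} },
});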
use strict;
use warnings;

my %res;

open(FILE, '<', "1") or die $!;
while (defined (my $line = <FILE>)) {
    chomp $line;
    $res{$line} = 1;
}
close FILE;

open(FILE, '<', "2") or die $!;
while (defined (my $line = <FILE>)) {
    if ($line =~ m/(\d+)/) {
        print $line if not $res{$1};
    }
}
close FILE;