perl redirect stdout to lexical filehandle - perl

I'm trying to write a helper function that runs a perl function in another process and returns a closure that produces a line of output at a time when called.
I figured out a way of doing this using pipe that mixes old and new-style filehandles. I used an old-style one for the sink in order to use the open(STDOUT, ">&thing") syntax and a new-style one for the source since it needs to be captured by a closure and I didn't want to burden the caller with providing a filehandle.
Is there a way of using a new-style filehandle in a construction with the same meaning as open(STDOUT, ">&thing")?
#!/usr/bin/env perl
# pipe.pl
# use pipe() to create a pair of fd's.
# write to one and read from the other.
#
# The source needs to be captured by the closure and can't be
# destructed at the end of get_reader(), so it has to be lexical.
#
# We need to be able to redirect stdout to sink in such a way that
# we actually dup the file descriptor (so shelling out works as intended).
# open(STDOUT, ">&FILEHANDLE") achieves this but appears to require an
# old-style filehandle.
use strict;
use warnings;
sub get_reader {
local *SINK;
my $source;
pipe($source, SINK) or die "can't open pipe!";
my $cpid = fork();
if ($cpid == -1) {
die 'failed to fork';
}
elsif ($cpid == 0) {
open STDOUT, ">&SINK" or die "can't open sink";
system("echo -n hi");
exit;
}
else {
return sub {
my $line = readline($source);
printf "from child (%s)\n", $line;
exit;
}
}
}
sub main {
my $reader = get_reader();
$reader->();
}
main();
When run, this produces
from child (hi)
as expected.

sub get_reader {
my ($cmd) = #_;
open(my $pipe, '-|', #$cmd);
return sub {
return undef if !$pipe;
my $line = <$pipe>;
if (!defined($line)) {
close($pipe);
$pipe = undef;
return undef;
}
chomp($line);
return $line;
};
}
If that's not good enough (e.g. because you also need to redirect the child's STDIN or STDERR), you can use IPC::Run instead.
use IPC::Run qw( start );
sub get_reader {
my ($cmd) = #_;
my $buf = '';
my $h = start($cmd, '>', \$buf);
return sub {
return undef if !$h;
while (1) {
if ($buf =~ s/^([^\n]*)\n//) {
return $1;
}
if (!$h->pump())) {
$h->finish();
$h = undef;
return substr($buf, 0, length($buf), '') if length($buf);
return undef;
}
}
};
}
Either way, you can now do
my $i = get_reader(['prog', 'arg', 'arg']);
while (defined( my $line = $i->() )) {
print "$line\n";
}
Either way, error handling left to you.

Related

perl: How to make 'warn' think we read from a file?

I have a function (a variation of string++):
sub inc
{
$_[0] =~ /^(.*?)([0-9]+)$/;
my ($a,$b)=($1,$2);
die "cannot increment [$_[0]]" unless defined $b;
warn "increment overflow [$_[0]]" if length(++$b) != length($2);
$a.$b;
}
It is invoked in many places of a script, on different data (sometimes from a file, sometimes from a database).
When I read from a filehandle, die and warn print a message like this:
cannot increment [abc] at script line 5, <filehandle> line 123.
otherwise a shorter message is printed:
cannot increment [abc] at script line 5.
When I read from database I would like to have a message like this:
cannot increment [abc] at script line 5, <SELECT...> line 123.
Is it possible?
Setting the line number is quite simple: an assignment to $. can be made. But how to set the 'filehandle' part and make it visible?
I have found such a workaround:
my $fh = "SELECT...";
open $fh, "/dev/null";
<$fh>;
but it is a bit long, and it actually does open a file.
The filehandle information that appears in warn and die messages is only set after calls to <HANDLE>, readline, tell, eof, and seek. When you fetch data from a database with DBI, for example, you're not calling any of these, so you have to pass the extra data yourself.
One way to do this is to write a custom exception class that stringifies to the text you want:
package MyException;
use strict;
use warnings 'all';
use v5.18.0;
use overload '""' => \&as_string;
sub new {
my ($self, $message, $src, $src_line) = #_;
my ($package, $file, $line) = caller;
if (! defined $src && ref ${^LAST_FH} eq 'GLOB') {
$src = *${^LAST_FH}{NAME};
$src_line = $.;
}
bless { message => $message,
file => $file,
line => $line,
src => $src,
src_line => $src_line }, $self;
}
sub as_string {
my ($self) = #_;
my $message = "$self->{message} at $self->{file} line $self->{line}";
if (defined $self->{src} && defined $self->{src_line}) {
$message .= ", <$self->{src}> line $self->{src_line}";
}
$message .= "\n";
}
1;
Note that Perl 5.18.0 or up is required to use the read-only ${^LAST_FH} variable, which holds a reference to the last read filehandle.
Here's how you would use this when reading from a file:
use strict;
use warnings 'all';
use MyException;
while (<DATA>) {
warn MyException->new('foo'); # equivalent to warn 'foo'
}
__DATA__
first
second
Output:
foo at ./myscript line 9, <DATA> line 1
foo at ./myscript line 9, <DATA> line 2
And here's how you would use it when fetching records from a database:
use strict;
use warnings 'all';
use DBI;
use MyException;
my $dbh = DBI->connect('dbi:mysql:test', 'user', 'pass', {
RaiseError => 1
});
my $sql = 'SELECT * FROM test';
my $sth = $dbh->prepare($sql);
$sth->execute;
my $count;
while (my $row = $sth->fetch) {
warn MyException->new('foo', $sql, ++$count);
}
Output:
foo at ./myscript line 19, <SELECT * FROM test> line 1
foo at ./myscript line 19, <SELECT * FROM test> line 2
(Unfortunately, DBI doesn't provide a method to get the number of rows that have been fetched so far, so you have to count them yourself.)
Since you're trying to warn or die from inside a subroutine, you have to do a little bit more work. The simplest approach for die would be to trap exceptions from your subroutine with eval and re-throw them:
my $count = 1;
while (my $row = $sth->fetch) {
eval {
inc($row[0]);
};
if ($# =~ /^(cannot increment \[.*?\])/) {
die MyException->new($1, $sql, $count);
}
elsif ($#) {
die $#;
}
$count++;
}
You can handle warnings in a similar way by creating a __WARN__ handler:
{
my $count = 1;
local $SIG{__WARN__} = sub {
if ($_[0] =~ /^(increment overflow \[.*?\])/) {
warn MyException->new($1, $sql, $count);
}
else {
warn #_;
}
};
while (my $row = $sth->fetch) {
inc($row[0]);
$count++;
}
}
You may prefer this implementation of your inc subroutine. Your own uses the reserved variables $a and $b, as well as saving and retrieving the initial non-numeric part of the string
Note that the STDERR output is not in sync with STDOUT, so the warning appears prematurely in the aggregated text. In reality the warning is issued only when the passed string has an all-nines numeric field
use strict;
use warnings 'all';
my $s = 'ZZ90';
for ( 1 .. 20 ) {
$s = inc_str($s);
print $s, "\n";
}
sub inc_str {
my ($str) = #_;
$str =~ s{([0-9]+)$}{
my $num = $1;
warn "Increment overflow [$str]" unless $num =~ /[^9]/;
sprintf '%0*d', length($num), $num+1;
}e or die "Cannot increment [$str]";
return $str;
}
output
Increment overflow [ZZ99] at E:\Perl\source\inc_str.pl line 18.
ZZ91
ZZ92
ZZ93
ZZ94
ZZ95
ZZ96
ZZ97
ZZ98
ZZ99
ZZ100
ZZ101
ZZ102
ZZ103
ZZ104
ZZ105
ZZ106
ZZ107
ZZ108
ZZ109
ZZ110

Preserve local context across nested subroutines

Let's consider the wanted code below. I have recursive calls to process and for each recursion I use a local %context. In this way I can get my context back when I return from a call.
sub process {
my %context; # Local context
process() if rerun();
job1();
job2();
sub job1() {print $context{foo}}
sub job2() {print $context{bar}}
}
Unfortunately perl does not manage nested subroutines as I expected. By moving my subroutines outside from the process subroutine I will get a problem because I won't be able to access %context anymore. So I need to make it global and use a stack as follow:
my %context; # Local context
my #context_stack;
sub process {
push #context_stack, %context;
%context = undef;
process() if rerun();
job1();
job2();
%context = pop #context_stack;
}
sub job1() {print $context{foo}}
sub job2() {print $context{bar}}
The third solution is to pass the context to all subroutines which can be annoying for very small subroutines. Also %context become global to all my program. So I loose the privacy of this variable.
my %context; # Local context
my #context_stack;
sub process {
push #context_stack, %context;
%context = undef;
process() if rerun(\%context);
job1(\%context);
job2(\%context);
%context = pop #context_stack;
}
sub job1() {$context = shift; print $context->{foo}}
sub job2() {$context = shift; print $context->{bar}}
What would be the best approach?
EDIT
For a better understanding of my specific, I provide another example:
process(#ARGV);
exit 0;
sub process {
my $infile = shift;
my $outfile = shift;
open my $fp_in, '<', $infile;
open my $fp_out, '>', $outfile;
LINE: while(<$fp_in>) {
remove_c_comment();
say STDERR "File is $infile";
process($1, "$1.processed") if /#include "(.*?)";
warning("Huh raisin, no!") if /\braisin/;
say STDERR "Fill is still $infile";
print $fp_out $_;
}
sub remove_c_comment { s|//.*$|| }
sub warning { say "[Warning] $infile:$. ".shift() }
}
The thing you're looking for - but you may not know it - is called a closure. (see also: perlref)
{
my %context;
sub job1 { print $context{foo} };
sub job2 { print $context{bar} };
sub init_context{ $context{foo} = 1 };
}
Context remains private within this block, but accessible to all the subroutines.
As an alternative - you can return a code reference from a subroutine - like this:
use strict;
use warnings;
sub make_sub_with_context {
my %context;
$context{"bar"} = 1;
return sub { print $context{"bar"}++ };
}
my $job1_with_context = make_sub_with_context();
my $another_job_with_context = make_sub_with_context();
$job1_with_context->();
$job1_with_context->();
$another_job_with_context->();
$another_job_with_context->();
$another_job_with_context->();
Which may be a better example.
Edit:
Following on from your updated example it looks like your problem spec is to iterate a set of files, and (recursively) traverse referenced files.
Sort of like a find but following include directives. I would point out that by doing it that way, what you're doing is potentially going to end up with a loop, which isn't ideal.
Can I suggest instead taking a different approach? Don't recurse:
use strict;
use warnings;
my #files_to_process = #ARGV;
my %done;
while ( my $infile = pop #files_to_process ) {
next if $done{$infile}++;
open my $fp_in, '<', $infile or die $!;
open my $fp_out, '>', $infile . ".processed" or die $!;
while ( my $line = <$fp_in> ) {
$line =~ s|\/\/.*$||;
if ( my ($include) = ( $line =~ m/#include "(.*?)"/ ) ) {
push #files_to_process, $include;
}
print {$fp_out} $line;
}
close($fp_out);
close($fp_in);
}
With a bit more thought, and the expansion that this task needs to process stuff in declaration order - I'd offer instead - perhaps taking an OO approach would help. Something like:
use strict;
use warnings;
package parser;
sub new {
my ($class) = #_;
my $self = {};
bless $self, $class;
return $self;
}
sub process {
my ( $self, $infile, $outfile ) = #_;
open my $fp_in, '<', $infile;
open my $fp_out, '>', $outfile;
LINE: while ( my $line = <$fp_in> ) {
$line =~ s|\/\/.*$||;
say STDERR "File is $infile";
if ( my ($includefile) = ( $line =~ m/#include "(.*?)"/ ) ) {
my $processor = parser->new();
$processor -> process( $includefile, "$includefile.processed" );
}
$self->warning("Huh raisin, no!") if /\braisin/;
say STDERR "Fill is still $infile";
print $fp_out $line;
}
}
package main;
my $processor = parser->new()->process(#ARGV);

Reading STDOUT and STDERR of external command with no wait

I would like to execute external command rtmpdump and read it's STDOUT and STDERR separately, but not to wait till such command ends, but read its partial outputs in bulks, when available...
What is a safe way to do it in Perl?
This is a code I have that works "per-line" basis:
#!/usr/bin/perl
use warnings;
use strict;
use Symbol;
use IPC::Open3;
use IO::Select;
sub execute {
my($cmd) = #_;
print "[COMMAND]: $cmd\n";
my $pid = open3(my $in, my $out, my $err = gensym(), $cmd);
print "[PID]: $pid\n";
my $sel = new IO::Select;
$sel->add($out, $err);
while(my #fhs = $sel->can_read) {
foreach my $fh (#fhs) {
my $line = <$fh>;
unless(defined $line) {
$sel->remove($fh);
next;
}
if($fh == $out) {
print "[OUTPUT]: $line";
} elsif($fh == $err) {
print "[ERROR] : $line";
} else {
die "[ERROR]: This should never execute!";
}
}
}
waitpid($pid, 0);
}
But the above code works in text mode only, I believe. To use rtmpdump as a command, I need to collect partial outputs in binary mode, so do not read STDOUT line-by-line as it is in the above code.
Binary output of STDOUT should be stored in variable, not printed.
Using blocking functions (e.g. readline aka <>, read, etc) inside a select loop defies the use of select.
$sel->add($out, $err);
my %bufs;
while ($sel->count) {
for my $fh ($sel->can_read) {
my $rv = sysread($fh, $bufs{$fh}, 128*1024, length($bufs{$fh}));
if (!defined($rv)) {
# Error
die $! ;
}
if (!$rv) {
# Eof
$sel->remove($fh);
next;
}
if ($fh == $err) {
while ($bufs{$err} =~ s/^(.*\n)//) {
print "[ERROR] $1";
}
}
}
}
print "[ERROR] $bufs{$err}\n" if length($bufs{$err});
waitpid($pid, 0);
... do something with $bufs{$out} ...
But it would be much simpler to use IPC::Run.
use IPC::Run qw( run );
my ($out_buf, $err_buf);
run [ 'sh', '-c', $cmd ],
'>', \$out_buf,
'2>', sub {
$err_buf .= $_[0];
while ($err_buf =~ s/^(.*\n)//) {
print "[ERROR] $1";
}
};
print "[ERROR] $err_buf\n" if length($err_buf);
... do something with $out_buf ...
If you're on a POSIX system, try using Expect.pm. This is exactly the sort of problem it is designed to solve, and it also simplifies the task of sending keystrokes to the spawned process.

How to I use a class property/variable as a print filehandle in Perl?

I want to do the same thing as
open MYFILE, ">", "data.txt";
print MYFILE "Bob\n";
but instead in class variable like
sub _init_tmp_db
{
my ($self) = #_;
open $$self{tmp_db_fh}, ">", "data.txt";
print $$self{tmp_db_fh} "Bob\n";
}
It gave me this error : 'String found where operator expected near "Bob\n"'
what should I do?
From the print manpage:
If you're storing handles in an array or hash, or in general whenever
you're using any expression more complex than a bareword handle or a
plain, unsubscripted scalar variable to retrieve it, you will have to
use a block returning the filehandle value instead.
You should be using:
print { $$self{tmp_db_fh} } "Bob\n";
This code won't work under use strict. To fix it just use a my variable:
open my $fh, ">", "data.txt" or die $!;
$$self{tmp_db_fh} = $fh;
print { $$self{tmp_db_fh} } "Bob\n";
You should the IO::File module instead.
use IO::File;
my $file = IO::File->new;
$file->open("> data.txt");
print_something($file);
sub print_something {
my ($file) = #_;
$file->print("hello world\n");
}
Or in your example function:
use IO::File;
# ...
sub _init_tmp_db
{
my ($self) = #_;
$self{tmp_db_fh} = IO::File->new;
$self{tmp_db_fh}->open(">", "data.txt");
$self{tmp_db_fh}->print"Bob\n";
}
(note, you can still non -> based calls too, but I wrote the above
using the more traditional ->open() type calls.)
Filehandles can only be scalars.
But $$self{tmp_db_fh} is either an open filehandle (to data.txt) then this would work:
sub _init_tmp_db
{
my ($self) = #_;
my $filehandle = $$self{tmp_db_fh} ;
print $filehandle "Bob\n";
}
or you open the filehandle inside _init_tmp_db
sub _init_tmp_db
{
my ($self) = #_;
open my $filehandle , ">", "data.txt" or die "Cannot open data.txt" ;
print $filehandle "Bob\n";
}
But providing a string in $$self{tmp_db_fh} (like 'FILEHANDLE') won't work.
This is easily solved by creating a variable for a file handle:
sub _init_tmp_db {
my $self = shift;
my $fh;
open $fh, ">", "data.txt"
$self->{temp_db_fh} = $fh;
# Sometime later...
$fh = $self-{temp_db_hf};
print $fh "Bob\n";
}
This is an issue because the way the print syntax is parsed and the early sloppiness of the syntax. The print statement has really two separate formats: Format #1 is that the you're simply passing it stuff to print. Format #2 says that the first item may be a file handle, and the rest is the stuff you want to print to the file handle. If print can't easily determine that the first parameter is a file handle, it fails.
If you look at other languages, they'll use a parameter for passing the file handle, and maybe the stuff to print. Or in object oriented languages, they'll overload >> for the file handle parameter. They'll look something like this:
print "This is my statement", file=file_handle;
or
print "This is my statement" >> file_handle;
You might be able to munge the syntax to get away from using a variable. However, it doesn't make the program more efficient or more readable, and may simply make the program harder to maintain. So, just use a variable for the file handle.
You said class in your title. I assume that you are interested in writing a fully fledge object oriented package to do this. Here's a quick example. Notice in the write subroutine method I retrieve the file handle into a variable and use the variable in the print statement.
#! /usr/bin/env perl
#
use strict;
use warnings;
#######################################################
# MAIN PROGRAM
#
my $file = File->new;
$file->open("OUTPUT") or
die "Can't open 'OUTPUT' for writing\n";
$file->write("This is a test");
#
#######################################################
package File;
use Carp;
sub new {
my $class = shift;
my $self = {};
bless $self, $class;
return $self;
}
sub open {
my $self = shift;
my $file = shift;
my $fh;
if (defined $file) {
$self->{FILE} = $file;
open ($fh, ">", $file) and $self->_fh($fh);
}
return $self->_fh;
}
sub _fh {
my $self = shift;
my $fh = shift;
if (defined $fh) {
$self->{FH} = $fh;
}
return $self->{FH};
}
sub write {
my $self = shift;
my $note = shift;
my $fh = $self->_fh;
print $fh $note . "\n";
return
}

How can I still get automatic assignment to '$_' with a mocked 'readline' function?

Perl has some special handling for the readline function (and the equivalent <> I/O operator) where it treats the expressions
while (<HANDLE>)
while (readline(HANDLE))
as equivalent to
while (defined($_ = <HANDLE>))
cf.
$ perl -MO=Deparse -e 'f($_) while <>'
f($_) while defined($_ = <ARGV>); <--- implicitly sets $_
-e syntax OK
But this automatic assignment doesn't seem to happen if you hijack the readline function:
$ perl -MO=Deparse -e 'BEGIN {
> *CORE::GLOBAL::readline = sub { }
> }
> f($_) while <>'
sub BEGIN {
*CORE::GLOBAL::readline = sub {
};
}
f($_) while readline(ARGV); <--- doesn't set $_ !
-e syntax OK
Of course, this will make the custom readline function work incorrectly for a lot of legacy code. The output of this code is "foo" with the BEGIN block and "bar" without it, but I want it to be "BAR".
use warnings;
BEGIN { *CORE::GLOBAL::readline = \&uc_readline; }
sub uc_readline {
my $line = CORE::readline(shift || *ARGV);
return uc $line if defined $line;
return;
}
($_, $bar) = ("foo\n", "bar\n");
open X, '<', \$bar;
while (<X>) {
print $_; # want and expect to see "BAR\n"
}
What options do I have to hijack the readline function but still get the proper treatment of the while (<...>) idiom? It's not practical to explicitly convert everything to while (defined($_=<...>)) in all the legacy code.
This is a fairly dirty hack using overloading to detect boolean context, but it seems to do the trick. It certainly needs more testing than I have given it before using this solution in a production environment:
use warnings;
BEGIN { *CORE::GLOBAL::readline = \&uc_readline; }
sub uc_readline {
my $line = CORE::readline(shift || *ARGV);
return Readline->new(uc $line) if defined $line;
return;
}
{package Readline;
sub new {shift; bless [#_]}
use overload fallback => 1,
'bool' => sub {defined($_ = $_[0][0])}, # set $_ in bool context
'""' => sub {$_[0][0]},
'+0' => sub {$_[0][0]};
}
my $bar;
($_, $bar) = ("foo\n", "bar\n");
open X, '<', \$bar;
while (<X>) {
print $_; # want and expect to see "BAR\n"
}
which prints:
BAR
This will also make if (<X>) {...} set $_. I don't know if there is a way to limit the magic to only while loops.
This code:
use warnings;
BEGIN { *CORE::GLOBAL::readline = \&uc_readline; }
sub uc_readline {
my $line = CORE::readline(shift || *ARGV);
return unless defined $line;
$line = uc $line;
$_ = $line;
return $line;
}
($_, $bar) = ("foo\n", "bar\n");
open X, '<', \$bar;
while (<X>) {
print $_; # want and expect to see "BAR\n"
}
print "$_"; # prints "BAR" instad of "foo"
does almost the right thing, but $_ is not localised, so after the loop, $_ is set to the last value read from the filehandle. Adding Scope::Upper to the mix fixes that:
use warnings;
use Scope::Upper qw/localize SCOPE/;
BEGIN { *CORE::GLOBAL::readline = \&uc_readline; }
sub uc_readline {
my $line = CORE::readline(shift || *ARGV);
return unless defined $line;
$line = uc $line;
local $_ = $line;
# localize $_ in the scope of the while
localize *main::_, \$line, SCOPE(1);
return $line;
}
($_, $bar) = ("foo\n", "bar\n");
open X, '<', \$bar;
while (<X>) {
print "$_"; # want and expect to see "BAR\n"
}
print "$_"; # will print 'foo', not "BAR"