Recursively remove adjacent duplicate characters - perl

I am having a string say
$str = "hhiiishs aappllee eerrffdd"
I want to remove adjacent duplicate characters recursively from a string. I dont know how to write recursion. I have written a code that is not recursive but working if we pass string by string
use strict;
use warnings;
my $str = "AABBCCDEEFDDS asdwdwws ffoorr";
sub remove {
my $var1 = "";
my $str = $_[0];
my #arr = split (//, $str);
my $f = "";
foreach (0..$#arr) {
if ( $arr[$_] eq $var1) {
next;
#substr ( $str, $_)
} else {
$var1 = $arr[$_];
$f = "$f"."$arr[$_]";
}
}
$f = "$f"." ";
return $f;
}
Please guide me how to write recursive in Perl.

You can try,
$str =~ s/(.)\1+/$1/g;
gives
hishs aple erfd

Using recursion probably isn't the best choice for this, but here is a recursive function below.
#!/usr/bin/perl
use strict;
use warnings;
my $foo = "aabbccddeeffgg hhiijjkkllmmnnoo pp";
print reDup($foo), "\n";
sub reDup {
my #string = split ('', shift); #split string into array of characters
my $val;
for my $i( 0..$#string){
if(defined($val) && $string[$i] eq $val){
#string[$i..$#string] = #string[($i+1)..$#string]; #if last char checked = current char, shift the array to the left.
pop #string; #Above leaves unwanted element at the end, so pop it off
my $str = join('', #string);
return reDup($str); #do it all again
}
$val = $string[$i];
}
return join('', #string); #when the for loops if statement is never executed, it must contain no duplicates.
}

sub _remove_adjacent {
my $out = shift;
if (#_ == 0) {
return $out;
}
elsif (#_ == 1) {
return $out.$_[0];
}
elsif ($_[0] eq $_[1]) {
shift;
return _remove_adjacent($out.shift(#_), #_);
} else {
return _remove_adjacent($out.shift(#_), #_);
}
}
sub remove_adjacent {
my ($in) = #_;
return _remove_adjacent('', split(//, $in));
}
Of course, that's purely tail-recursive, so it can be inlined into a loop.
sub remove_adjacent {
my ($in) = #_;
my #in = split(//, $in);
my $out = '';
while (1) {
if (#in == 0) {
last;
}
elsif (#in == 1) {
$out .= $in[0];
last;
}
elsif ($in[0] eq $in[1]) {
shift(#in);
$out .= shift(#in);
} else {
$out .= shift(#in);
}
}
return $out;
}
This can be cleaned up further, but it shows that recursion would be a pure waste here.

Related

Creating a hash using a function

I am trying to create a hash using a function in perl. Actually i was working on creating a binary search tree in perl. Below is the code :
sub newhash {
$data = shift;
$left = undef;
$right = undef;
%node = ("data"=>$data,"left"=>$left,"right"=>$right);
return (\%node);
}
$firstele = newhash(2);
foreach ( keys %$firstele )
{
print "$_:$firstele->{$_}\n";
}
$node = newhash(1);
foreach ( keys %$node )
{
print "$_:$node->{$_} \n";
}
foreach ( keys %$firstele )
{
print "$_:$firstele->{$_}\n";
}
The trouble is that when i am printing the original hash, the data key gets replaced by whatever i am passing to the newhash function . The output:
left:
right:
data:2
left:
right:
data:1
left:
right:
data:1
Any ideas why is the data key getting replaced?
use strict; would tell you about a bunch of undeclared variables; lexicalize them with my and it should solve your problem. As it stands, there's only one %node and you overwrite it with every call to newhash.
use strict;
sub newhash {
my $data = shift;
my $left;
my $right;
my %node = ( # <-- a brand new %node every time
data => $data,
left => $left,
right => $right,
);
return (\%node); # new %node, new reference
}
my $firstele = newhash(2);
print "firstele data: $firstele->{data}\n";
my $node = newhash(1);
print "node data: $node->{data}\n";
print "firstele data: $firstele->{data}\n";
Here is the code for adding elements in BT structure.
use strict;
use List::Util qw(first);
my (#input,$data);
print "Enter the data for being in a BST structure: ";
$data=<>;
chomp($data);
my $root=$data;
push(#input,$data);
while($data =~ m/\b-?\d{1,3}\b/){
my $idx=first { $input[$_] == $root } 0..$#input;
if($data<$root) {
for(my $i=0;$i<=$idx;$i++) {
next if($data>$input[$i]) ;
if($data<$input[$i]) {
splice(#input,$i,0,$data);
}
last;
}
}
if($data>$root) {
for(my $i=$idx;$i<=$#input;$i++) {
if($data>$input[$i]) {
if(($i+1==scalar(#input)) or ($data<$input[$i+1] && $i+1 !=
scalar(#input))) {
splice(#input,$i+1,0,$data);
last;
}
else {
next;
}
}
last;
}
}
print "Enter the number for being in a BT structure: ";
$data=<>;
chomp($data);
}
print "Final BT Array:\n",join(',', #input),"\n";

Perl subroutine doesn't return value

Sorry to disturb you with such a silly question, I'm new at Perl.
I'm trying to modify parsing subroutine, written by my colleague and have problems with functions in perl.
It returns empty value, I don't understand why? Have already read reference sites, seen examples and they are obvious. Here's the code of function:
sub parseHTML
{
my ($node, $depth) = #_;
my $str = ' ';
if (ref $node)
{
if ($node->tag () ne "script" && $node->tag () ne "style")
{
my #children = $node->content_list ();
for my $child_node (#children)
{
parseHTML ($child_node, $depth + 1);
}
}
}
else
{
$str = $str.$node."\n";
#print $str;
}
return $str;
}
And then I try to use it:
my $parser = HTML::TreeBuilder->new ();
$parser->parse ($cont);
my $Parsed = parseHTML ($parser, 0);
print "$Parsed\n";
#parseHTML ($parser, 0);
The return value is empty. However, if I decide to print data right in function, uncomment string:print $str; and use parseHTML ($parser, 0); instead, it works, and there's an output.
Where could be the mistake? Data in function seems to be local.
Here's the complete code listing as well.
You have to concat the $str returning from parseHTML
$str .= parseHTML ($child_node, $depth + 1);
or you can use a pointer this way:
...
my $Parsed;
parseHTML ($parser, 0,\$Parsed);
....
sub parseHTML
{
my ($node, $depth, $out) = #_;
my $str = ' ';
if (ref $node)
{
if ($node->tag() ne "script" && $node->tag() ne "style")
{
my #children = $node->content_list ();
for my $child_node (#children)
{
parseHTML ($child_node, $depth + 1,$out);
}
}
}
else
{
$$out .= $node."\n";
}
}
You forgot to add to $str in the "then" part of the if.
parseHTML ($child_node, $depth + 1);
should be
$str .= parseHTML ($child_node, $depth + 1);

How can I enter and return a single letter properly from a sub in Perl?

I am attempting to write a code that will encrypt letters with a basic cyclic shift cipher while leaving any character that is not a letter alone. I am trying to do this through the use of a sub that finds the new value for each of the letters. When I run the code now,it formats the result so there is a single space between every encrypted letter instead of keeping the original formatting. I also cannot get the result to be only in lowercase letters.
sub encrypter {
my $letter = shift #_;
if ($letter =~ m/^[a-zA-Z]/) {
$letter =~ y/N-ZA-Mn-za-m/A-Za-z/;
return $letter;
}
else {
return lc($letter);
}
}
print "Input string to be encrypted: ";
my $input = <STDIN>;
chomp $input;
print "$input # USER INPUT\n";
my #inputArray = split (//, $input);
my $i = 0;
my #encryptedArray;
for ($i = 0; $i <= $#inputArray; $i++) {
$encryptedArray[$i] = encrypter($inputArray[$i]);
}
print "#encryptedArray # OUTPUT\n";
You might try changing this line:
if ($letter = m/[^a-zA-Z]/ ) {
To something more like this:
if ($letter =~ m/^[a-zA-Z]/) {
In the original line you are doing an assignment to the variable $letter, and the ^ will need to be before the [a-zA-Z] for the comparison.
You're attempting to do a rot13 translation on your characters. This can be done a little easier using tr:
use strict;
use warnings;
sub rot13 {
my $string = shift;
$string =~ tr/a-zA-Z/n-zA-Za-m/;
return $string;
}
print "Input string to be encrypted: ";
chomp(my $input = <STDIN>);
print "$input # USER INPUT\n";
print "Cycle of 4:\n";
for (1..4) {
$input = rot13($input);
print " $input\n";
}
Outputs
Input string to be encrypted: asdf
asdf # USER INPUT
Cycle of 4:
nFqs
ASDF
NfQS
asdf
Here is a some kind of more general implementation of it, it is easier to adapt it to something like, for example, using different rotation places for different letter:
#!/usr/bin/perl
use strict;
use warnings;
use feature qw(switch say);
sub rotateBy {
my ($letter, $rotate_places) = #_;
$rotate_places = $rotate_places ? $rotate_places : 13;
my $width = (ord 'z') - (ord 'a') + 1;
sub rotate {
my ($let, $base, $places, $width) = #_;
my $i = (ord $let) - (ord $base);
return chr((ord $base) + ($i + $places) % $width);
}
given ($letter) {
when (m/[a-z]/) {
return rotate ($letter, 'a', $rotate_places, $width);
}
when (m/A-Z/) {
return rotate ($letter, 'A', $rotate_places, $width);
}
default {
return $letter;
}
}
}
while (<>) {
chomp;
print "PLAINTEXT : $_\n";
print "CIPHERTEXT: ";
foreach my $let (split //) {
print rotateBy($let);
}
print "\n";
}
By the way, the above code looks too verbose to me, maybe there is a better way to do it.

How can I replace a specific word when it occurs only once in a given subset of data?

Consider the dataset below. Each chunk begining with a number is a 'case'. In the real dataset I have hundreds of thousands of cases. I'd like to replace the word "Exclusion" with "0" when there's only one word Exclusion in a case (e.g. case 10001).
If I loop through lines, I can count how many "Exclusions" I have in each case. But, if there's only one line with the word "Exclusion", I don't know how to get back to that line and replace the word.
How can I do that?
10001
M1|F1|SP1;12;12;12;11;13;10;Exclusion;D16S539
M1|F1|SP1;12;10;12;9;11;9;3.60;D16S
M1|F1|SP1;12;10;10;7;11;7;20.00;D7S
M1|F1|SP1;13;12;12;12;12;12;3.91;D13S
M1|F1|SP1;11;11;13;11;13;11;3.27;D5S
M1|F1|SP1;14;12;14;10;12;10;1.99;CSF
10002
M1|F1|SP1;8;13;13;8;8;12;2.91;D16S
M1|F1|SP1;13;11;13;10;10;10;4.13;D7S
M1|F1|SP1;12;9;12;10;11;16;Exclusion;D13S
M1|F1|SP1;12;10;12;10;14;15;Exclusion;D5S
M1|F1|SP1;13;10;10;10;17;18;Exclusion;CSF
sub process_block {
my ($block) = #_;
$block =~ s/\bExclusion\b/0/
if $block !~ /\bExclusion\b.*\bExclusion\b/s;
print($block);
}
my $buf;
while (<>) {
if (/^\d/) {
process_block($buf) if $buf;
$buf = '';
}
$buf .= $_;
}
process_block($buf) if $buf;
As you read the file, buffer up all lines in a case, and count exclusions,
my ($case,$buf,$count) = (undef,"",0);
while(my $ln = <>) {
Use a regex to detect a case,
if( $ln =~ /^\d+$/ ) {
#new case, process/print old case
$buf =~ s/;Exclusion;/;0;/ if($count==1);
print $buf;
($case,$buf,$count) = ($ln,"",0);
}
use a regex to detect 'Exclusion' now?
elsif( $ln =~ /;Exclusion;/ ) { $count++; }
$buf .= $l;
}
And when you are done, you may have a case left to process,
if( length($buf)>0 ) {
$buf =~ s/;Exclusion;/;0;/ if($count==1);
print $buffer;
}
This is the best I could think of. Assume you read your file into #lines
# separate into blocks
foreach my $line (#lines) {
chomp($line);
if ($line =~ m/^(\d+)/) {
$key = $1;
}
else {
push (#{$block{$key}}, $line);
}
}
# go through each block
foreach my $key (keys %block) {
print "$key\n";
my #matched = grep ($_ =~ m/exclusion/i, #{$block{$key}});
if (scalar (1 == #matched)){
foreach my $line (#{$block{$key}}) {
$line =~ s/Exclusion/0/i;
print "$line\n";
}
}
else {
foreach my $line (#{$block{$key}}) {
print "$line\n";
}
}
}
There're already many correct answers here, which use buffers to store the content of a "case".
Here's another solution using tell and seek to rewind the file, so buffers are not necessary. This could be useful when your "case" is very large and you're sensitive to the performance or memory usage.
use strict;
use warnings;
open FILE, "text.txt";
open REPLACE, ">replace.txt";
my $count = 0; # count of 'Exclusion' in the current case
my $position = 0;
my $prev_position = 0;
my $first_occur_position = 0; # first occurence of 'Exclusion' in the current case
my $visited = 0; # whether the current line is visited before
while (<FILE>) {
# keep track of the position before reading
# the current line
$prev_position = $position;
$position = tell FILE;
if ($visited == 0) {
if (/^\d+/) {
# new case
if ($count == 1) {
# rewind to the first occurence
# of 'Exclusion' in the previous case
seek FILE, $first_occur_position, 0;
$visited = 1;
}
else {
print REPLACE $_;
}
}
elsif (/Exclusion/) {
$count++;
if ($count > 1) {
seek FILE, $first_occur_position, 0;
$visited = 1;
}
elsif ($count == 1) {
$first_occur_position = $prev_position;
}
}
else {
print REPLACE $_ if ($count == 0);
}
if (eof FILE && $count == 1) {
seek FILE, $first_occur_position, 0;
$visited = 1;
}
}
else {
if ($count == 1) {
s/Exclusion/0/;
}
if (/^\d+/) {
$position = tell FILE;
$visited = 0;
$count = 0;
}
print REPLACE $_;
}
}
close REPLACE;
close FILE;

Find unused "use'd" Perl modules

I am working on a very large, very old "historically grown" codebase. In the past, there were often people thinking "Oh, I may need this and that module, so I just include it...", and later, people often "cached" Data inside of modules ("use ThisAndThat" needing a few seconds to load some hundred MB from DB to RAM, yeah, its really a stupid Idea, we are working on that too) and so, often, we have a small module use'ing like 20 or 30 modules, from who 90% are totally unused in the source itself, and, because of "caching" in several use'd submodules, modules tend to take up one minute to load or even more, which is, of course, not acceptable.
So, Im trying to get that done better. Right now, my way is looking through all the modules, understanding them as much as possible and I look at all the modules including them and see whether they are needed or not.
Is there any easier way? I mean: There are functions returning all subs a module has like
...
return grep { defined &{"$module\::$_"} } keys %{"$module\::"}
, so, aint there any simple way to see which ones are exported by default and which ones come from where and are used in the other modules?
A simple example is Data::Dumper, which is included in nearly every file, even, when all debug-warns and prints and so on arent in the script anymore. But still the module has to load Data::Dumper.
Is there any simple way to check that?
Thanks!
The following code could be part of your solution - it will show you which symbols are imported for each instance of use:
package traceuse;
use strict;
use warnings;
use Devel::Symdump;
sub import {
my $class = shift;
my $module = shift;
my $caller = caller();
my $before = Devel::Symdump->new($caller);
my $args = \#_;
# more robust way of emulating use?
eval "package $caller; require $module; $module\->import(\#\$args)";
my $after = Devel::Symdump->new($caller);
my #added;
my #after_subs = $after->functions;
my %before_subs = map { ($_,1) } $before->functions;
for my $k (#after_subs) {
push(#added, $k) unless $before_subs{$k};
}
if (#added) {
warn "using module $module added: ".join(' ', #added)."\n";
} else {
warn "no new symbols from using module $module\n";
}
}
1;
Then just replace "use module ..." with "use traceuse module ...", and you'll get a list of the functions that were imported.
Usage example:
package main;
sub foo { print "debug: foo called with: ".Dumper(\#_)."\n"; }
use traceuse Data::Dumper;
This will output:
using module Data::Dumper added: main::Dumper
i.e. you can tell which functions were imported in robust way. And you can easily extend this to report on imported scalar, array and hash variables - check the docs on Devel::Symdump.
Determine which functions are actually used is the other half of the equation. For that you might be able to get away with a simple grep of your source code - i.e. does Dumper appear in the module's source code that's not on a use line. It depends on what you know about your source code.
Notes:
there may be a module which does what traceuse does - I haven't checked
there might be a better way to emulate "use" from another package
I kind of got of got it to work with PPI. It looks like this:
#!/usr/local/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Term::ANSIColor;
use PPI;
use PPI::Dumper;
my %doneAlready = ();
$" = ", ";
our $maxDepth = 2;
my $showStuffOtherThanUsedOrNot = 0;
parse("/modules/Test.pm", undef, undef, 0);
sub parse {
my $file = shift;
my $indent = shift || 0;
my $caller = shift || $file;
my $depth = shift || 0;
if($depth && $depth >= $maxDepth) {
return;
}
return unless -e $file;
if(exists($doneAlready{$file}) == 1) {
return;
}
$doneAlready{$file} = 1;
my $skript = PPI::Document->new($file);
my #included = ();
eval {
foreach my $x (#{$skript->find("PPI::Statement::Include")}) {
foreach my $y (#{$x->{children}}) {
push #included, $y->{content} if (ref $y eq "PPI::Token::Word" && $y->{content} !~ /^(use|vars|constant|strict|warnings|base|Carp|no)$/);
}
}
};
my %double = ();
print "===== $file".($file ne $caller ? " (Aufgerufen von $caller)" : "")."\n" if $showStuffOtherThanUsedOrNot;
if($showStuffOtherThanUsedOrNot) {
foreach my $modul (#included) {
next unless -e createFileName($modul);
my $is_crap = ((exists($double{$modul})) ? 1 : 0);
print "\t" x $indent;
print color("blink red") if($is_crap);
print $modul;
print color("reset") if($is_crap);
print "\n";
$double{$modul} = 1;
}
}
foreach my $modul (#included) {
next unless -e createFileName($modul);
my $anyUsed = 0;
my $modulDoc = parse(createFileName($modul), $indent + 1, $file, $depth + 1);
if($modulDoc) {
my #exported = getExported($modulDoc);
print "Exported: \n" if(scalar #exported && $showStuffOtherThanUsedOrNot);
foreach (#exported) {
print(("\t" x $indent)."\t");
if(callerUsesIt($_, $file)) {
$anyUsed = 1;
print color("green"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot;
} else {
print color("red"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot;
}
print "\n" if $showStuffOtherThanUsedOrNot;
}
print(("\t" x $indent)."\t") if $showStuffOtherThanUsedOrNot;
print "Subs: " if $showStuffOtherThanUsedOrNot;
foreach my $s (findAllSubs($modulDoc)) {
my $isExported = grep($s eq $_, #exported) ? 1 : 0;
my $rot = callerUsesIt($s, $caller, $modul, $isExported) ? 0 : 1;
$anyUsed = 1 unless $rot;
if($showStuffOtherThanUsedOrNot) {
print color("red") if $rot;
print color("green") if !$rot;
print "$s, ";
print color("reset");
}
}
print "\n" if $showStuffOtherThanUsedOrNot;
print color("red"), "=========== $modul wahrscheinlich nicht in Benutzung!!!\n", color("reset") unless $anyUsed;
print color("green"), "=========== $modul in Benutzung!!!\n", color("reset") if $anyUsed;
}
}
return $skript;
}
sub createFileName {
my $file = shift;
$file =~ s#::#/#g;
$file .= ".pm";
$file = "/modules/$file";
return $file;
}
sub getExported {
my $doc = shift;
my #exported = ();
eval {
foreach my $x (#{$doc->find("PPI::Statement")}) {
my $worthATry = 0;
my $isMatch = 0;
foreach my $y (#{$x->{children}}) {
$worthATry = 1 if(ref $y eq "PPI::Token::Symbol");
if($y eq '#EXPORT') {
$isMatch = 1;
} elsif($isMatch && ref($y) ne "PPI::Token::Whitespace" && ref($y) ne "PPI::Token::Operator" && $y->{content} ne ";") {
push #exported, $y->{content};
}
}
}
};
my #realExported = ();
foreach (#exported) {
eval "\#realExported = $_";
}
return #realExported;
}
sub callerUsesIt {
my $subname = shift;
my $caller = shift;
my $namespace = shift || undef;
my $isExported = shift || 0;
$caller = `cat $caller`;
unless($namespace) {
return 1 if($caller =~ /\b$subname\b/);
} else {
$namespace = createPackageName($namespace);
my $regex = qr#$namespace(?:::|->)$subname#;
if($caller =~ $regex) {
return 1;
}
}
return 0;
}
sub findAllSubs {
my $doc = shift;
my #subs = ();
eval {
foreach my $x (#{$doc->find("PPI::Statement::Sub")}) {
my $foundName = 0;
foreach my $y (#{$x->{children}}) {
no warnings;
if($y->{content} ne "sub" && ref($y) eq "PPI::Token::Word") {
push #subs, $y;
}
use warnings;
}
}
};
return #subs;
}
sub createPackageName {
my $name = shift;
$name =~ s#/modules/##g;
$name =~ s/\.pm$//g;
$name =~ s/\//::/g;
return $name;
}
Its really ugly and maybe not 100% working, but it seems, with the tests that Ive done now, that its good for a beginning.