Perl Error - Global symbol requires explicit package name - perl
I am getting some errors while trying to run a Perl file which is I need to run. (I usually use Python but I do not know any other programming languages including Perl.) I will show errors firstly, then I will attach the whole code in the end.
The error:
$ perl C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl
Global symbol "$fba" requires explicit package name (did you forget to declare "my $fba"?) at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 171.
Global symbol "$fba" requires explicit package name (did you forget to declare "my $fba"?) at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 172.
Global symbol "$fba" requires explicit package name (did you forget to declare "my $fba"?) at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 173.
Global symbol "$fba" requires explicit package name (did you forget to declare "my $fba"?) at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 174.
Global symbol "$map" requires explicit package name (did you forget to declare "my $map"?) at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 202.
Global symbol "$pmap" requires explicit package name (did you forget to declare "my $pmap"?) at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 233.
Execution of C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl aborted due to compilation errors.
When I put "my" at before global symbols which are mentioned in the error, this time I am getting another error below:
$ perl C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl
"my" variable $fba masks earlier declaration in same statement at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 172.
"my" variable $fba masks earlier declaration in same statement at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 173.
"my" variable $fba masks earlier declaration in same statement at C:/databases/ModelSEEDDatabase/Scripts/Archived_Perl_Scripts/DumpSOLRTables_Master.pl line 174.
So, I do not know what should I do now actually. This Perl script will dump/extract two necessary files which are "ComplexRoles.tsv" and "TemplateReactions.tsv".
I am using Python 3.7 and Cygwin64. ModelSEEDDatabase Github link:
https://github.com/ModelSEED/ModelSEEDDatabase
If you can help me, I would be really glad. Thank you in advance.
The whole script is below: (I indicated the problematic lines which are 171, 172, 173, 174, 202, 233.)
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long::Descriptive;
my ($opt, $usage) = describe_options("%c %o <directory>",
[ "compounds=s", "path to master compounds file", { default => "../Biochemistry/compounds.master.tsv" } ],
[ "compartments=s", "path to master compartments file", { default => "../Biochemistry/compartments.master.tsv" } ],
[ "reactions=s", "path to master reactions file", { default => "../Biochemistry/reactions.master.tsv" } ],
[ "aliasdir=s", "path to directory with alias files", { default => "../Aliases/" } ],
[ "pathwaydir=s", "path to directory with pathway files", { default => "../Pathways/" } ],
[ "structuredir=s", "path to directory with structure files", { default => "../Structures/" } ],
[ "master=s", "path to output master biochemistry json file", { default => "../Biochemistry/biochemistry.master.json" } ],
[ "help|h", "print usage message and exit" ]
);
print($usage->text), exit if $opt->help;
my $directory = $ARGV[0];
exit if !$directory || !-d $directory;
$directory.="/" if $directory !~ /\/$/;
my #temp=();
my $header = 1;
#######################################################
#Initialization
#######################################################
#Collect Aliases
opendir(my $AliasDir, $opt->aliasdir);
my #Files = grep { $_ =~ /\.aliases$/ } readdir($AliasDir);
closedir($AliasDir);
my $rxn_alias_hash = {};
my $p_rxn_alias_hash = {};
my $cpd_alias_hash = {};
my $p_cpd_alias_hash = {};
my %alias_cpd_hash = ();
my %alias_rxn_hash = ();
foreach my $file (sort #Files){
$file =~ /^(\w+)\.aliases/;
my $aliasSet = $1;
$aliasSet = join(" ", split(/_/,$aliasSet)) if $aliasSet eq "Enzyme_Class";
open(FH, "< ".$opt->aliasdir.$file);
$header = 1;
while(<FH>){
chomp;
if($header){$header--;next}
#temp=split(/\t/,$_,-1);
if($temp[1] =~ /^cpd/ || $temp[2] =~ /^cpd/){
foreach my $cpd (split(/\|/,$temp[1])){
$cpd_alias_hash->{$aliasSet}->{$temp[0]}->{$cpd}=1;
$alias_cpd_hash{$cpd}{$aliasSet}{$temp[0]}=1;
}
foreach my $cpd (split(/\|/,$temp[2])){
$p_cpd_alias_hash->{$aliasSet}->{$temp[0]}->{$cpd}=1;
#Need to revise the decision to forgo aliases found in more recent database
if(!exists($alias_cpd_hash{$cpd}) && !exists($alias_cpd_hash{$cpd}{$aliasSet})){
$alias_cpd_hash{$cpd}{$aliasSet}{$temp[0]}=1;
}
}
}
if($temp[1] =~ /^rxn/ || $temp[2] =~ /^rxn/){
foreach my $rxn (split(/\|/,$temp[1])){
$rxn_alias_hash->{$aliasSet}->{$temp[0]}->{$rxn}=1;
$alias_rxn_hash{$rxn}{$aliasSet}{$temp[0]}=1;
}
foreach my $rxn (split(/\|/,$temp[2])){
$p_rxn_alias_hash->{$aliasSet}->{$temp[0]}->{$rxn}=1;
#Need to revise the decision to forgo aliases found in more recent database
if(!exists($alias_rxn_hash{$rxn}) && !exists($alias_rxn_hash{$rxn}{$aliasSet})){
$alias_rxn_hash{$rxn}{$aliasSet}{$temp[0]}=1;
}
}
}
}
close(FH);
}
my $rxn_pathways = {};
open(my $fh, "< ".$opt->pathwaydir."HopeScenarios.txt");
while (my $line = <$fh>) {
chomp($line);
my $array = [split(/\t/,$line)];
my $patharray = [split(/:/,$array->[0])];
pop(#{$patharray});
pop(#{$patharray});
if (defined($rxn_alias_hash->{KEGG}->{$array->[1]})) {
foreach my $rxn (keys(%{$rxn_alias_hash->{KEGG}->{$array->[1]}})) {
$rxn_pathways->{$rxn}->{KEGG}->{$patharray->[0]} = 1;
$rxn_pathways->{$rxn}->{Scenario}->{join("/",#{$patharray})} = 1;
}
} elsif (defined($p_rxn_alias_hash->{KEGG}->{$array->[1]})) {
foreach my $rxn (keys(%{$p_rxn_alias_hash->{KEGG}->{$array->[1]}})) {
$rxn_pathways->{$rxn}->{KEGG}->{$patharray->[0]} = 1;
$rxn_pathways->{$rxn}->{Scenario}->{join("/",#{$patharray})} = 1;
}
}
}
close($fh);
open($fh, "< ".$opt->pathwaydir."plantdefault.pathways.tsv");
my #headers = split(/\t/,<$fh>);
shift(#headers);
chomp($headers[$#headers]);
while(<$fh>){
chomp;
#temp=split(/\t/,$_,-1);
my $id = shift (#temp);
for(my $i=0;$i<scalar(#headers);$i++){
next if $temp[$i] eq "null";
foreach my $path (split(/\|/,$temp[$i])){
$rxn_pathways->{$id}{$headers[$i]}{$temp[$i]}=1;
}
}
}
close($fh);
my $cpd_structure = {};
open($fh, "< ".$opt->structuredir."KEGG_Charged_InChI.txt");
while (my $line = <$fh>) {
chomp($line);
my $array = [split(/\t/,$line)];
if (defined($cpd_alias_hash->{KEGG}->{$array->[0]})) {
foreach my $cpdid (keys(%{$cpd_alias_hash->{KEGG}->{$array->[0]}})){
if (!defined($cpd_structure->{$cpdid})) {
$cpd_structure->{$cpdid} = $array->[1];
}
}
}
if (defined($p_cpd_alias_hash->{KEGG}->{$array->[0]})) {
foreach my $cpdid (keys(%{$p_cpd_alias_hash->{KEGG}->{$array->[0]}})){
if (!defined($cpd_structure->{$cpdid})) {
$cpd_structure->{$cpdid} = $array->[1];
}
}
}
}
close($fh);
open($fh, "< ".$opt->structuredir."MetaCyc_Charged_InChI.txt");
while (my $line = <$fh>) {
chomp($line);
my $array = [split(/\t/,$line)];
if (defined($cpd_alias_hash->{MetaCyc}->{$array->[0]})) {
foreach my $cpdid (keys(%{$cpd_alias_hash->{MetaCyc}->{$array->[0]}})){
if (!defined($cpd_structure->{$cpdid})) {
$cpd_structure->{$cpdid} = $array->[1];
}
}
}
if (defined($p_cpd_alias_hash->{MetaCyc}->{$array->[0]})) {
foreach my $cpdid (keys(%{$p_cpd_alias_hash->{MetaCyc}->{$array->[0]}})){
if (!defined($cpd_structure->{$cpdid})) {
$cpd_structure->{$cpdid} = $array->[1];
}
}
}
}
close($fh);
#Retreiving templates
# Need to get these from source file
my $templates = [
$fba->_get_msobject("ModelTemplate","KBaseTemplateModels","GramPosModelTemplate"), ###line171
$fba->_get_msobject("ModelTemplate","KBaseTemplateModels","GramNegModelTemplate"), ###line172
$fba->_get_msobject("ModelTemplate","KBaseTemplateModels","CoreModelTemplate"), ###line173
$fba->_get_msobject("ModelTemplate","KBaseTemplateModels","PlantModelTemplate") ###line174
];
#Printing template table
my $templatlist = ["template.0","template.1","template.2","template.3"];
my $templatedata = [
["template.0","gram_positive_template","genome_scale_model","Bacteria","0","1","chenry"],
["template.1","gram_negative_template","genome_scale_model","Bacteria","0","1","chenry"],
["template.2","core_template","core_model","Bacteria","0","1","chenry"],
["template.3","plant_template","genome_scale_model","Plant","0","1","seaver"],
];
#Printing complex roles
open($fh, ">", $directory."ComplexRoles.tsv");
my $columns = [qw(
complex_id
complex_name
complex_source
complex_type
role_id
role_name
role_type
role_source
role_aliases
role_exemplar
type
triggering
optional
)];
print $fh join("\t",#{$columns})."\n";
my $cpxs = $map->complexes(); # Need to get these from source file ###line202
my $idhash;
my $count=0;
for (my $i=0; $i < #{$cpxs}; $i++) {
my $cpx = $cpxs->[$i];
$idhash->{$cpx->id()} = "cpx.".$i;
my $cpxroles = $cpx->complexroles();
for (my $j=0; $j < #{$cpxroles}; $j++) {
my $cpxrole = $cpxroles->[$j];
my $roleid = $cpxrole->role()->id();
$roleid =~ s/ms//;
my $data = [
"cpx.".$i,
"cpx.".$i,
"ModelSEED",
"SEED_role_complex",
$roleid,
$cpxrole->role()->name(),
"SEED_role",
"SEED",
"searchname:".$cpxrole->role()->searchname(),
"null",
"role_mapping",
$cpxrole->triggering(),
$cpxrole->optionalRole(),
];
print $fh join("\t",#{$data})."\n";
}
$count = $i;
}
$cpxs = $pmap->complexes(); # Need to get these from source file ###line233
for (my $i=0; $i < #{$cpxs}; $i++) {
$count++;
my $cpx = $cpxs->[$i];
$idhash->{$cpx->id()} = "cpx.".$count;
my $cpxroles = $cpx->complexroles();
for (my $j=0; $j < #{$cpxroles}; $j++) {
my $cpxrole = $cpxroles->[$j];
my $roleid = $cpxrole->role()->id();
$roleid =~ s/ms//;
my $data = [
"cpx.".$count,
"cpx.".$count,
"ModelSEED",
"SEED_role_complex",
$roleid,
$cpxrole->role()->name(),
"SEED_role",
"SEED",
"searchname:".$cpxrole->role()->searchname(),
"null",
"role_mapping",
$cpxrole->triggering(),
$cpxrole->optionalRole(),
];
print $fh join("\t",#{$data})."\n";
}
}
close($fh);
#Printing compounds
#As it stands, it's a copy of the master compounds file with the aliases integrated
open(FH, "< ".$opt->compounds);
open($fh, ">", $directory."Compounds.tsv");
$header = 1;
undef(#headers);
my %Compounds=();
while(<FH>){
chomp;
if($header){
#headers = split(/\t/,$_,-1);
print $fh $_."\n";
$header--;
next;
}
#temp=split(/\t/,$_,-1);
#map values to keys
#probably not that necessary, but useful if column order changes
my %cpdHash=();
for(my $i=0;$i<scalar(#headers);$i++){
$cpdHash{$headers[$i]}=$temp[$i];
}
my #aliases = ();
foreach my $aliasSet (keys %{$alias_cpd_hash{$cpdHash{id}}}){
foreach my $alias (keys %{$alias_cpd_hash{$cpdHash{id}}{$aliasSet}}){
push(#aliases, "\"".$aliasSet.":".$alias."\"");
}
}
$cpdHash{aliases}= scalar(#aliases)>0 ? join(";",#aliases) : "null";
print $fh join("\t", map { $cpdHash{$_} } #headers),"\n";
$Compounds{$cpdHash{id}}=\%cpdHash;
}
close($fh);
#Printing reactions
#As it stands, it's a copy of the master reactions file with the pathways, aliases, and ec numbers integrated
open(FH, "< ".$opt->reactions);
open($fh, ">", $directory."Reactions.tsv");
$header = 1;
undef(#headers);
my %Reactions=();
while(<FH>){
chomp;
if($header){
#headers = split(/\t/,$_,-1);
print $fh join("\t", grep { $_ ne 'is_obsolete' && $_ ne 'linked_reaction' } #headers),"\n";
$header--;
next;
}
#temp=split(/\t/,$_,-1);
#map values to keys
#probably not that necessary, but useful if column order changes
my %rxnHash=();
for(my $i=0;$i<scalar(#headers);$i++){
$rxnHash{$headers[$i]}=$temp[$i];
}
my #ecnums = ();
my #aliases = ();
foreach my $aliasSet (keys %{$alias_rxn_hash{$rxnHash{id}}}){
foreach my $alias (keys %{$alias_rxn_hash{$rxnHash{id}}{$aliasSet}}){
#Only include full ec numbers (?)
if ($aliasSet eq "Enzyme Class"){
if($alias =~ m/\d+\.\d+\.\d+\.\d+/){
push(#ecnums, $alias);
}
}else{
push(#aliases, "\"".$aliasSet.":".$alias."\"");
}
}
}
$rxnHash{aliases}= scalar(#aliases)>0 ? join(";",#aliases) : "null";
$rxnHash{ec_numbers}= scalar(#ecnums)>0 ? join(";",#ecnums) : "null";
my #pathways = ();
if (defined($rxn_pathways->{$rxnHash{id}})) {
foreach my $type (keys(%{$rxn_pathways->{$rxnHash{id}}})) {
foreach my $path (keys(%{$rxn_pathways->{$rxnHash{id}}{$type}})) {
push(#pathways, $type.":".$path);
}
}
}
$rxnHash{pathways}= scalar(#pathways)>0 ? join(";",#pathways) : "null";
print $fh join("\t", map { $rxnHash{$_} } grep { $_ ne 'is_obsolete' && $_ ne 'linked_reaction' } #headers),"\n";
$Reactions{$rxnHash{id}}=\%rxnHash;
}
close($fh);
#Printing template biomasses reactions
open($fh, ">", $directory."TemplateBiomasses.tsv");
$columns = [qw(
id
name
type
other
dna
rna
protein
lipid
cellwall
cofactor
energy
template_id
template_name
template_modeltype
template_domain
template_version
template_is_current
template_owner
compartment_ids
compound_ids
compound_data
)];
print $fh join("\t",#{$columns})."\n";
for (my $i=0; $i < #{$templates}; $i++) {
my $biomasses = $templates->[$i]->templateBiomasses();
for (my $j=0; $j < #{$biomasses}; $j++) {
my $compounds = {};
my $comps = {};
my $bio = $biomasses->[$j];
my $biocpds = $bio->templateBiomassComponents();
my #compounddata = ();
for (my $k=0; $k < #{$biocpds}; $k++) {
my $biocpd = $biocpds->[$k];
my $biocpd_id = $biocpd->compound()->id();
my #links = ();
my $linkrefs = $biocpd->linked_compounds();
for (my $m=0; $m < #{$linkrefs}; $m++) {
push(#links, $linkrefs->[$m]->id()."{".$biocpd->link_coefficients()->[$m]."}");
}
$compounds->{$biocpd_id} = 1;
$comps->{$biocpd->compartment()->id()} = 1;
push(#compounddata, $biocpd_id.":\"".$Compounds{$biocpd_id}{name}."\":".$biocpd->coefficient().":".$biocpd->coefficientType().":".$biocpd->class().":".join("|",#links));
}
my $data = [
$templatedata->[$i]->[0].".".$bio->id(),
$bio->name(),
"growth",
$bio->other(),
$bio->dna(),
$bio->rna(),
$bio->protein(),
$bio->lipid(),
$bio->cellwall(),
$bio->cofactor(),
$bio->energy(),
$templatedata->[$i]->[0],
$templatedata->[$i]->[1],
$templatedata->[$i]->[2],
$templatedata->[$i]->[3],
$templatedata->[$i]->[4],
$templatedata->[$i]->[5],
$templatedata->[$i]->[6],
"0:".join(";0:",keys(%{$comps})),
join(";",keys(%{$compounds})),
join(";",#compounddata)
];
print $fh join("\t",#{$data})."\n";
}
}
close($fh);
#Printing template reactions
open($fh, ">", $directory."TemplateReactions.tsv");
$columns = [qw(
id
reaction_id
abbreviation
name
code
stoichiometry
is_transport
equation
definition
model_direction
gapfill_direction
type
base_cost
forward_penalty
reverse_penalty
pathways
aliases
ec_numbers
deltag
deltagerr
template_id
template_name
template_modeltype
template_domain
template_version
template_is_current
template_owner
compartment_ids
complex_ids
compound_ids
)];
print $fh join("\t",#{$columns})."\n";
for (my $i=0; $i < #{$templates}; $i++) {
my $rxns = $templates->[$i]->templateReactions();
for (my $j=0; $j < #{$rxns}; $j++) {
my $rxn = $rxns->[$j];
my $complexes = {};
my $cpxs = $rxn->complexs();
for (my $j=0; $j < #{$cpxs}; $j++) {
$complexes->{$idhash->{$cpxs->[$j]->id()}} = 1;
}
my $compounds = {};
my $comps = {};
# my $rgts = [split(/;/,$Reactions{$rxn->reaction()->id()}{stoichiometry})];
my $rgts = $rxn->reaction()->reagents();
for (my $j=0; $j < #{$rgts}; $j++) {
# my ($coef,$cpd,$cmpt) = split(/:/,$rgts->[$j]);
my ($cpd,$cmpt) = ($rgts->[$j]->compound()->id(),$rgts->[$j]->compartment()->id());
$compounds->{$cpd}=1;
$comps->{$cmpt}=1;
}
my $rxn_id = $rxn->reaction()->id();
my $compid = "c0";
my $data = [
$templatedata->[$i]->[0].".".$rxn_id."_".$compid,
$rxn_id,
$Reactions{$rxn_id}{abbreviation}."_".$compid,
$Reactions{$rxn_id}{name}."_".$compid,
# $Reactions{$rxn_id}{code},
# $Reactions{$rxn_id}{stoichiometry},
$rxn->reaction()->code(),
$rxn->reaction()->stoichiometry(),
$Reactions{$rxn_id}{is_transport},
# $Reactions{$rxn_id}{equation},
# $Reactions{$rxn_id}{definition},
$rxn->reaction()->equation(),
$rxn->reaction()->definition(),
$rxn->direction(),
defined($rxn->GapfillDirection()) ? $rxn->GapfillDirection() : "=",
"null",
defined($rxn->base_cost()) ? $rxn->base_cost() : 0,
defined($rxn->forward_penalty()) ? $rxn->base_cost() : 0,
defined($rxn->reverse_penalty()) ? $rxn->base_cost() : 0,
$Reactions{$rxn_id}{pathways},
$Reactions{$rxn_id}{aliases},
$Reactions{$rxn_id}{ec_numbers},
$Reactions{$rxn_id}{deltag},
$Reactions{$rxn_id}{deltagerr},
$templatedata->[$i]->[0],
$templatedata->[$i]->[1],
$templatedata->[$i]->[2],
$templatedata->[$i]->[3],
$templatedata->[$i]->[4],
$templatedata->[$i]->[5],
$templatedata->[$i]->[6],
"0:".join(";0:",keys(%{$comps})),
join(";",keys(%{$complexes})),
join(";",keys(%{$compounds}))
];
print $fh join("\t",#{$data})."\n";
}
}
close($fh);
my $templates = [
$fba->_get_msobject("ModelTemplate","KBaseTemplateModels","GramPosModelTemplate"), ###line171
$fba->_get_msobject("ModelTemplate","KBaseTemplateModels","GramNegModelTemplate"), ###line172
$fba->_get_msobject("ModelTemplate","KBaseTemplateModels","CoreModelTemplate"), ###line173
$fba->_get_msobject("ModelTemplate","KBaseTemplateModels","PlantModelTemplate") ###line174
];
The code is calling a method on the object in $fba without first declaring it it or even assigning an object to it. (In Python terms, the code does fba._get_msobject(...) without first doing fba = ....)
Not only will you need to declare the variable (my $fba), you will need to assign to it whatever object it's supposed to have.
Related
Getting Error error message: Use of uninitialized value $_ in pattern match (m//) at te_analysis/transposonPSI_result_2fasta.pl line
I am new to Perl and in below script while executing, I am getting error Use of uninitialized value $_ in pattern match (m//) at te_analysis/transposonPSI_result_2fasta.pl line I think the issue is while assign $_ value but not sure if that the cause. How it can be fixed. #!/usr/bin/perl -w use strict; my $largest = 0; my $contig = ''; if (#ARGV != 2) { print "$0 fasta result\n"; exit; } my $filenameA = $ARGV[0]; my $result = $ARGV[1]; my %seqs = (); open(IN, "$filenameA") or die "oops!\n"; my $read_name = ''; my $read_seq = ''; while () { if (/^>(\S+)/) { $read_name = "$1"; $read_seq = ""; while () { if (/^>(\S+)/) { $seqs{ $read_name } = $read_seq; $read_name = "$1"; $read_seq = ""; } else { chomp; $read_seq. = $_; } } } } close(IN); $seqs{ $read_name } = $read_seq; open(IN, "$result") or die "oops"; open OUT, ">", "$result.fa" or die "oooooops\n"; while () { if (/Chain\s+(\S+)\s+(\d+)-(\d+)\s+(\S+)\s+(\d+)-(\d+)/) { print "$1\t$2\t$3\t$4\t$5\t$6\n"; my $seq = substr($seqs{ $4 }, $5 - 1, ($6 - $5 + 1)); print OUT ">$1.$2-$3.$4.$5-$6\n$seq\n"; } }
You are performing matches against the content of $_ —/.../ is short for $_ =~ /.../— but you never assign anything to $_. Both instances of while () should be while (<IN>) which is short for while (defined( $_ = readline(IN) )) I suspect you copied the code from an HTML document that wasn't properly constructed, causing the <IN> to be interpreted as an unknown tag rather than being displayed. There may be other problems.
Recursively remove adjacent duplicate characters
I am having a string say $str = "hhiiishs aappllee eerrffdd" I want to remove adjacent duplicate characters recursively from a string. I dont know how to write recursion. I have written a code that is not recursive but working if we pass string by string use strict; use warnings; my $str = "AABBCCDEEFDDS asdwdwws ffoorr"; sub remove { my $var1 = ""; my $str = $_[0]; my #arr = split (//, $str); my $f = ""; foreach (0..$#arr) { if ( $arr[$_] eq $var1) { next; #substr ( $str, $_) } else { $var1 = $arr[$_]; $f = "$f"."$arr[$_]"; } } $f = "$f"." "; return $f; } Please guide me how to write recursive in Perl.
You can try, $str =~ s/(.)\1+/$1/g; gives hishs aple erfd
Using recursion probably isn't the best choice for this, but here is a recursive function below. #!/usr/bin/perl use strict; use warnings; my $foo = "aabbccddeeffgg hhiijjkkllmmnnoo pp"; print reDup($foo), "\n"; sub reDup { my #string = split ('', shift); #split string into array of characters my $val; for my $i( 0..$#string){ if(defined($val) && $string[$i] eq $val){ #string[$i..$#string] = #string[($i+1)..$#string]; #if last char checked = current char, shift the array to the left. pop #string; #Above leaves unwanted element at the end, so pop it off my $str = join('', #string); return reDup($str); #do it all again } $val = $string[$i]; } return join('', #string); #when the for loops if statement is never executed, it must contain no duplicates. }
sub _remove_adjacent { my $out = shift; if (#_ == 0) { return $out; } elsif (#_ == 1) { return $out.$_[0]; } elsif ($_[0] eq $_[1]) { shift; return _remove_adjacent($out.shift(#_), #_); } else { return _remove_adjacent($out.shift(#_), #_); } } sub remove_adjacent { my ($in) = #_; return _remove_adjacent('', split(//, $in)); } Of course, that's purely tail-recursive, so it can be inlined into a loop. sub remove_adjacent { my ($in) = #_; my #in = split(//, $in); my $out = ''; while (1) { if (#in == 0) { last; } elsif (#in == 1) { $out .= $in[0]; last; } elsif ($in[0] eq $in[1]) { shift(#in); $out .= shift(#in); } else { $out .= shift(#in); } } return $out; } This can be cleaned up further, but it shows that recursion would be a pure waste here.
How can I replace a specific word when it occurs only once in a given subset of data?
Consider the dataset below. Each chunk begining with a number is a 'case'. In the real dataset I have hundreds of thousands of cases. I'd like to replace the word "Exclusion" with "0" when there's only one word Exclusion in a case (e.g. case 10001). If I loop through lines, I can count how many "Exclusions" I have in each case. But, if there's only one line with the word "Exclusion", I don't know how to get back to that line and replace the word. How can I do that? 10001 M1|F1|SP1;12;12;12;11;13;10;Exclusion;D16S539 M1|F1|SP1;12;10;12;9;11;9;3.60;D16S M1|F1|SP1;12;10;10;7;11;7;20.00;D7S M1|F1|SP1;13;12;12;12;12;12;3.91;D13S M1|F1|SP1;11;11;13;11;13;11;3.27;D5S M1|F1|SP1;14;12;14;10;12;10;1.99;CSF 10002 M1|F1|SP1;8;13;13;8;8;12;2.91;D16S M1|F1|SP1;13;11;13;10;10;10;4.13;D7S M1|F1|SP1;12;9;12;10;11;16;Exclusion;D13S M1|F1|SP1;12;10;12;10;14;15;Exclusion;D5S M1|F1|SP1;13;10;10;10;17;18;Exclusion;CSF
sub process_block { my ($block) = #_; $block =~ s/\bExclusion\b/0/ if $block !~ /\bExclusion\b.*\bExclusion\b/s; print($block); } my $buf; while (<>) { if (/^\d/) { process_block($buf) if $buf; $buf = ''; } $buf .= $_; } process_block($buf) if $buf;
As you read the file, buffer up all lines in a case, and count exclusions, my ($case,$buf,$count) = (undef,"",0); while(my $ln = <>) { Use a regex to detect a case, if( $ln =~ /^\d+$/ ) { #new case, process/print old case $buf =~ s/;Exclusion;/;0;/ if($count==1); print $buf; ($case,$buf,$count) = ($ln,"",0); } use a regex to detect 'Exclusion' now? elsif( $ln =~ /;Exclusion;/ ) { $count++; } $buf .= $l; } And when you are done, you may have a case left to process, if( length($buf)>0 ) { $buf =~ s/;Exclusion;/;0;/ if($count==1); print $buffer; }
This is the best I could think of. Assume you read your file into #lines # separate into blocks foreach my $line (#lines) { chomp($line); if ($line =~ m/^(\d+)/) { $key = $1; } else { push (#{$block{$key}}, $line); } } # go through each block foreach my $key (keys %block) { print "$key\n"; my #matched = grep ($_ =~ m/exclusion/i, #{$block{$key}}); if (scalar (1 == #matched)){ foreach my $line (#{$block{$key}}) { $line =~ s/Exclusion/0/i; print "$line\n"; } } else { foreach my $line (#{$block{$key}}) { print "$line\n"; } } }
There're already many correct answers here, which use buffers to store the content of a "case". Here's another solution using tell and seek to rewind the file, so buffers are not necessary. This could be useful when your "case" is very large and you're sensitive to the performance or memory usage. use strict; use warnings; open FILE, "text.txt"; open REPLACE, ">replace.txt"; my $count = 0; # count of 'Exclusion' in the current case my $position = 0; my $prev_position = 0; my $first_occur_position = 0; # first occurence of 'Exclusion' in the current case my $visited = 0; # whether the current line is visited before while (<FILE>) { # keep track of the position before reading # the current line $prev_position = $position; $position = tell FILE; if ($visited == 0) { if (/^\d+/) { # new case if ($count == 1) { # rewind to the first occurence # of 'Exclusion' in the previous case seek FILE, $first_occur_position, 0; $visited = 1; } else { print REPLACE $_; } } elsif (/Exclusion/) { $count++; if ($count > 1) { seek FILE, $first_occur_position, 0; $visited = 1; } elsif ($count == 1) { $first_occur_position = $prev_position; } } else { print REPLACE $_ if ($count == 0); } if (eof FILE && $count == 1) { seek FILE, $first_occur_position, 0; $visited = 1; } } else { if ($count == 1) { s/Exclusion/0/; } if (/^\d+/) { $position = tell FILE; $visited = 0; $count = 0; } print REPLACE $_; } } close REPLACE; close FILE;
perl: persist set of strings with commit support
I have a set of strings that is modified inside a loop of 25k iterations. It's empty at the beginning, but 0-200 strings are randomly added or removed from it in each cycle. At the end, the set contains about 80k strings. I want to make it resumable. The set should be saved to disk after each cycle and be loaded on resume. What library can I use? The amount of raw data is ~16M, but the changes are usually small. I don't want it to rewrite the whole store on each iteration. Since the strings are paths, I'm thinking of storing them in a log file like this: +a +b commit -b +d commit In the beginning the file is loaded into a hash and then compacted. If there's no commit line in the end, the last block is not taken into account.
The Storable package brings persistence to your Perl data structures (SCALAR, ARRAY, HASH or REF objects), i.e. anything that can be conveniently stored to disk and retrieved at a later time.
I've decided to put away the heavy artillery and write something simple: package LoL::IMadeADb; sub new { my $self; ( my $class, $self->{dbname} ) = #_; # open for read, then write. create if not exist #msg "open $self->{dbname}"; open(my $fd, "+>>", $self->{dbname}) or die "cannot open < $self->{dbname}: $!"; seek($fd, 0, 0); $self->{fd} = $fd; #msg "opened"; $self->{paths} = {}; my $href = $self->{paths}; $self->{nlines} = 0; my $lastcommit = 0; my ( $c, $rest ); while(defined($c = getc($fd)) && substr(($rest = <$fd>), -1) eq "\n") { $self->{nlines}++; chomp($rest); if ($c eq "c") { $lastcommit = tell($fd); #msg "lastcommit: " . $lastcommit; } elsif ($c eq "+") { $href->{$rest} = undef; } elsif ($c eq "-") { delete $href->{$rest}; } #msg "line: '" . $c . $rest . "'"; } if ($lastcommit < tell($fd)) { print STDERR "rolling back incomplete file: " . $self->{dbname} . "\n"; seek($fd, $lastcommit, 0); while(defined($c = getc($fd)) && substr(($rest = <$fd>), -1) eq "\n") { $self->{nlines}--; chomp($rest); if ($c eq "+") { delete $href->{$rest}; } else { $href->{$rest} = undef; } } truncate($fd, $lastcommit) or die "cannot truncate $self->{dbname}: $!"; print STDERR "rolling back incomplete file; done\n"; } #msg "entries = " . (keys( %{ $href })+0) . ", nlines = " . $self->{nlines} . "\n"; bless $self, $class } sub add { my ( $self , $path ) = #_; if (!exists $self->{paths}{$path}) { $self->{paths}{$path} = undef; print { $self->{fd} } "+" . $path . "\n"; $self->{nlines}++; $self->{changed} = 1; } undef } sub remove { my ( $self , $path ) = #_; if (exists $self->{paths}{$path}) { delete $self->{paths}{$path}; print { $self->{fd} } "-" . $path . "\n"; $self->{nlines}++; $self->{changed} = 1; } undef } sub save { my ( $self ) = #_; return undef unless $self->{changed}; my $fd = $self->{fd}; my #keys = keys %{$self->{paths}}; if ( $self->{nlines} - #keys > 5000 ) { #msg "compacting"; close($fd); my $bkpdir = dirname($self->{dbname}); ($fd, my $bkpname) = tempfile(DIR => $bkpdir , SUFFIX => ".tmp" ) or die "cannot create backup file in: $bkpdir: $!"; $self->{nlines} = 1; for (#keys) { print { $fd } "+" . $_ . "\n" or die "cannot write backup file: $!"; $self->{nlines}++; } print { $fd } "c\n"; close($fd); move($bkpname, $self->{dbname}) or die "cannot rename " . $bkpname . " => " . $self->{dbname} . ": $!"; open($self->{fd}, ">>", $self->{dbname}) or die "cannot open < $self->{dbname}: $!"; } else { print { $fd } "c\n"; $self->{nlines}++; # flush: my $previous_default = select($fd); $| ++; $| --; select($previous_default); } $self->{changed} = 0; #print "entries = " . (#keys+0) . ", nlines = " . $self->{nlines} . "\n"; undef } 1;
Find unused "use'd" Perl modules
I am working on a very large, very old "historically grown" codebase. In the past, there were often people thinking "Oh, I may need this and that module, so I just include it...", and later, people often "cached" Data inside of modules ("use ThisAndThat" needing a few seconds to load some hundred MB from DB to RAM, yeah, its really a stupid Idea, we are working on that too) and so, often, we have a small module use'ing like 20 or 30 modules, from who 90% are totally unused in the source itself, and, because of "caching" in several use'd submodules, modules tend to take up one minute to load or even more, which is, of course, not acceptable. So, Im trying to get that done better. Right now, my way is looking through all the modules, understanding them as much as possible and I look at all the modules including them and see whether they are needed or not. Is there any easier way? I mean: There are functions returning all subs a module has like ... return grep { defined &{"$module\::$_"} } keys %{"$module\::"} , so, aint there any simple way to see which ones are exported by default and which ones come from where and are used in the other modules? A simple example is Data::Dumper, which is included in nearly every file, even, when all debug-warns and prints and so on arent in the script anymore. But still the module has to load Data::Dumper. Is there any simple way to check that? Thanks!
The following code could be part of your solution - it will show you which symbols are imported for each instance of use: package traceuse; use strict; use warnings; use Devel::Symdump; sub import { my $class = shift; my $module = shift; my $caller = caller(); my $before = Devel::Symdump->new($caller); my $args = \#_; # more robust way of emulating use? eval "package $caller; require $module; $module\->import(\#\$args)"; my $after = Devel::Symdump->new($caller); my #added; my #after_subs = $after->functions; my %before_subs = map { ($_,1) } $before->functions; for my $k (#after_subs) { push(#added, $k) unless $before_subs{$k}; } if (#added) { warn "using module $module added: ".join(' ', #added)."\n"; } else { warn "no new symbols from using module $module\n"; } } 1; Then just replace "use module ..." with "use traceuse module ...", and you'll get a list of the functions that were imported. Usage example: package main; sub foo { print "debug: foo called with: ".Dumper(\#_)."\n"; } use traceuse Data::Dumper; This will output: using module Data::Dumper added: main::Dumper i.e. you can tell which functions were imported in robust way. And you can easily extend this to report on imported scalar, array and hash variables - check the docs on Devel::Symdump. Determine which functions are actually used is the other half of the equation. For that you might be able to get away with a simple grep of your source code - i.e. does Dumper appear in the module's source code that's not on a use line. It depends on what you know about your source code. Notes: there may be a module which does what traceuse does - I haven't checked there might be a better way to emulate "use" from another package
I kind of got of got it to work with PPI. It looks like this: #!/usr/local/bin/perl use strict; use warnings; use Data::Dumper; use Term::ANSIColor; use PPI; use PPI::Dumper; my %doneAlready = (); $" = ", "; our $maxDepth = 2; my $showStuffOtherThanUsedOrNot = 0; parse("/modules/Test.pm", undef, undef, 0); sub parse { my $file = shift; my $indent = shift || 0; my $caller = shift || $file; my $depth = shift || 0; if($depth && $depth >= $maxDepth) { return; } return unless -e $file; if(exists($doneAlready{$file}) == 1) { return; } $doneAlready{$file} = 1; my $skript = PPI::Document->new($file); my #included = (); eval { foreach my $x (#{$skript->find("PPI::Statement::Include")}) { foreach my $y (#{$x->{children}}) { push #included, $y->{content} if (ref $y eq "PPI::Token::Word" && $y->{content} !~ /^(use|vars|constant|strict|warnings|base|Carp|no)$/); } } }; my %double = (); print "===== $file".($file ne $caller ? " (Aufgerufen von $caller)" : "")."\n" if $showStuffOtherThanUsedOrNot; if($showStuffOtherThanUsedOrNot) { foreach my $modul (#included) { next unless -e createFileName($modul); my $is_crap = ((exists($double{$modul})) ? 1 : 0); print "\t" x $indent; print color("blink red") if($is_crap); print $modul; print color("reset") if($is_crap); print "\n"; $double{$modul} = 1; } } foreach my $modul (#included) { next unless -e createFileName($modul); my $anyUsed = 0; my $modulDoc = parse(createFileName($modul), $indent + 1, $file, $depth + 1); if($modulDoc) { my #exported = getExported($modulDoc); print "Exported: \n" if(scalar #exported && $showStuffOtherThanUsedOrNot); foreach (#exported) { print(("\t" x $indent)."\t"); if(callerUsesIt($_, $file)) { $anyUsed = 1; print color("green"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot; } else { print color("red"), "$_, ", color("reset") if $showStuffOtherThanUsedOrNot; } print "\n" if $showStuffOtherThanUsedOrNot; } print(("\t" x $indent)."\t") if $showStuffOtherThanUsedOrNot; print "Subs: " if $showStuffOtherThanUsedOrNot; foreach my $s (findAllSubs($modulDoc)) { my $isExported = grep($s eq $_, #exported) ? 1 : 0; my $rot = callerUsesIt($s, $caller, $modul, $isExported) ? 0 : 1; $anyUsed = 1 unless $rot; if($showStuffOtherThanUsedOrNot) { print color("red") if $rot; print color("green") if !$rot; print "$s, "; print color("reset"); } } print "\n" if $showStuffOtherThanUsedOrNot; print color("red"), "=========== $modul wahrscheinlich nicht in Benutzung!!!\n", color("reset") unless $anyUsed; print color("green"), "=========== $modul in Benutzung!!!\n", color("reset") if $anyUsed; } } return $skript; } sub createFileName { my $file = shift; $file =~ s#::#/#g; $file .= ".pm"; $file = "/modules/$file"; return $file; } sub getExported { my $doc = shift; my #exported = (); eval { foreach my $x (#{$doc->find("PPI::Statement")}) { my $worthATry = 0; my $isMatch = 0; foreach my $y (#{$x->{children}}) { $worthATry = 1 if(ref $y eq "PPI::Token::Symbol"); if($y eq '#EXPORT') { $isMatch = 1; } elsif($isMatch && ref($y) ne "PPI::Token::Whitespace" && ref($y) ne "PPI::Token::Operator" && $y->{content} ne ";") { push #exported, $y->{content}; } } } }; my #realExported = (); foreach (#exported) { eval "\#realExported = $_"; } return #realExported; } sub callerUsesIt { my $subname = shift; my $caller = shift; my $namespace = shift || undef; my $isExported = shift || 0; $caller = `cat $caller`; unless($namespace) { return 1 if($caller =~ /\b$subname\b/); } else { $namespace = createPackageName($namespace); my $regex = qr#$namespace(?:::|->)$subname#; if($caller =~ $regex) { return 1; } } return 0; } sub findAllSubs { my $doc = shift; my #subs = (); eval { foreach my $x (#{$doc->find("PPI::Statement::Sub")}) { my $foundName = 0; foreach my $y (#{$x->{children}}) { no warnings; if($y->{content} ne "sub" && ref($y) eq "PPI::Token::Word") { push #subs, $y; } use warnings; } } }; return #subs; } sub createPackageName { my $name = shift; $name =~ s#/modules/##g; $name =~ s/\.pm$//g; $name =~ s/\//::/g; return $name; } Its really ugly and maybe not 100% working, but it seems, with the tests that Ive done now, that its good for a beginning.