Here is the code:
#!/usr/bin/perl
use warnings;
use strict;
my #user;
my $count = 0;
my #test;
my #last = qx(last);
my #logins;
# Opens the file /etc/passwd and puts the users with an uid
# over 1000 but less that 65000 into an array.
open( my $passwd, "<", "/etc/passwd") or die "/etc/passwd failed to open.\n";
while (my $lines = <$passwd>) {
my #splitarray = split(/\:/, $lines );
if( $splitarray[2] >= 1000 && $splitarray[2] < 65000) {
$user[$count] = $splitarray[0];
#print "$user[$count]\n";
$count++;
}
}
close $passwd;
for my $i (0 .. $#user) {
my $counter = 0;
#logins =qx(last $user[$i]);
for my $j (0 .. $#logins) {
if ($logins[$j] =~ /$user[$i]/) {
$counter++;
}
}
print $user[$i] . ":" . $counter . "\n";
}
And the output from this looks like this:
user1:15
user2:3
user3:6
user4:2
How can i sort this so that it shows the users with the most logins at the top? I tried with a hash but couldn't seem to get it right. Since they are not arrays i don't know how to sort them.
You only have one login counter. In order to sort the users by login count, you will need each user's login count.
my %logins_by_user;
for my $user (#users) {
$logins_by_user{$user} = grep /^\Q$user\E /, `last '$user'`;
}
for my $user (
sort { $logins_by_user{$b} <=> $logins_by_user{$a} || $a cmp $b } #users
) {
print("$user: $logins_by_user{$user}\n");
}
Here's a version which uses a hash and prints the users sorted by login count:
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;
open(my $passwd, "<", "/etc/passwd") or die "/etc/passwd failed to open.\n";
my %login_count;
while (<$passwd>) {
my #splitarray = split(/\:/);
if ($splitarray[2] >= 1000 && $splitarray[2] < 65000) {
my $user = $splitarray[0];
my #logins = qx(last $user);
# Remove lines that don't mention the user name
#logins = grep /$user/, #logins;
# Using an array in scalar context returns the number of items in the array
$login_count{$user} = #logins;
}
}
close $passwd;
# List users sorted by login count
say "$_ : $login_count{$_}"
for sort { $login_count{$a} cmp $login_count{$b} } keys %login_count;
Related
I am very new at perl and had discovered the solution at:
Perl: Compare Two CSV Files and Print out differences
I have gone through dozens of other solutions and this comes closest, except that instead of finding the differences between 2 CSV files, I want to find where the second CSV file matches the first one in column and row. How could I modify the following script to find the matches in column/row instead of the differences. I am hoping to dissect this code and learn arrays from there, but wanted to find out the solution to this application. Much thanks.
use strict;
my #arr1;
my #arr2;
my $a;
open(FIL,"a.txt") or die("$!");
while (<FIL>)
{chomp; $a=$_; $a =~ s/[\t;, ]*//g; push #arr1, $a if ($a ne '');};
close(FIL);
open(FIL,"b.txt") or die("$!");
while (<FIL>)
{chomp; $a=$_; $a =~ s/[\t;, ]*//g; push #arr2, $a if ($a ne '');};
close(FIL);
my %arr1hash;
my %arr2hash;
my #diffarr;
foreach(#arr1) {$arr1hash{$_} = 1; }
foreach(#arr2) {$arr2hash{$_} = 1; }
foreach $a(#arr1)
{
if (not defined($arr2hash{$a}))
{
push #diffarr, $a;
}
}
foreach $a(#arr2)
{
if (not defined($arr1hash{$a}))
{
push #diffarr, $a;
}
}
print "Diff:\n";
foreach $a(#diffarr)
{
print "$a\n";
}
# You can print to a file instead, by: print FIL "$a\n";
ok, I realize that this was more what I was looking for:
use strict;
use warnings;
use feature qw(say);
use autodie;
use constant {
FILE_1 => "file1.txt",
FILE_2 => "file2.txt",
};
#
# Load Hash #1 with value from File #1
#
my %hash1;
open my $file1_fh, "<", FILE_1;
while ( my $value = <$file1_fh> ) {
chomp $value;
$hash1{$value} = 1;
}
close $file1_fh;
#
# Load Hash #2 with value from File #2
#
my %hash2;
open my $file2_fh, "<", FILE_2;
while ( my $value = <$file2_fh> ) {
chomp $value;
$hash2{$value} = 1;
}
close $file2_fh;
Now I want to search file2's hash to check if there are ANY matches from file1's hash. That is where I am stuck
With new code suggestion, code now looks like this
#!/usr/bin/env perl
use strict;
use warnings;
use feature qw(say);
use autodie;
use constant {
FILE_1 => "masterlist.csv",
FILE_2 => "pastebin.csv",
};
#
# Load Hash #1 with value from File #1
#
my %hash1;
open my $file1_fh, "<", FILE_1;
while ( my $value = <$file1_fh> ) {
chomp $value;
$hash1{$value} = 1;
}
close $file1_fh;
my %hash2;
open my $file2_fh, "<", FILE_2;
while ( my $value = <$file2_fh> ) {
chomp $value;
if ( $hash1{$value} ) {
print "Match found $value\n";
$hash2{$value}++;
}
}
close $file2_fh;
print "Matches found:\n";
foreach my $key ( keys %hash2 ) {
print "$key found $hash2{$key} times\n";
}
I updated one part with split() and it seems to work, but have to test more to confirm if it fits the solution I'm looking for or I have more work to do one it
#
# Load Hash #1 with value from File #1
#
my %hash1;
open my $file1_fh, "<", FILE_1;
while ( my $value = <$file1_fh> ) {
chomp $value;
$hash1{$value} = ( %hash1, (split(/,/, $_))[1,2] );
}
close $file1_fh;
So, with your code there - you've read in 'file1' to a hash.
Why not instead of reading file 2 into a hash, do instead:
my %hash2;
open my $file2_fh, "<", FILE_2;
while ( my $value = <$file2_fh> ) {
chomp $value;
if ( $hash1{$value} ) {
print "Match found $value\n";
$hash2{$value}++;
}
}
close $file2_fh;
print "Matches found:\n";
foreach my $key ( keys %hash2 ) {
print "$key found $hash2{$key} times\n";
}
I think this code identifies every place that a data field in file A matches a data field in file B (at least it does on my limited test data):
use strict;
use warnings;
my #arr1;
my #arr2;
# a.txt -> #arr1
my $file_a_name = "poster_a.txt";
open(FIL,$file_a_name) or die("$!");
my $a_line_counter = 0;
while (my $a_line = <FIL>)
{
$a_line_counter = $a_line_counter + 1;
chomp($a_line);
my #fields = (split /,/,$a_line);
my $num_fields = scalar(#fields);
s{^\s+|\s+$}{}g foreach #fields;
push #arr1, \#fields if ( $num_fields ne 0);
};;
close(FIL);
my $file_b_name = "poster_b.txt";
open(FIL,$file_b_name) or die("$!");
while (my $b_line = <FIL>)
{
chomp($b_line);
my #fields = (split /,/,$b_line);
my $num_fields = scalar(#fields);
s{^\s+|\s+$}{}g foreach #fields;
push #arr2, \#fields if ( $num_fields ne 0)
};
close(FIL);
# b.txt -> #arr2
#print "\n",#arr2, "\n";
my #match_array;
my $file_a_line_ctr = 1;
foreach my $file_a_line_fields (#arr1)
{
my $file_a_column_ctr = 1;
foreach my $file_a_line_field (#{$file_a_line_fields})
{
my $file_b_line_ctr = 1;
foreach my $file_b_line_fields(#arr2)
{
my $file_b_column_ctr = 1;
foreach my $file_b_field (#{$file_b_line_fields})
{
if ( $file_b_field eq $file_a_line_field )
{
my $match_info =
"$file_a_name line $file_a_line_ctr column $file_a_column_ctr" .
" (${file_a_line_field}) matches: " .
"$file_b_name line $file_b_line_ctr column $file_b_column_ctr ";
push(#match_array, $match_info);
print "$match_info \n";
}
$file_b_column_ctr = $file_b_column_ctr + 1;
}
$file_b_line_ctr = $file_b_line_ctr + 1;
}
$file_a_column_ctr = $file_a_column_ctr + 1;
}
$file_a_line_ctr = $file_a_line_ctr + 1;
}
print "there were ", scalar(#match_array)," matches\n";
I am new to perl require your help to build a logic.
I have some let say 10 files in a directory, each file has some data like below. Each file contain lines depends upon the number of users setting. For example if 4 users are there then 4 lines will get printed from the server.
1405075666889,4044,SOA_breade,200,OK,Thread Group 1-1,text,true,623,4044
1405075666889,4041,SOA_breade,200,OK,Thread Group 1-1,text,true,623,4041
1405075666889,4043,SOA_breade,200,OK,Thread Group 1-1,text,true,623,4043
1405075666889,4045,SOA_breade,200,OK,Thread Group 1-1,text,true,623,4044
I want to some piece of logic that should create single file in a output directory and that file should contain 10 lines
Min_Value, Max_Value, Avg_Value, User1, User2, User3......User4
and their corresponding values from second line in this case corresponding values are coming from second column.
Min_Value, Max_Value, Avg_Value, User1, User2, User3......User4
4.041,4.045,4.044,4.041,4.043,4.045
.
.
.
.
.
10th file data
Here is my code... It is working however I am not getting how to print user1, user2... in sequence and its corresponding values
my #soaTime;
my #soaminTime;
my #soamaxTime;
my #soaavgTime;
my $soadir = $Json_Time;
foreach my $inputfile (glob("$soadir/*Overview*.txt")) {
open(INFILE, $inputfile) or die("Could not open file.");
foreach my $line (<INFILE>) {
my #values = split(',', $line); # parse the file
my $time_ms = $values[1]/1000;
push (#soaTime, $time_ms);
}
my $min = min #soaTime;
push (#soaminTime, $min);
print $soaminTime[0];
my $max = max #soaTime;
push (#soamaxTime, $max);
sub mean { return #_ ? sum(#_) / #_ : 0 };
#print mean(#soaTime);
push (#soaavgTime, mean());
close(INFILE);
}
my $outputfile = $report_path."abc.txt";
open (OUTFILE, ">$outputfile");
print OUTFILE ("Min_Value,Max_Value,User1,User2,User3,User4"."\n"); # Prining the data
for (my $count = 0; $count <= $#soaTC; $count++) {
print OUTFILE("$soaminTime[0],$soamaxTime[0],$soaTime[0],$soaTime[1],$soaTime[2],$soaTime[3]"."\n" ); #Prining the data
}
close(OUTFILE);
Please help.
is it?
use strict;
use List::Util qw( min max );
my $Json_Time="./test";
my $report_path="./out/";
my #soaTime;
my #soaminTime;
my #soamaxTime;
my #soaavgTime;
my #users;
my $maxusers;
my $soadir = $Json_Time;
foreach my $inputfile (glob("$soadir/*Overview*.txt")) {
open(INFILE, $inputfile) or die("Could not open file.");
my $i=0;
my #m_users;
my #m_soaTime;
foreach my $line (<INFILE>) {
my #values = split(',', $line); # parse the file
my $time_ms = $values[1]/1000;
push (#m_soaTime, $time_ms);
$i++;
push(#m_users, "User".$i);
}
push(#soaTime,\#m_soaTime);
if ($maxusers<$#m_users) {
#users=#m_users;
$maxusers=$#m_users;
}
my $min = min(#m_soaTime);
push (#soaminTime, $min);
my $max =max(#m_soaTime);
push (#soamaxTime, $max);
sub mean { return #_ ? sum(#_) / #_ : 0 };
push (#soaavgTime, mean());
close(INFILE);
}
my $outputfile = $report_path."abc.txt";
open (OUTFILE, ">$outputfile");
print OUTFILE "Min_Value,Max_Value,Avg_Value,".join(',',#users)."\n"; # Prining the data
for (my $count = 0; $count <= $#soaavgTime; $count++) {
print OUTFILE $soaminTime[$count].","
.$soamaxTime[$count].","
.$soaavgTime[$count].","
.join(',',#{$soaTime[$count]})
."\n"; #Prining the data
}
close(OUTFILE);
I have two files with two columns each:
FILE1
A B
1 #
2 #
3 !
4 %
5 %
FILE 2
A B
3 #
4 !
2 &
1 %
5 ^
The Perl script must compare column A in both both files, and only if they are equal, column B of FIlE 2 must be printed
So far I have the following code but all I get is an infinite loop with # from column B
use strict;
use warnings;
use 5.010;
print "enter site:"."\n";
chomp(my $s = <>);
print "enter protein:"."\n";
chomp(my $p = <>);
open( FILE, "< $s" ) or die;
open( OUT, "> PSP.txt" ) or die;
open( FILE2, "< $p" ) or die;
my #firstcol;
my #secondcol;
my #thirdcol;
while ( <FILE> )
{
next if $. <2;
chomp;
my #cols = split;
push #firstcol, $cols[0];
push #secondcol, $cols[1]."\t"."\t".$cols[3]."\t"."\t"."\t"."N\/A"."\n";
}
my #firstcol2;
my #secondcol2;
my #thirdcol2;
while ( <FILE2> )
{
next if $. <2;
my #cols2 = split(/\t/, $_);
push #firstcol2, $cols2[0];
push #secondcol2, $cols2[4]."\n";
}
my $size = #firstcol;
my $size2 = #firstcol2;
for (my $i = 0; $i <= #firstcol ; $i++) {
for (my $j = 0; $j <= #firstcol2; $j++) {
if ( $firstcol[$i] eq $firstcol2[$j] )
{
print $secondcol2[$i];
}
}
}
my (#first, #second);
while(<first>){
chomp;
my $foo = split / /, $_;
push #first , $foo;
}
while(<second>){
chomp;
my $bar = split / / , $_;
push #second, $bar;
}
my %first = #first;
my %second = #second;
Build a hash of the first file as %first and second file as %second with first column as key and second column as value.
for(keys %first)
{
print $second{$_} if exists $second{$_}
}
I couldn't check it as I am on mobile. hope that gives you an idea.
I assume that column A is ordered and that you actually want to compare the first entry in File 1 to the first entry in File 2, and so on.
If that's true, you have nested loop that you don't need. Simplify your last while as such:
for my $i (0..$#firstcol) {
if ( $firstcol[$i] eq $firstcol2[$i] )
{
print $secondcol2[$i];
}
}
Also, if you're at all concerned about the files being of different length, then you can adjust the loop:
use List::Util qw(min);
for my $i (0..min($#firstcol, $#firstcol2)) {
Additional Note: You aren't chomping your data in the second file loop while ( <FILE2> ). That might introduce a bug later.
If your files are called file1.txt and file2.txt the next:
use Modern::Perl;
use Path::Class;
my $files;
#{$files->{$_}} = map { [split /\s+/] } grep { !/^\s*$/ } file("file$_.txt")->slurp for (1..2);
for my $line1 (#{$files->{1}}) {
my $line2 = shift #{$files->{2}};
say $line2->[1] if ($line1->[0] eq $line2->[0]);
}
prints:
B
^
equals in column1 only the lines A and 5
without the CPAN modules - produces the same result
use strict;
use warnings;
my $files;
#{$files->{$_}} = map { [split /\s+/] } grep { !/^\s*$/ } do { local(#ARGV)="file$_.txt";<> } for (1..2);
for my $line1 (#{$files->{1}}) {
my $line2 = shift #{$files->{2}};
print $line2->[1],"\n" if ($line1->[0] eq $line2->[0]);
}
Given a set of genes and existing pair of genes, I want to generate new pairs of genes which are not already existing.
The genes file has the following format :
123
134
23455
3242
3423
...
...
The genes pairs file has the following format :
12,345
134,23455
23455,343
3242,464452
3423,7655
...
...
But I still get few common elements between known_interactions and new_pairs. I'm not sure where the error is.
For the arguments,
perl generate_random_pairs.pl entrez_genes_file known_interactions_file 250000
I got a common elements of 15880. The number 250000 is to tell how many random pairs I want the program to generate.
#! usr/bin/perl
use strict;
use warnings;
if (#ARGV != 3) {
die "Usage: generate_random_pairs.pl <entrez_genes> <known_interactions> <number_of_interactions>\n";
}
my ($e_file, $k_file, $interactions) = #ARGV;
open (IN, $e_file) or die "Error!! Cannot open $e_file\n";
open (IN2, $k_file) or die "Error!! Cannot open $k_file\n";
my #e_file = <IN>; s/\s+\z// for #e_file;
my #k_file = <IN2>; s/\s+\z// for #k_file;
my (%known_interactions);
my %entrez_genes;
$entrez_genes{$_}++ foreach #e_file;
foreach my $line (#k_file) {
my #array = split (/,/, $line);
$known_interactions{$array[0]} = $array[1];
}
my $count = 0;
foreach my $key1 (keys %entrez_genes) {
foreach my $key2 (keys %entrez_genes) {
if ($key1 != $key2) {
if (exists $known_interactions{$key1} && ($known_interactions{$key1} == $key2)) {next;}
if (exists $known_interactions{$key2} && ($known_interactions{$key2} == $key1)) {next;}
if ($key1 < $key2) { print "$key1,$key2\n"; $count++; }
else { print "$key2,$key1\n"; $count++; }
}
if ($count == $interactions) {
die "$count\n";
}
}
}
I can see nothing wrong with your code. I wonder if you have some whitespace in your data - either after the comma or at the end of the line? It would be safer to extract just the digit fields with, for instance
my #e_file = map /\d+/g, <IN>;
Also, you would be better off keeping both elements of the pair as the hash key, so that you can just check the existence of the element. And if you make sure the lower number is always first you don't need to do two lookups.
This example should work for you. It doesn't address the random selection part of your requirement, but that wasn't in your own code and wasn't your immediate problem
use strict;
use warnings;
#ARGV = qw/ entrez_genes.txt known_interactions.txt 9 /;
if (#ARGV != 3) {
die "Usage: generate_random_pairs.pl <entrez_genes> <known_interactions> <number_of_interactions>\n";
}
my ($e_file, $k_file, $interactions) = #ARGV;
open my $fh, '<', $e_file or die "Error!! Cannot open $e_file: $!";
my #e_file = sort { $a <=> $b } map /\d+/g, <$fh>;
open $fh, '<', $k_file or die "Error!! Cannot open $k_file: $!";
my %known_interactions;
while (<$fh>) {
my $pair = join ',', sort { $a <=> $b } /\d+/g;
$known_interactions{$pair}++;
}
close $fh;
my $count = 0;
PAIR:
for my $i (0 .. $#e_file-1) {
for my $j ($i+1 .. $#e_file) {
my $pair = join ',', #e_file[$i, $j];
unless ($known_interactions{$pair}) {
print $pair, "\n";
last PAIR if ++$count >= $interactions;
}
}
}
print "\nTotal of $count interactions\n";
first of all, you are not chomping (removing newlines) from your file of known interactions. That means that given a file like:
1111,2222
you will build this hash:
$known_interactions{1111} = "2222\n";
That is probably why you are getting duplicate entries. My guess is (can't be sure without your actual input files) that these loops should work ok:
map{
chomp;
$entrez_genes{$_}++ ;
}#e_file;
and
map {
chomp;
my #array = sort(split (/,/));
$known_interactions{$array[0]} = $array[1];
}#k_file;
Also, as a general rule, I find my life is easier if I sort the interacting pair (the joys of bioinformatics :) ). That way I know that 111,222 and 222,111 will be treated in the same way and I can avoid multiple if statements like you have in your code.
Your next loop would then be (which IMHO is more readable):
my #genes=keys(%entrez_genes);
for (my $i=0; $i<=$#genes;$i++) {
for (my $k=$n; $k<=$#genes;$k++) {
next if $genes[$n] == $genes[$k];
my #pp=sort($genes[$n],$genes[$k]);
next unless exists $known_interactions{$pp[0]};
next if $known_interactions{$pp[0]} == $pp[1];
print "$pp[0], $pp[1]\n";
$count++;
die "$count\n" if $count == $interactions;
}
}
I learning Perl and I want to create a simple application that gets all my emails and save they to a file, but how I can do this? Thanks.
I used to use the following script to filter SpamAssassin flagged email before switching ISPs:
#!/usr/bin/perl
use strict;
use warnings;
$| = 1;
use constant SEVERITY => 5;
use Mail::POP3Client;
use Term::ReadKey;
my $user = shift;
my $pop = Mail::POP3Client->new(
HOST => '127.0.0.1',
PORT => 9999
);
my $pass = prompt_password();
print "\n";
$pop->User($user);
$pop->Pass($pass);
$pop->Connect or die $pop->Message;
my $count = $pop->Count;
$count >= 0 or die "Failed to get message count.\n";
$count > 0 or die "No messages in mailbox.\n";
my #to_delete;
print "Scanning messages: ";
my $to_delete = 0;
for my $msg_num (1 .. $count) {
my #headers = $pop->Head($msg_num);
for my $h (#headers) {
if($h =~ /^X-Spam-Level: (\*+)/) {
if(SEVERITY <= length $1) {
$to_delete += 1;
$pop->Delete($msg_num);
print "\b*>";
} else {
print "\b->";
}
}
}
}
print "\b ... done\n";
use Lingua::EN::Inflect qw( PL );
if( $to_delete ) {
printf "%d %s will be deleted. Commit: [Y/N]?\n",
$to_delete, PL('message', $to_delete);
$pop->Reset unless yes();
}
$pop->Close;
print "OK\n";
sub yes {
while(my $r = <STDIN>) {
$r = lc substr $r, 0, 1;
return 1 if $r eq 'y';
next unless $r eq 'n';
last;
}
0;
}
sub prompt_password {
print 'Password: ';
ReadMode 2;
my $pass = ReadLine 0;
ReadMode 0;
chomp $pass;
return $pass;
}
It is trivial to change this so it saves messages. See Mail::POP3Client.
POP3 example in Perl
The answer to almost any such question is "Find the right module on CPAN Search".
Most modules come with examples in the documentation and tests.
Good luck, :)