PerlScript to take certain words from text file - perl

I have a text File with data such that:
#alstrong textert tcp $EXTERNAL_NET $HTTP_PORTS -> $HOME_NET any (msg:"ET ACTIVEX Microsoft Whale Intelligent Application Gateway ActiveX Buffer Overflow-1"; flow:established,to_client; file_data; **content:"8D9563A9-8D5F-459B-87F2-BA842255CB9A"**; nocase; **content:"CheckForUpdates"**; nocase; distance:0; pcre:"/<OBJECT\s+[^>]*classid\s*=\s*[\x22\x27]?\s*clsid\s*\x3a\s*\x7B?\s*8D9563A9-8D5F-459B-87F2-BA842255CB9A/si";reference:url,dev.metasploit.com/redmine/projects/framework/repository/entry/modules/exploits/windows/browser/mswhale_checkforupdates.rb; reference:url,www.kb.cert.org/vuls/id/789121; reference:url,doc.emergingthreats.net/2010562; classtype:web-application-attack; sid:2010562; rev:6; metadata:affected_product Windows_XP_Vista_7_8_10_Server_32_64_Bit, attack_target Client_Endpoint, created_at 2010_07_30, deployment Perimeter, signature_severity Major, tag ActiveX, updated_at 2016_07_01;)
I need to extract all words in the fields named "Content" and store them in another text file. I found this code in Perl (I have no experience with it), Is it extract all fields or only the first one?
#!/local/bin/perl5 -w
# Description:
# Extract bit-pattern from content-part of Snort-rules.
# Choose rules that have only one content-part.
# Store distinct patterns only.
# Choose length of shortest and longest pattern to store.
$rulesdir = "/hom/geirni/www_docs/research/snort202_win32/Snort/rules";
#rulefiles = `ls $rulesdir/*.rules`;
$camfile = "camdata.txt";
#
$minLength = 4; # Bytes
$maxLength = 32;
#
# Find content-part of rules
for $rulefile(#rulefiles){
#
open(INFILE, "<".$rulefile) or die
"Can't open ".$rulefile."\n";
#rules = <INFILE>;
close(INFILE);
#
for $rule(#rules){
#
$contentParts = 0;
#
if($rule =~ /content:/){
#parts = split(/;/, $rule);
for $part(#parts){
if($part =~ /content:/){
$content = $part;
$contentParts++;
# Remove anything before content-part
$content =~ s/^.*content:.*?\"//i;
# Remove anything after content-part
$content =~ s/\"$.*//g;
}
}
}
#
# Store content-part
if ($contentParts == 1){
push(#contents, $content);
}
}
}
#
#
#
# Convert content-strings to hex. Store only distinct patterns
for $content(#contents){
#
$pipe = 0; # hex patterns are limited by pipes; |00 bc 55|
$char = ""; # Current character in content; ASCII or hex
$pattern = ""; # Content converted to hex
#
# Loop through current content-string
for ($i=0; $i<=length($content)-1; $i++){ # -1 for newline
#
$char = substr($content, $i, 1);
#
# Control over pipes
if($char =~ /\|/){
if(!$pipe){
$pipe = 1;
}
else {
$pipe = 0;
}
next; # Skip to next character
}
#
# Convert to lowcase hex
if(!$pipe){ # ASCII-value
$pattern .= sprintf("%x", ord($char));
}
else { # hex-value
$char =~ s/ //; # Remove blanks
$pattern .= "\l$char";
}
}
#
# Store converted pattern
if((length($pattern) >= $minLength*2) &&
(length($pattern) <= $maxLength*2)){
$hexPatterns{$pattern} = "dummyValue"; # Keys will be distinct
}
}
#
#
#
# Print patterns, that have no subsets, to file
open(OUTFILE, ">".$camfile) or die
"Can't open ".$camfile."\n";
#
#patterns = keys %hexPatterns;
$count = 0; # Count patterns that are written to file
#
HEXLOOP:
for($i=0; $i<=$#patterns; $i++){
for($j=0; $j<=$#patterns; $j++){ # Search for subsets
#
next if($i==$j); # Do not compare a pattern with itself
#
next HEXLOOP if # Skip if subset is found
((length($patterns[$i]) <= length($patterns[$j])) &&
($patterns[$j] =~ /$patterns[$i]/));
}
print OUTFILE $patterns[$i]."\n";
$count++;
}
#
close(OUTFILE);
#
#
#
# msg
print
"\n".
" Wrote ".$count." patterns to file: \"".$camfile."\"\n".
"\n";

The following perl script extracts "content" data to screen (on line by line basis). To store data, redirect output into a file.
#!/usr/bin/env perl
#
# vim: ai ts=4 sw=4
use strict;
use warnings;
use feature 'say';
while( my $line = <> ) {
my #array = $line =~ /content:"(.*?)"/g;
say join "\t", #array;
}
Run the script as script.pl filename
Output
8D9563A9-8D5F-459B-87F2-BA842255CB9A CheckForUpdates

I found this code in Perl (I have no experience with it)
Rather than a Perl script you dare not even read the comments of which, consider just to use:
grep -Po 'content:".*?"' <text >another_text
To remove content:, the quotes and the dashes, you can use:
grep -Po '(?<=content:").*?(?=")' <text | tr -d - >another_text

#Armali
I want to edit this part in the code so that it can check again in the same line if there are other content Parts and also extract them and print them in different lines:
#
if($rule =~ /content:/){
#parts = split(/;/, $rule);
for $part(#parts){
if($part =~ /content:/){
$content = $part;
$contentParts++;
# Remove anything before content-part
$content =~ s/^.*content:.*?\"//i;
# Remove anything after content-part
$content =~ s/\"$.*//g;
}
}
}
#
# Store content-part
if ($contentParts == 1){
push(#contents, $content);
}
}
}

Related

How to check whether one file's value contains in another text file? (perl script)

I would like to check one of the file's values contains on another file. if one of the value contains it will show there is existing bin for that specific, if no, it will show there is no existing bin limit. the problem is I am not sure how to check all values at once.
first DID1 text file value contain :
L84A:D:O:M:
L84C:B:E:D:
second DID text file value contain :
L84A:B:E:Q:X:F:i:M:Y:
L84C:B:E:Q:X:F:i:M:Y:
L83A:B:E:Q:X:F:i:M:Y:
if first 4words value are match, need to check all value for that line.
for example L84A in first text file & second text file value has M . it should print out there is an existing M bin
below is my code :
use strict;
use warnings;
my $filename = 'DID.txt';
my $filename1 = 'DID1.txt';
my $count = 0;
open( FILE2, "<$filename1" )
or die("Could not open log file. $!\n");
while (<FILE2>) {
my ($number) = $_;
chomp($number);
my #values1 = split( ':', $number );
open( FILE, "<$filename" )
or die("Could not open log file. $!\n");
while (<FILE>) {
my ($line) = $_;
chomp($line);
my #values = split( ':', $line );
foreach my $val (#values) {
if ( $val =~ /$values1[0]/ ) {
$count++;
if ( $values[$count] =~ /$values1[$count]/ ) {
print
"Yes ,There is an existing bin & DID\n #values1\n";
}
else {
print "No, There is an existing bin & DID\n";
}
}
}
}
}
I cannot check all value. please help to give any advice on it since this is my first time learning for perl language. Thanks a lot :)
Based on my understanding I write this code:
use strict;
use warnings;
#use ReadWrite;
use Array::Utils qw(:all);
use vars qw($my1file $myfile1cnt $my2file $myfile2cnt #output);
$my1file = "did1.txt"; $my2file = "did2.txt";
We are going to read both first and second files (DID1 and DID2).
readFileinString($my1file, \$myfile1cnt); readFileinString($my2file, \$myfile2cnt);
In first file, as per the OP's request the first four characters should be matched with second file and then if they matched we need to check rest of the characters in the first file with the second one.
while($myfile1cnt=~m/^((\w){4})\:([^\n]+)$/mig)
{
print "<LineStart>";
my $lineChk = $1; my $full_Line = $3; #print ": $full_Line\n";
my #First_values = split /\:/, $full_Line; #print join "\n", #First_values;
If the first four digit matched then,
if($myfile2cnt=~m/^$lineChk\:([^\n]+)$/m)
{
Storing the rest of the content in the same and to be split with colon and getting the characters to be matched with first file contents.
my $FullLine = $1; my #second_values = split /:/, $FullLine;
Then search each letter first and second content which matched line...
foreach my $sngletter(#First_values)
{
If the letters are matched with first and second file its going to be printed.
if( grep {$_ eq "$sngletter"} #second_values)
{
print "Matched: $sngletter\t";
}
}
}
else { print "Not Matched..."; }
This is just information that the line end.
print "<LineEnd>\n"
}
#------------------>Reading a file
sub readFileinString
#------------------>
{
my $File = shift;
my $string = shift;
use File::Basename;
my $filenames = basename($File);
open(FILE1, "<$File") or die "\nFailed Reading File: [$File]\n\tReason: $!";
read(FILE1, $$string, -s $File, 0);
close(FILE1);
}
Read search pattern and data into hash (first field is a key), then go through data and select only field included into pattern for this key.
use strict;
use warnings;
use feature 'say';
my $input1 = 'DID1.txt'; # look for key,pattern(array)
my $input2 = 'DID.txt'; # data - key,elements(array)
my $pattern;
my $data;
my %result;
$pattern = file2hash($input1); # read pattern into hash
$data = file2hash($input2); # read data into hash
while( my($k,$v) = each %{$data} ) { # walk through data
next unless defined $pattern->{$k}; # skip those which is not in pattern hash
my $find = join '|', #{ $pattern->{$k} }; # form search pattern for grep
my #found = grep {/$find/} #{ $v }; # extract only those of interest
$result{$k} = \#found; # store in result hash
}
while( my($k,$v) = each %result ) { # walk through result hash
say "$k has " . join ':', #{ $v }; # output final result
}
sub file2hash {
my $filename = shift;
my %hash;
my $fh;
open $fh, '<', $filename
or die "Couldn't open $filename";
while(<$fh>) {
chomp;
next if /^\s*$/; # skip empty lines
my($key,#data) = split ':';
$hash{$key} = \#data;
}
close $fh;
return \%hash;
}
Output
L84C has B:E
L84A has M

prepend the next read in line with information from the previous read in line

Development Platform: Ubuntu 17.10 mainly command line work
Tools: perl 5.26 and postgresql 9.6
goal: Convert a file so I can \COPY it into postgresql
Information: Line delimiter is the # sign
Database table columns: id work composer artist conductor orchestra album_title
problem: append next info line to current id line
In the following how do I preserve the 'mmfmm01#' so upon the next line iteration I can prepend it to that next line? As this is my first post please let me know if the code example is too much or too little.
I am going from this:
Le Nozze di Figaro, K. 492 / The Marriage of Figaro: Le Nozze di
Figaro, K. 492 / The Marriage of Figaro: Cinque ... dieci ... venti /
Five ...Ten ...Twenty
Eventually to this:
mfmm01#Cinque dieci venti#Mozart####Entertaining Made Simple Merlot,
Filet Mignon, Mozart
After running the script I have the following:
mfmm01#
How do I have to preserve the 'mfmm01#' so upon the next line iteration I can prepend it to that next line?
#!/usr/bin/perl
# use clauses for File, cwd, etc#
# usage statment
# Variables - append _orig to input_file
my $id = $ARGV[1];
my $input_file = $ARGV[0];
my $album_title = $ARGV[2];
my $output_file = output_file;
my $input_file_orig = $input_file;
$input_file_orig = $input_file_orig .= _orig;
##############################################
# Ensure that the provided input file exists #
##############################################
##########################################################
# Read all file lines into an array #
##########################################################
###########################################################
# Modify each line to meet the following specs: #
# id#work#composer#artist#conductor#orchestra#album_title #
###########################################################
for my $line (#lines) {
$line =~ s/[\n\r\t]+//g;
######################################################
# Ignore lines with num:num, lines that begin with $ #
# and emptry string lines #
# ####################################################
if ( $line =~ /[0-9]:/m ) {
next;
} elsif ( $line =~ /^\$/m ) {
next;
} else {
if ( $line =~ /^\s*$/m ) {
next;
}
}
########################################################
# If line is a number followed by a space, prepend id #
# and replace space with the # character #
########################################################
if ( $line =~ /^\d\d\s/m ) {
$id_num = $line;
$id_num =~ s/(\d\d)\s/$id$1#/g;
} else {
if ( $line =~ /^\d\s/m ) {
$id_num = $line;
$id_num =~ s/(\d)\s/$id$1#/g;
# print ("\$line after removing space: \"$line\"\n");
}
}
####################################################
# If line begins with an alphabetic character then #
# prepend id_num and append album_title #
####################################################
if ( $line =~ /Sold/m ) {
next;
}
if ( $line =~ /^[A-Z]/m ) {
################################################i##
# At this point $line exists but $id_num is empty #
# I thought $id_num would live through the next #
# line read #
###################################################
$prepend_line =~ s/($line)/$id_num$1/g;
print("$prepend_line");
$append_line =~ s/($prepend_line)/$1#Mozart###$album_title/g;
open my $ofh, '>>', $output_file or die $!;
print $ofh "$append_line\n";
close $ofh or die $!;
print("\$append_line: $append_line\n");
}
}
1;
I will fix this by capturing the value to a file. Upon the next line read I will extract the value and prepend it to the string. Thank you for looking at this. I don't know why I didn't think of this before.
Thank you;
Sherman

Perl WWW::Mechanize doesn't print results when reading input data from a data file

I am using Perl with WWW::Mechanize to retrieve the stock exchanges from Yahoo Finance, given a list of stock symbols.
The following code writes to a file
#!/usr/bin/perl
# program name: FindStockExchange.pl
use strict;
use warnings;
use WWW::Mechanize;
use Storable;
use Getopt::Long;
#cmd: clear; ./FindStockExchange.pl A AA AA.V AAA.TO -f ~/symbol_out.txt
# Find Stock Exchange for a given Stock Symbole
# Command line options:
# -s Symbol
# -f Output filename
# Initialize variables:
my $urlBase = 'http://finance.yahoo.com/q?s = '; # Before symbol
my $urlSuffix = '&ql = 0'; # After symbol
my $url = '';
my $oFile = '';
my $symbol = '';
my $c = '';
# Read command line options.
GetOptions(
'f=s' => \$oFile #Output filename
) or die "Incorrect usage!\n";
# Ouptput file(s)
open(OUTSYM, ">$oFile") || die "Couldn't open file $oFile, $!";
my $m = WWW::Mechanize->new(autocheck => 0);
foreach $symbol (#ARGV) {
$url = $urlBase . $symbol . $urlSuffix;
$m->get($url);
$c = $m->content; # Places html page source text into variable
# Text pattern: <div class="title"><h2>Electrolux AB (ELUXY)</h2> <span class="rtq_exch"><span class="rtq_dash">-</span>OTC Markets </span></div>
$c =~ m{rtq_dash\">-</span>(.*?)</span>}s or next;
print OUTSYM "$symbol\t$1\n"; # Write output file
print "$symbol\t$1\t" . "\n"; # Write to STDOUT
}
close OUTFIL;
The following code reads from an input file and creates an empty data file. The input file contained the following stock symbols:
A
AA
AA.V
AAA.TO
#!/usr/bin/perl
# program name: FindStockExchange2.pl
use strict;
use warnings;
use WWW::Mechanize;
use Storable;
use Getopt::Long;
#cmd: clear; ./FindStockExchange2.pl -i ~/symbol_in.txt -o ~/symbol_out2.txt
# Find Stock Exchange for a given Stock Symbole
# Command line options:
# -i Input filename
# -o Output filename
# Initialize variables:
my $urlBase = 'http://finance.yahoo.com/q?s='; # Before symbol
my $urlSuffix = '&ql=0'; # After symbol
my $url = '';
my $oFile = '';
my $iFile = '';
my $symbol = '';
my $c = '';
# Read command line options.
GetOptions(
'o=s' => \$oFile, #Output filename
'i=s' => \$iFile #Input filename
) or die "Incorrect usage!\n";
# File(s)
open(OUTSYM, ">$oFile") || die "Couldn't open file $oFile, $!";
open(INSYM, "<$iFile") || die "Couldn't open file $iFile, $!";
my $m = WWW::Mechanize->new(autocheck => 0);
while (<INSYM>) {
$symbol = chomp($_);
$url = $urlBase . $symbol . $urlSuffix;
$m->get($url);
$c = $m->content; # Places html page source text into variable
# Text pattern: <div class="title"><h2>Electrolux AB (ELUXY)</h2> <span class="rtq_exch"><span class="rtq_dash">-</span>OTC Markets </span></div>
$c =~ m{rtq_dash\">-</span>(.*?)</span>}s or next;
print OUTSYM "$symbol\t$1\n"; # Write output file
print "$symbol\t$1\t" . "\n"; # Write to STDOUT
}
close INSYM;
close OUTSYM;
Why would changing from a foreach loop to reading an input file using a while loop produce different results?
foreach code creates a file containing the following:
A NYSE
AA NYSE
AA.V TSXV
AAA.TO Toronto
To-Air-Is:~ vlis
But a while loop creates an empty file.
Two problems here:
1) chomp returns the number of characters removed. But you are setting $symbol to the result of chomp. It should be something like this:
chomp;
$symbol = $_;
Clarification in response to comment by #Vin
You could even do this:
$symbol = $_;
chomp($symbol);
But, you should NOT do this:
$symbol = chomp($_);
Because chomp($_) will remove the newline from $_ but it will return the number of characters removed.
2) If you are putting symbols on one line within the input file, then $symbol could end up being a string of more than one symbol. So you probably need to split those up or require every symbol to be on its own line

When comparing two files, how do I skip (ignore) blank lines?

I'm comparing line against line of two text files, ref.txt (reference) and log.txt. But there may be an arbitrary number of blank lines in either file that I'd like to ignore; how can I accomplish this?
ref.txt
one
two
three
end
log.txt
one
two
three
end
There would be no incorrect log lines in the output, in other words log.txt matches with ref.txt.
What I like to accomplish in pseudo code:
while (traversing both files at same time) {
if ($l is blank line || $r is blank line) {
if ($l is blank line)
skip to next non-blank line
if ($r is blank line)
skip to next non-blank line
}
#continue with line by line comparison...
}
My current code:
use strict;
use warnings;
my $logPath = ${ARGV [0]};
my $refLogPath = ${ARGV [1]} my $r; #ref log line
my $l; #log line
open INLOG, $logPath or die $!;
open INREF, $refLogPath or die $!;
while (defined($l = <INLOG>) and defined($r = <INREF>)) {
#code for skipping blank lines?
if ($l ne $r) {
print $l, "\n"; #Output incorrect line in log file
$boolRef = 0; #false==0
}
}
If you are on a Linux platform, use :
diff -B ref.txt log.txt
The -B option causes changes that just insert or delete blank lines to be ignored
You can skip blank lines by comparing it to this regular expression:
next if $line =~ /^\s*$/
This will match any white space or newline characters which can potentially make up a blank line.
This way seems the most "perl-like" to me. No fancy loops or anything, just slurp the files and grep out the blank lines.
use warnings;
$f1 = "path/file/1";
$f2 = "path/file/2";
open(IN1, "<$f1") or die "Cannot open file: $f1 ($!)\n";
open(IN2, "<$f2") or die "Cannot open file: $f2 ($!)\n";
chomp(#lines1 = <IN1>); # slurp the files
chomp(#lines2 = <IN2>);
#l1 = grep(!/^\s*$/,#lines1); # get the files without empty lines
#l2 = grep(!/^\s*$/,#lines2);
# something like this to print the non-matching lines
for $i (0 .. $#l1) {
print "[$f1 $i]: $l1[$i]\n[$f2 $i]: $l2[$i]\n" if($l1[$i] ne $l2[$i]);
}
You can loop to find each line, each time:
while(1) {
while(defined($l = <INLOG>) and $l eq "") {}
while(defined($r = <INREF>) and $r eq "") {}
if(!defined($l) or !defined($r)) {
break;
}
if($l ne $r) {
print $l, "\n";
$boolRef = 0;
}
}
man diff
diff -B ref.txt log.txt
# line skipping code
while (defined($l=<INLOG>) && $l =~ /^$/ ) {} # no-op loop exits with $l that has length
while (defined($r=<INREF>) && $r =~ /^$/ ) {} # no-op loop exits with $r that has length

Cleanest Perl parser for Makefile-like continuation lines

A perl script I'm writing needs to parse a file that has continuation lines like a Makefile. i.e. lines that begin with whitespace are part of the previous line.
I wrote the code below but don't feel like it is very clean or perl-ish (heck, it doesn't even use "redo"!)
There are many edge cases: EOF at odd places, single-line files, files that start or end with a blank line (or non-blank line, or continuation line), empty files. All my test cases (and code) are here: http://whatexit.org/tal/flatten.tar
Can you write cleaner, perl-ish, code that passes all my tests?
#!/usr/bin/perl -w
use strict;
sub process_file_with_continuations {
my $processref = shift #_;
my $nextline;
my $line = <ARGV>;
$line = '' unless defined $line;
chomp $line;
while (defined($nextline = <ARGV>)) {
chomp $nextline;
next if $nextline =~ /^\s*#/; # skip comments
$nextline =~ s/\s+$//g; # remove trailing whitespace
if (eof()) { # Handle EOF
$nextline =~ s/^\s+/ /;
if ($nextline =~ /^\s+/) { # indented line
&$processref($line . $nextline);
}
else {
&$processref($line);
&$processref($nextline) if $nextline ne '';
}
$line = '';
}
elsif ($nextline eq '') { # blank line
&$processref($line);
$line = '';
}
elsif ($nextline =~ /^\s+/) { # indented line
$nextline =~ s/^\s+/ /;
$line .= $nextline;
}
else { # non-indented line
&$processref($line) unless $line eq '';
$line = $nextline;
}
}
&$processref($line) unless $line eq '';
}
sub process_one_line {
my $line = shift #_;
print "$line\n";
}
process_file_with_continuations \&process_one_line;
How about slurping the whole file into memory and processing it using regular expressions. Much more 'perlish'. This passes your tests and is much smaller and neater:
#!/usr/bin/perl
use strict;
use warnings;
$/ = undef; # we want no input record separator.
my $file = <>; # slurp whole file
$file =~ s/^\n//; # Remove newline at start of file
$file =~ s/\s+\n/\n/g; # Remove trailing whitespace.
$file =~ s/\n\s*#[^\n]+//g; # Remove comments.
$file =~ s/\n\s+/ /g; # Merge continuations
# Done
print $file;
If you don't mind loading the entire file in memory, then the code below passes the tests.
It stores the lines in an array, adding each line either to the previous one (continuation) or at the end of the array (other).
#!/usr/bin/perl
use strict;
use warnings;
my #out;
while( <>)
{ chomp;
s{#.*}{}; # suppress comments
next unless( m{\S}); # skip blank lines
if( s{^\s+}{ }) # does the line start with spaces?
{ $out[-1] .= $_; } # yes, continuation, add to last line
else
{ push #out, $_; } # no, add as new line
}
$, = "\n"; # set output field separator
$\ = "\n"; # set output record separator
print #out;