I'm trying to unzip all files in a directory using IO::Compress, but my script is failing with no error details. I used IO::Uncompress::Unzip as a reference, and it seems pretty simple, but it just dies with:
root#test:/home/user# ./unzip.pl
Its there.
unzip failed:
my $outputdir = "/tmp";
if ( <$outputdir/*.zip> ){
print "Its there.\n";
unzip '<$outputdir/*.zip>' => '<$outputdir/#1>'
or die "unzip failed: $UnzipError\n";
}
What am I doing wrong?
It took me a while to figure out the code and syntax. Basically, the syntax is:
Open the zip file.
While you read in the next file stream (nextStream).
Find the name of the file stream you're reading.
Create a new file to write to (using open or File::IO->new)
While there is data in the file stream (read)
Write to the new file's buffer.
Close the file you created.
Close the zip file.
The trick is that the two while statements will return a status of less than zero (specifically a -1) if there is a problem with the reading. They return a status of zero when they are finished. Thus, right after the while statement, you've got to check the status.
This is the code I used. Notice I don't import $UnzipError, but instead, I use the full name of the variable including it's package name.
#
# Unzip Artifact
#
my $zip_fh = IO::Uncompress::Unzip->new( $old_zip_file )
or die qq(Cannot open zip "$old_zip_file" for reading.);
#
# Go through each element in Zip file
#
while ( my $status = $zip_fh->nextStream ) {
if ( $status < 0 ) {
die qq(Error in Zip: $IO::Uncompress::Unzip::UnzipError.);
}
#
# Get name of the file you're unzipping in the zip
#
my $element_name = $zip_fh->getHeaderInfo->{Name};
next if $element_name =~ m{/$}; # Skip Directories
my $element_dir = dirname $element_name;
my $full_element_dir = File::Spec->join( $unzip_directory, $element_dir );
#
# Create the directory for the file if it doesn't exist
#
my $full_element_name = File::Spec->join( $unzip_directory, $element_name );
if ( not -d $full_element_dir ) {
make_path $full_element_dir
or die qq(Can't make directory "$full_element_dir".);
}
my $unzipped_fh = IO::File->new( $full_element_name, "w" )
or die qq(Can't open file "$full_element_name" for writing: $!);
#
# Now repeatably read the file until you've written everything
#
my $buffer;
while ( my $status = $zip_fh->read( $buffer ) ) {
if ( $status < 0 ) {
die qq(Error in Zip: $IO::Uncompress::Unzip::UnzipError.);
}
$unzipped_fh->write( $buffer );
}
$unzipped_fh->close;
}
$zip_fh->close;
You can try below
use strict ;
use warnings ;
use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
for my $input ( glob "/tmp/*.zip" )
{
my $output = $input;
$output =~ s/.zip// ;
unzip $input => $output or die "Error compressing '$input': $UnzipError\n";
}
I believe the fix that was suggested by David W. 82.2k25154274 (see below) had a small bug.
Great example and I copied it, but it always ignored one file!
Fix is to initialize $status to 1 and only call next at end of loop.
Fix:
#
# Unzip Artifact
#
my $zip_fh = IO::Uncompress::Unzip->new( $old_zip_file )
or die qq(Cannot open zip "$old_zip_file" for reading.);
#
# Go through each element in Zip file
#
my $status = 1;
while ( $status ) {
if ( $status < 0 ) {
die qq(Error in Zip: $IO::Uncompress::Unzip::UnzipError.);
}
#
# Get name of the file you're unzipping in the zip
#
my $element_name = $zip_fh->getHeaderInfo->{Name};
next if $element_name =~ m{/$}; # Skip Directories
my $element_dir = dirname $element_name;
my $full_element_dir = File::Spec->join( $unzip_directory, $element_dir );
#
# Create the directory for the file if it doesn't exist
#
my $full_element_name = File::Spec->join( $unzip_directory, $element_name );
if ( not -d $full_element_dir ) {
make_path $full_element_dir
or die qq(Can't make directory "$full_element_dir".);
}
my $unzipped_fh = IO::File->new( $full_element_name, "w" )
or die qq(Can't open file "$full_element_name" for writing: $!);
#
# Now repeatably read the file until you've written everything
#
my $buffer;
while ( my $status = $zip_fh->read( $buffer ) ) {
if ( $status < 0 ) {
die qq(Error in Zip: $IO::Uncompress::Unzip::UnzipError.);
}
$unzipped_fh->write( $buffer );
}
$unzipped_fh->close;
$status = $zip_fh->nextStream; # Getting next file if any... or 0 to quit loop...
}
$zip_fh->close;
Related
I want to modify this code
#!/usr/bin/perl -w
# Call of CPAN
use warnings;
use strict;
use Cwd;
# Variables
my $i = 0;
my $directory = getcwd;
my $file = "options";
# Opening output file and adding the header on first row
open( FILE, ">>OLTP.txt" ) or die( "Could not create file OLTP.txt" );
print FILE "User script,Serveur Name,Instance Name,Date of script,Serveur Name2,Instance Name2,ADVANCED_COMPRESSION~HEADER,TABLE_COMPRESSION~HEADER,2,count,DATA_DICTIONARY_VIEW,TABLE_OWNER,TABLE_NAME,PARTITION_NAME,COMPRESSION,COMPRESS_FOR,\n";
close FILE;
# Loop while files are found
foreach my $files ( list_files( $directory, 1, $file ) ) {
print "File : $files\n";
singlefile( $files, $file );
}
# Recursion and list integration
sub list_files {
my ( $directory, $recurse, $file ) = #_;
require File::Spec;
# Search in subdirectory or not
if ( ( not defined $recurse ) || ( $recurse != 1 ) ) {
$recurse = 0;
}
# verification directory
if ( not defined $directory ) {
die "No named directory\n";
}
# Opening a directory
opendir my $fh_rep, $directory or die "Can not open directory $directory\n";
# List files and directories, except (. and ..)
my #fic_rep = grep { !/^\.\.?$/ } readdir $fh_rep;
# Closing directory
closedir $fh_rep or die "Unable to close directory $directory\n";
# Fill list with found files
my #files;
# File or folder? if file: add files to the table. if record: start the recursion
foreach my $nom ( #fic_rep ) {
my $mycurrentfile = File::Spec->catdir( $directory, $nom );
if ( -f $mycurrentfile
and $mycurrentfile =~ m/$file/
and $mycurrentfile =~ m/\.csv$/i ) {
push( #files, $mycurrentfile );
}
elsif ( -d $mycurrentfile and $recurse == 1 ) {
push( #files, list_files( $mycurrentfile, $recurse, $file ) ); # recursion
}
}
return #files;
}
## Merge data after filtering
sub singlefile {
my ( $file, $out ) = #_;
# Open file
open( FILE, $file ) or die( "error occured while opening file" );
# Create list from file
my #save = <FILE>;
close( FILE );
# Empty table rows which do not meet criteria
foreach ( #save ) {
$_ = "" unless ( $_ =~ m/"ENABLED","OLTP",/ && $_ =~ m/^GREP/ );
$_ = "" if ( $_ =~ m/SYSMAN/ || m/SYS/ );
chomp $_;
}
# Open output file, add data, close
open( FILE, ">>OLTP.txt" ) or die( "error occured while opening file OLTP.txt" );
foreach ( #save ) {
print FILE $_ . "\n" if ( $_ );
}
close( FILE );
}
It seems to do the following
Create txt with header
Create list of files based on every files that match with criteria (option, csv)
For each file in the list, fill with all rows and then remove what do not match with the criteria unless and if
Push everything into the file with header (oltp.txt)
My Goal :
Create txt with header
Create list of file based on every files that match with criteria (option, csv)
For each files of the list, fill only with the First rows that match with the criteria unless and if
Push everything into the file with header oltp.txt. The final result should be the text with header and then a maximum of one line per file if the criteria match.
I want to a add a code in the next script to eliminate those empty output files.
The script convert a single fastq file or all the fastq files in a folder to fasta format, all the output fasta files keep the same name of the fastq file; the script present an option to exclude all the sequences that present a determinate number of NNN repeats (NNNNNNNNNNNNNNNNNNATAGTGAAGAATGCGACGTACAGGATCATCTA), I added this option because some sequences present only NNNNN in the sequences, example: if the -n option is equal to 15 (-n 15) it will exclude all the sequences that present 15 o more N repeats, to this point the code works well, but it generate an empty files (in those fastq files that all the sequences present 15 or more N repeats are excluded). I want to eliminate all the empty files (without sequences) and add a count of how many files were eliminate because it were empty.
Code:
#!/usr/bin/env perl
use strict;
use warnings;
use Getopt::Long;
my ($infile, $file_name, $file_format, $N_repeat, $help, $help_descp,
$options, $options_descrp, $nofile, $new_file, $count);
my $fastq_extension = "\\.fastq";
GetOptions (
'in=s' => \$infile,
'N|n=i' =>\$N_repeat,
'h|help' =>\$help,
'op' =>\$options
);
# Help
$help_descp =(qq(
Ussaje:
fastQF -in fastq_folder/ -n 15
or
fastQF -in file.fastq -n 15
));
$options_descrp =(qq(
-in infile.fastq or fastq_folder/ required
-n exclude sequences with more than N repeat optional
-h Help description optional
-op option section optional
));
$nofile =(qq(
ERROR: "No File or Folder Were Chosen !"
Usage:
fastQF -in folder/
Or See -help or -op section
));
# Check Files
if ($help){
print "$help_descp\n";
exit;
}
elsif ($options){
print "$options_descrp\n";
exit;
}
elsif (!$infile){
print "$nofile\n";
exit;
}
#Subroutine to convert from fastq to fasta
sub fastq_fasta {
my $file = shift;
($file_name = $file) =~ s/(.*)$fastq_extension.*/$1/;
# eliminate old files
my $oldfiles= $file_name.".fasta";
if ($oldfiles){
unlink $oldfiles;
}
open LINE, '<', $file or die "can't read or open $file\n";
open OUTFILE, '>>', "$file_name.fasta" or die "can't write $file_name\n";
while (
defined(my $head = <LINE>) &&
defined(my $seq = <LINE>) &&
defined(my $qhead = <LINE>) &&
defined(my $quality = <LINE>)
) {
substr($head, 0, 1, '>');
if (!$N_repeat){
print OUTFILE $head, $seq;
}
elsif ($N_repeat){
my $number_n=$N_repeat-1;
if ($seq=~ m/(n)\1{$number_n}/ig){
next;
}
else{
print OUTFILE $head, $seq;
}
}
}
close OUTFILE;
close LINE;
}
# execute the subrutine to extract the sequences
if (-f $infile) { # -f es para folder !!
fastq_fasta($infile);
}
else {
foreach my $file (glob("$infile/*.fastq")) {
fastq_fasta($file);
}
}
exit;
I have tried to use the next code outside of the subroutine (before exit) but it just work for the last file :
$new_file =$file_name.".fasta";
foreach ($new_file){
if (-z $new_file){
$count++;
if ($count==1){
print "\n\"The choosen File present not sequences\"\n";
print " \"or was excluded due to -n $N_repeat\"\n\n";
}
elsif ($count >=1){
print "\n\"$count Files present not sequences\"\n";
print " \" or were excluded due to -n $N_repeat\"\n\n";
}
unlink $new_file;
}
}
and I just have tried something similar inside of the subroutine but this last code donĀ“t work !!!!
Any Advise !!!!???
Thanks So Much !!!
you should check, if something was written to your new file at the end of our fastq_fasta subroutine. Just put your code after the close OUTFILE statement:
close OUTFILE;
close LINE;
my $outfile = $file_name.".fasta";
if (-z $outfile)
{
unlink $outfile || die "Error while deleting '$outfile': $!";
}
Additionally, it will be better to add the die/warn statement also to the other unlink line. Empty files should be deleted.
Maybe another solution if you are not fixed to perl, but allowed to use sed and a bash loop:
for i in *.fastq
do
out=$(dirname "$i")/$(basename "$i" .fastq).fasta
sed -n '1~4{s/^#/>/;N;p}' "$i" > "$out"
if [ -z $out ]
then
echo "Empty output file $out"
rm "$out"
fi
done
Hope that helps!
Best Frank
The easiest thing to do is probably to add a counter to your subroutine to keep track of the number of sequences in the outfile:
sub fastq_fasta {
my $counter1 = 0;
my $file = shift;
($file_name = $file) =~ s/(.*)$fastq_extension.*/$1/;
# eliminate old files
my $oldfiles= $file_name.".fasta";
if ($oldfiles){
unlink $oldfiles;
}
open LINE, '<', $file or die "can't read or open $file\n";
open OUTFILE, '>>', "$file_name.fasta" or die "can't write $file_name\n";
while (
defined(my $head = <LINE>) &&
defined(my $seq = <LINE>) &&
defined(my $qhead = <LINE>) &&
defined(my $quality = <LINE>)
) {
$counter1 ++;
substr($head, 0, 1, '>');
if (!$N_repeat){
print OUTFILE $head, $seq;
}
elsif ($N_repeat){
my $number_n=$N_repeat-1;
if ($seq=~ m/(n)\1{$number_n}/ig){
$counter1 --;
next;
}
else{
print OUTFILE $head, $seq;
}
}
}
close OUTFILE;
close LINE;
return $counter1;
}
You can then delete files when the returned count is zero:
if (-f $infile) { # -f es para folder !!
fastq_fasta($infile);
}
else {
foreach my $file (glob("$infile/*.fastq")) {
if (fastq_fasta($file) == 0) {
$file =~ s/(.*)$fastq_extension.*/$1.fasta/;
unlink $file;
}
}
}
I am trying to copy zipped files which are stored in one directory to another folder by matching there names.
Below image are the folder names which is the input for my program.
From this list i am taking only "Sample_51" folder , inside this folder i have Zipped files like below,
Now i need to copy all "R1" name files to R1 folder and all "R2" named files to R2 folder.
Below i have tried to do so , help me out to do as explained above,
#! use/bin/perl
print "Enter Folder name \n";
print "File name: ";
chomp( my $Filename = <> );
system("mkdir R1");
system("mkdir R2");
opendir( DIR, "$Filename" ) or die "cannot open directory";
foreach my $name ( readdir(DIR) ) {
next if ( $name =~ /^\./ );
next if ( $name =~ /^\.\./ );
if ( $Filename =~ /\S+(R\d)/ ) {
system("cp -r $Filename R1");
}
else {
system("cp -r $Filename R2");
}
}
After copying this one more step is there, than i will update after fixing this issue,
thanks in advance
Use File::Copy to copy the files. As #sobrique said $Filename =~ /\S+(R\d) it matches any digit followed by R.But I guess you are intersted in R1 and R2 only. Try this:
#!/usr/bin/perl
use File::Copy;
use strict;
use warnings;
print"Enter Folder name \n";
print"File name: ";
chomp(my $Filename=<>);
mkdir "R1";
mkdir "R2";
opendir(DIR,"$Filename") or die "cannot open directory";
foreach my $name (readdir(DIR))
{
next if ($name =~ /^\./);
if($name =~ /R1/) { #compare $name not $Filename
copy("$Filename/$name", "R1"); # copy the file from folder to R1 directory
}
elsif($name =~ /R2/){
copy("$Filename/$name","R2"); ## copy the file from folder to R2 directory
}
}
I think your problem is this line:
if ( $Filename =~ /\S+(R\d)/ ) {
Because what it's matching is R followed by any digit. So will be true for files called R1, R2, R9 or indeed R99999
You need to capture the result of that regular expression match. (By the way - an unanchored \S+ isn't going to do much in this example either. That just means 'one or more non spaces' before the R.
How about:
if ( my ( $R_Num ) = ( $Filename =~ /_(R\d+)_/ ) {
print "R number is $R_Num\n";
}
Because then you can work based on what that's set to (and if the match fails, the if will test false).
I am still working on cleaning up Oracle files, having to replace strings in files where the Oracle schema name is prepended to the function/procedure/package name within the file, as well as when the function/procedure/package name is double-quoted. Once the definition is corrected, I write the correction back to the file, along with the rest of the actual code.
I have code written to replace simple declarations (no input/output parameters) Now I am trying to get my regex to operate on (Note: This post is a continuation from this question) Some examples of what I'm trying to clean up:
Replace:
CREATE OR REPLACE FUNCTION "TRON2000"."DC_F_DUMP_CSV_MMA" (
p_trailing_separator IN BOOLEAN DEFAULT FALSE,
p_max_linesize IN NUMBER DEFAULT 32000,
p_mode IN VARCHAR2 DEFAULT 'w'
)
RETURN NUMBER
IS
to
CREATE OR REPLACE FUNCTION DC_F_DUMP_CSV_MMA (
p_trailing_separator IN BOOLEAN DEFAULT FALSE,
p_max_linesize IN NUMBER DEFAULT 32000,
p_mode IN VARCHAR2 DEFAULT 'w'
)
RETURN NUMBER
IS
I have been trying to use the following regex to separate the declaration, for later reconstruction after I've cleaned out the schema name / fixed the name of the function/procedure/package to not be double-quoted. I am struggling with getting each into a buffer - here's my latest attempt to grab all the middle input/output into it's own buffer:
\b(CREATE\sOR\sREPLACE\s(PACKAGE|PACKAGE\sBODY|PROCEDURE|FUNCTION))(?:\W+\w+){1,100}?\W+(RETURN)\s*(\W+\w+)\s(AS|IS)\b
Any / all help is GREATLY appreciated!
This is the script that I'm using right now to evaluate / write the corrected files:
#!/usr/bin/perl
use strict;
use warnings;
use File::Find;
use Data::Dumper;
# utility to clean strings
sub trim($) {
my $string = shift;
$string = "" if !defined($string);
$string =~ s/^\s+//;
$string =~ s/\s+$//;
# aggressive removal of blank lines
$string =~ s/\n+/\n/g;
return $string;
}
sub cleanup_packages {
my $file = shift;
my $tmp = $file . ".tmp";
my $package_name;
open( OLD, "< $file" ) or die "open $file: $!";
open( NEW, "> $tmp" ) or die "open $tmp: $!";
while ( my $line = <OLD> ) {
# look for the first line of the file to contain a CREATE OR REPLACE STATEMENT
if ( $line =~
m/^(CREATE\sOR\sREPLACE)\s*(PACKAGE|PACKAGE\sBODY)?\s(.+)\s(AS|IS)?/i
)
{
# look ahead to next line, in case the AS/IS is next
my $nextline = <OLD>;
# from the above IF clause, the package name is in buffer 3
$package_name = $3;
# if the package name and the AS/IS is on the same line, and
# the package name is quoted/prepended by the TRON2000 schema name
if ( $package_name =~ m/"TRON2000"\."(\w+)"(\s*|\S*)(AS|IS)/i ) {
# grab just the name and the AS/IS parts
$package_name =~ s/"TRON2000"\."(\w+)"(\s*|\S*)(AS|IS)/$1 $2/i;
trim($package_name);
}
elsif ( ( $package_name =~ m/"TRON2000"\."(\w+)"/i )
&& ( $nextline =~ m/(AS|IS)/ ) )
{
# if the AS/IS was on the next line from the name, put them together on one line
$package_name =~ s/"TRON2000"\."(\w+)"(\s*|\S*)/$1/i;
$package_name = trim($package_name) . ' ' . trim($nextline);
trim($package_name); # remove trailing carriage return
}
# now put the line back together
$line =~
s/^(CREATE\sOR\sREPLACE)\s*(PACKAGE|PACKAGE\sBODY|FUNCTION|PROCEDURE)?\s(.+)\s(AS|IS)?/$1 $2 $package_name/ig;
# and print it to the file
print NEW "$line\n";
}
else {
# just a normal line - print it to the temp file
print NEW $line or die "print $tmp: $!";
}
}
# close up the files
close(OLD) or die "close $file: $!";
close(NEW) or die "close $tmp: $!";
# rename the temp file as the original file name
unlink($file) or die "unlink $file: $!";
rename( $tmp, $file ) or die "can't rename $tmp to $file: $!";
}
# find and clean up oracle files
sub eachFile {
my $ext;
my $filename = $_;
my $fullpath = $File::Find::name;
if ( -f $filename ) {
($ext) = $filename =~ /(\.[^.]+)$/;
}
else {
# ignore non files
return;
}
if ( $ext =~ /(\.spp|\.sps|\.spb|\.sf|\.sp)/i ) {
print "package: $filename\n";
cleanup_packages($fullpath);
}
else {
print "$filename not specified for processing!\n";
}
}
MAIN:
{
my ( #files, $file );
my $dir = 'C:/1_atest';
# grab all the files for cleanup
find( \&eachFile, "$dir/" );
#open and evaluate each
foreach $file (#files)
{
# skip . and ..
next if ( $file =~ /^\.$/ );
next if ( $file =~ /^\.\.$/ );
cleanup_file($file);
};
}
Assuming the entire content of a file is stored as scalar in a var, the following should do the trick.
$Str = '
CREATE OR REPLACE FUNCTION "TRON2000"."DC_F_DUMP_CSV_MMA" (
p_trailing_separator IN BOOLEAN DEFAULT FALSE,
p_max_linesize IN NUMBER DEFAULT 32000,
p_mode IN VARCHAR2 DEFAULT w
)
RETURN NUMBER
IS
CREATE OR REPLACE FUNCTION "TRON2000"."DC_F_DUMP_CSV_MMA" (
p_trailing_separator IN BOOLEAN DEFAULT FALSE,
p_max_linesize IN NUMBER DEFAULT 32000,
p_mode IN VARCHAR2 DEFAULT w
)
RETURN NUMBER
IS
';
$Str =~ s#^(create\s+(?:or\s+replace\s+)?\w+\s+)"[^"]+"."([^"]+)"#$1 $2#mig;
print $Str;
I'm trying to use Archive::Zip to zip a directory but the resulting zip I get is empty.
What am I doing wrong?
my ($inDirectory, $outFile) = #_;
# Create a Zip file
my $zip = Archive::Zip->new();
# Add a directory
my $dir_member = $zip->addDirectory($inDirectory. "/");
# Save the Zip file
unless ( $zip->writeToFileNamed($outFile) == AZ_OK ) {
die 'Could not zip file';
}
Got it working , I had to use $zip->addTree
Maybe your directory $inDirectory ... is not a directory as expected and I'm not sure but it looks like you don't need the end slash for the dirname :
print $inDirectory::Find::name;
if ( -d $inDirectory::Find::name ) { # just grab directories, not files.
print "adding a dir\n";
$zip->addDirectory($inDirectory::Find::name);
} else {
#zip files
print "adding a file\n";
$zip->addFile($inDirectory::Find::name) != AZ_OK || print "couldn't add file \n";
}