#! /usr/local/bin/perl
=head1 DESCRIPTION
This script will attempt to parse all cif files in
a given directory and save the parsed binaries.
=head1 USAGE
parseMulti.pl [-i -r -s -o -f -l -c]
Options:
-i input directory
-r recursively search all subdirectories
-o output directory
-f filter through dictionary
-l log file
-c compress the binaries
-s size limit in MB: skip files that are larger than this when uncompressed
Comments:
-i defaults to working directory if omitted
-o defaults to working directory if omitted
-l defaults to cifParse.log if omitted
=cut
use STAR::Parser;
use STAR::Filter;
use strict;
use Getopt::Std;
use vars qw( $opt_i $opt_r $opt_o $opt_f $opt_l $opt_c $opt_s );
# Read the command line. The switches named here (-i -o -f -l -s) take a
# value; the results land in the $opt_* package variables.
getopt('iofls');

# Defaults for options that were omitted (or given an empty/false value).
$opt_i ||= ".";
$opt_o ||= ".";
$opt_l ||= "cifParse.log";

# External commands, including their flags, used on compressed files.
my $compress   = "/bin/compress -f";
my $uncompress = "/bin/uncompress -f";

# Working variables for the whole script. They are file-scoped lexicals,
# so the parse() sub further down sees them as well.
my ( @tmp, @files );    # raw directory listing output; final file list
my $file;               # file currently being processed
my $uncompressed;       # name of the file without its .Z suffix
my $status;             # status of a system call
my $id;                 # pdbid taken from the file name
my $parse_opt;          # parse options
my ( $data, $filtered );    # parsed data object; filtered data object
my $dict;               # dictionary used for filtering
my $date;               # date and time stamp
my $size;               # size of the uncompressed file, checked against -s
my $pwd;                # working directory

# temp.cif / temp.cif.Z are used as scratch files in the working
# directory, so refuse to start if either one is already there.
die "Please remove file(s) temp.cif* from working directory"
    if grep { -e $_ } ( "temp.cif.Z", "temp.cif" );
# `pwd` output keeps its trailing newline, which is why the first log line
# below has no explicit "\n".
$pwd = `pwd`;

# open log file
#
# Three-argument open so that characters in the log file name are never
# read as an open mode, and the result is checked: without a log every
# later "print LOG" would be silently lost. The bareword handle LOG is kept
# because the rest of the script, including parse(), prints to it.
open( LOG, '>', $opt_l )
    or die "Could not open log file $opt_l for writing: $!\n";

# Log header: record the settings this run was started with.
print LOG "Working directory: $pwd";
print LOG "Directory of cif files: $opt_i\n";
print LOG "Subdirectories included? ", $opt_r ? "yes" : "no", "\n";
print LOG "Size limit for uncompressed files? ", $opt_s ? "$opt_s MB" : "none", "\n";
print LOG "Dictionary used for filtering: ", $opt_f ? "$opt_f" : "none", "\n";
print LOG "\n";

# open dictionary
#
# Only attempted when -f was given; the file name must contain ".cob".
if ( $opt_f ) {
    $opt_f =~ /\.cob/ or die "Dictionary must be a binary (.cob file)";
    $dict = STAR::Dictionary->new( $opt_f );
}
# assemble file list
#
if ( $opt_r ) {
@tmp = `find $opt_i -name "*.cif" -print`;
@tmp = ( @tmp, `find $opt_i -name "*.cif.Z" -print` );
}
else {
@tmp = `ls -1 $opt_i/*.cif $opt_i/*.cif.Z`;
}
foreach ( @tmp ) {
/^(.*\.cif[\.Z]*)/;
push @files, $1;
}
# `date` output ends in a newline, so the log line needs no "\n".
$date = `date`;
print LOG "Started parsing: $date";

# process all files
#
foreach $file ( sort @files ) {

    # The id is the four characters in front of ".cif"; parse() uses it to
    # name the output file. Skip the file if no id can be extracted rather
    # than silently reusing the id of an earlier file.
    unless ( $file =~ /(....)\.cif/ ) {
        print LOG "Could not determine id of $file\n";
        next;
    }
    $id = $1;

    # Only a name that actually ends in .Z is treated as compressed.
    if ( $file =~ /\.Z\z/ ) {
        # Copy to a scratch file and uncompress that, leaving the original
        # untouched. system() reports failure through its return value (it
        # does not die), so the status is checked explicitly. The list form
        # runs the commands without a shell, so unusual characters in the
        # file name are harmless.
        $status = system( 'cp', '-f', $file, 'temp.cif.Z' );
        $status = system( split( ' ', $uncompress ), 'temp.cif.Z' )
            if $status == 0;

        if ( $status == 0 ) {
            parse( "temp.cif", $file );
        }
        else {
            print LOG "Could not uncompress $file\n";
        }
    }
    else {
        parse( $file, $file );
    }
}

# Remove the scratch files. temp.cif.Z can be left behind when an
# uncompress step failed, and the start-up check refuses to run while
# either file exists. Missing files are simply ignored by unlink.
unlink "temp.cif", "temp.cif.Z";

$date = `date`;
print LOG "Finished parsing: $date";
close LOG;
exit(0);
# parse( $path, $label )
#
# Parses one cif file and stores the result, filtered through the
# dictionary when -f was given, as "$opt_o/$id.cob". The stored file is
# compressed when -c was given.
#
#   $path  - file that is actually read (temp.cif for compressed input)
#   $label - original file name, used in all log messages
#
# Uses the file-scoped $id, $dict, $compress, $size, $data, $filtered and
# $status, the $opt_* options and the LOG handle. Returns nothing; every
# outcome is reported in the log.
sub parse {
    my ( $path, $label ) = @_;

    # Optional size limit (-s, in MB) on the uncompressed file.
    if ( $opt_s ) {
        $size = -s $path;
        if ( $size > ( $opt_s * 1048576 ) ) {
            print LOG "File $label ($size bytes uncompressed) exceeds $opt_s MB size limit\n";
            return;
        }
    }

    # Only the first object returned by the parser is kept. A parser that
    # returns nothing is treated like a parse failure, since there would be
    # no object to store.
    eval { ( $data ) = STAR::Parser->parse( -file => $path ); };
    if ( $@ or !defined $data ) {
        print LOG "Could not parse $label\n";
        return;
    }
    print LOG "Parsed $label\n";

    if ( $opt_f ) {
        eval { $filtered = STAR::Filter->filter_through_dict( -data => $data, -dict => $dict ); };
        if ( $@ ) {
            # Report the original file name, as the other messages do,
            # not the scratch file that was read.
            print LOG "Could not filter $label\n";
            return;
        }
        $filtered->store( "$opt_o/$id.cob" );
    }
    else {
        $data->store( "$opt_o/$id.cob" );
    }

    if ( $opt_c ) {
        # system() signals failure through its return value, not by dying,
        # so the status is tested directly. The list form avoids the shell.
        $status = system( split( ' ', $compress ), "$opt_o/$id.cob" );
        if ( $status != 0 ) {
            print LOG "Could not compress $opt_o/$id.cob\n";
        }
    }
    return;
}