#!/bin/sh -- # -*- perl -*- -w
eval 'exec perl -S $0 ${1+"$@"}'
    if 0;

# @(#)reep	18.1 10/15/98

# Main pipeline.  The stages below run strictly in this order.

# Set the global defaults, read the command line and validate the result.
initialize();

# Run utf_filt over the reference and the hypothesis UTF files.
utf_filter_ref();
utf_filter_hyp();

# Run csrfilt over the two utf_filt outputs.
csr_filter_stm();
csr_filter_ctm();

# Run sclite, then the tald3e_sm_export wrapper, then the MUC scorer wrapper.
sclite();
aldistsm();
mucscorer();

# Tell the user where the reports ended up.
report_results();

exit(0);

############################################################

# Run utf_filt on the reference UTF file ($UTFREF), writing its output to
# $IESTM.  The command is executed through call_sys.
sub utf_filter_ref{
    print "$script is running utf_filt on ref...\n";

    # Only add an include path for the SGMLS module when one was configured.
    my $include = $SGMLSPMDIR ? "-I $SGMLSPMDIR " : "";

    my $cmdline = "perl " . $include
        . "$UTFFILT -k -p -e $DTD -o $IESTM -f iestm -i $UTFREF";

    call_sys($cmdline);
}

############################################################

# Run utf_filt on the hypothesis UTF file ($UTFHYP), writing its output to
# $IECTM.  The command is executed through call_sys.
sub utf_filter_hyp{
    print "$script is running utf_filt on hyp...\n";

    my @pieces = ("perl ");

    # Only add an include path for the SGMLS module when one was configured.
    push @pieces, "-I $SGMLSPMDIR " if $SGMLSPMDIR;

    push @pieces, "$UTFFILT -e $DTD -o $IECTM -f iectm ";

    # utf_filt gets -w unless text mode was requested.
    push @pieces, "-w" unless $TEXTMODE;

    push @pieces, " -i $UTFHYP";

    call_sys(join("", @pieces));
}

############################################################

# Pipe $IESTM through two inline perl filters and csrfilt (stm input, with
# the $GLM rules), writing $IESTMFILT.  $IESTM is handed to clean_up
# afterwards.
sub csr_filter_stm{
    print "$script is running csrfilt on ref...\n";

    # Filter 1: replace the ignore marker with NE_IGNORE_TIME
    # (case-insensitive match).
    my $rename_ignore =
        q(perl -pe 's/(ignore_time_segment_in_scoring)/NE_IGNORE_TIME/i');

    # Filter 2: on every line that does not contain ";;", insert a space and
    # the zero-padded 5-digit input line number followed by "_" after the
    # first two whitespace-separated fields.
    my $number_lines =
        q(perl -pe 'if ($_ !~ /;;/) { $ln=sprintf "%05d",$.;)
      . q(s/^(\S+\s+\S+\s+)/$1 ${ln}_/;}');

    my $cmdline = join(" | ",
                       "cat $IESTM",
                       $rename_ignore,
                       $number_lines,
                       "$CSRFILT -dh -i stm $GLM > $IESTMFILT");

    call_sys($cmdline);
    clean_up($IESTM);
}

############################################################

# Pipe $IECTM through csrfilt (ctm input, with the $GLM rules), writing
# $IECTMFILT.  $IECTM is handed to clean_up afterwards.
sub csr_filter_ctm{
    print "$script is running csrfilt on hyp...\n";

    my $filter  = "$CSRFILT -dh -i ctm $GLM  > $IECTMFILT";
    my $cmdline = "cat $IECTM | " . $filter;

    call_sys($cmdline);
    clean_up($IECTM);
}

############################################################

# Run sclite on the filtered stm/ctm files, convert its SGML output with
# $SGML2TALD, strip whitespace around SGML tags, and leave the result in
# $SCLITEOUT.  All intermediate files are handed to clean_up afterwards.
sub sclite{
    print "$script is running sclite...\n";

    my $tald = "${SCLITESGML}.tald";

    # Step 1: sclite writes SGML to stdout, which is captured in $SCLITESGML.
    call_sys(join(" ",
                  $SCLITE,
                  "-r $IESTMFILT stm",
                  "-h $IECTMFILT ctm",
                  "-o sgml stdout > $SCLITESGML"));

    # Step 2: convert the SGML file into the .tald file.
    call_sys("$SGML2TALD < $SCLITESGML > $tald ");

    # Step 3: remove whitespace before closing tags and after opening
    # enam/time/num tags.
    my $strip_spaces =
        q(s:\s+</:</:g; s/(<(enam|time|num)[^<>]+>)\s+/$1/gi);
    call_sys("cat $tald | perl -pe '" . $strip_spaces . "'> $SCLITEOUT");

    clean_up($IESTMFILT, $IECTMFILT, $SCLITESGML, $tald);
}

############################################################

# Run the $TALDWRAP wrapper, passing it the sclite output, the tald
# in/out file names, the MUC scorer input file name and the aldistsm
# directory.  $SCLITEOUT, $TALDIN and $TALDOUT are handed to clean_up
# afterwards.
sub aldistsm{
    print "$script is running wrapper for tald3e_sm_export ...\n";

    my @options = (
        "--scliteout=$SCLITEOUT",
        "--taldin=$TALDIN",
        "--taldout=$TALDOUT",
        "--mucscorein=$MUCSCOREIN",
        "--aldistsmdir=$ALDISTSMDIR",
    );

    # Program name first, then every option separated by a single space.
    my $cmdline = "$TALDWRAP";
    $cmdline .= " $_" for @options;

    call_sys($cmdline);
    clean_up($SCLITEOUT, $TALDIN, $TALDOUT);
}

############################################################

# Run the $MSWRAP wrapper around the MUC scorer.  It is given the alignment
# file, the tag-by-tag and score report names, the scorer's working file
# names and the scorer program itself.  The scorer's working files and the
# alignment file are handed to clean_up afterwards.
sub mucscorer{
    print "$script is running the muc scorer wrapper...\n";

    my @words = (
        "$MSWRAP",
        "--alignment_filename=$MUCSCOREIN",
        "--tagbytag=$TAGBYTAG",
        "--scores=$SCORES",
        "--muc_scorer_ref=$MUCSCOREREF",
        "--muc_scorer_hyp=$MUCSCOREHYP",
        "--muc_scorer_config=$MUCSCORECFG",
        "--muc_scorer_report=$MUCSCORERPT",
        "--muc_scorer_scores=$MUCSCORESCR",
        "--mucscorer=$MUCSCORER",
    );

    # Optional switches are forwarded only when set on our command line.
    push @words, "--no_pretty_report" if $NOPRETTYREPORT;
    push @words, "--mucmetric"        if $MUCMETRIC;

    call_sys(join(" ", @words));

    my @leftovers = ($MUCSCOREREF, $MUCSCOREHYP, $MUCSCORECFG,
                     $MUCSCORERPT, $MUCSCORESCR, $MUCSCOREIN);
    clean_up(@leftovers);
}

########################################

# Prepare all global state: set the defaults first, then read the command
# line, then validate the resulting values.
sub initialize{
    set_global_variables();
    get_commandline_options();
    check_global_variables();
}

########################################

# Give every global its default value.  Component and data-file locations
# are derived from the IEEVAL environment variable, which must name an
# existing directory; otherwise a message is printed on STDERR and the
# script exits with status 1.  One trailing "/" is removed from
# $ENV{'IEEVAL'} itself.
sub set_global_variables{
    # Last path component of $0, used as a prefix in messages.
    $script = (split(/\//, $0))[-1];

    unless( exists($ENV{'IEEVAL'})
            and defined($ENV{'IEEVAL'})
            and -d $ENV{'IEEVAL'} ){
        print STDERR join("",
                          "'IEEVAL' environment variable \n",
                          "should be defined to point to the top of \n",
                          "the ieeval directory tree.\n");
        exit(1);
    }

    # make sure IEEVAL doesn't have a "/" on the end
    $ENV{'IEEVAL'} =~ s:/$::;
    my $root = $ENV{'IEEVAL'};

    # software in the pipeline:
    $UTFFILT     = "$root/scripts/utf_filt";
    $CSRFILT     = "$root/software/tranfilt/csrfilt.sh";
    $SCLITE      = "$root/software/sctk/src/sclite";
    $TALDWRAP    = "$root/scripts/taldwrap";
    $ALDISTSMDIR = "$root/software/aldistsm";
    $MSWRAP      = "$root/scripts/mswrap";
    $MUCSCORER   = "$root/software/MUC_scorer/src/MUC_scorer";
    $SGML2TALD   = "$root/scripts/sgml2tald";

    # required software that we're not packaging
    # The SGMLS Perl module - taken from the SGMLSPMDIR env var when present
    $SGMLSPMDIR = exists($ENV{'SGMLSPMDIR'}) ? $ENV{'SGMLSPMDIR'} : "";

    # configuration data files
    $GLM = "$root/refdata/en.glm.ne";
    $DTD = "$root/refdata/utf.dtd";

    # values supplied on the command line: file names start out empty ...
    ($UTFREF, $UTFHYP, $OUTPUTDIR) = ("", "", "");

    # ... and every switch starts out off.
    $NOCLEANUP      = 0;
    $TEXTMODE       = 0;
    $NOPRETTYREPORT = 0;
    $MUCMETRIC      = "0";
}

########################################

# Read the command-line options into the corresponding globals and derive
# the names of all intermediate and result files.
#
# Every generated file lives in $OUTPUTDIR and carries this process's pid
# as a suffix.
#
# If Getopt::Long rejects the command line (unknown option, missing
# required value, ...), the usage text is printed and the script exits
# with status 1 instead of silently carrying on with partial settings.
sub get_commandline_options{
    use Getopt::Long;
    my $pid = $$;

    my $parsed_ok = GetOptions(
		# required
		"ref=s"     => \$UTFREF,
		"hyp=s"     => \$UTFHYP,
		"outdir=s"     => \$OUTPUTDIR,

		# alternative component locations
		"glm:s"           => \$GLM,
		"dtd:s"           => \$DTD,
		"utffilt:s"       => \$UTFFILT,
		"csrfilt:s"       => \$CSRFILT,
		"sclite:s"        => \$SCLITE,
		"aldistsmdir:s"   => \$ALDISTSMDIR,
		"mucscorer:s"     => \$MUCSCORER,
		"sgml2tald:s"     => \$SGML2TALD,

		# whether or not to remove intermediate files
		"nocleanup"        =>  \$NOCLEANUP,
		# whether to make a human-readable tag-by-tag
		"no_pretty_report" => \$NOPRETTYREPORT,
		# pass --mucmetric on to the MUC scorer wrapper
		"mucmetric"        => \$MUCMETRIC,
		# text mode: utf_filt is run on the hypothesis without -w
		"text_mode"        =>  \$TEXTMODE
		);

    # GetOptions has already warned about the offending option on STDERR.
    if( ! $parsed_ok ){
	&print_usage();
	exit(1);
    }

    # Strip one pair of surrounding double or single quotes from each value.
    # The loop variable aliases the globals, so they are edited in place.
    foreach my $value ($UTFREF,
		       $UTFHYP,
		       $OUTPUTDIR,

		       $GLM,
		       $DTD,
		       $UTFFILT,
		       $CSRFILT,
		       $SCLITE,
		       $ALDISTSMDIR,
		       $MUCSCORER,
		       $SGML2TALD){
	next unless defined $value;
	$value =~ s/^\"(.*)\"$/$1/ or $value =~ s/^\'(.*)\'$/$1/;
    }

    # trim "/" from $OUTPUTDIR
    $OUTPUTDIR =~ s:/$::;

    # make full output filenames
    $IESTM       = $OUTPUTDIR . "/" . "iestm.$pid";
    $IECTM       = $OUTPUTDIR . "/" . "iectm.$pid";
    $IESTMFILT   = $OUTPUTDIR . "/" . "iestmfilt.$pid";
    $IECTMFILT   = $OUTPUTDIR . "/" . "iectmfilt.$pid";
    $SCLITESGML  = $OUTPUTDIR . "/" . "sclitesgml.$pid";
    $SCLITEOUT   = $OUTPUTDIR . "/" . "scliteout.$pid";
    $TALDIN      = $OUTPUTDIR . "/" . "taldin.$pid";
    $TALDOUT     = $OUTPUTDIR . "/" . "taldout.$pid";
    $MUCSCOREIN  = $OUTPUTDIR . "/" . "mucscorein.$pid";
    $MUCSCOREREF = $OUTPUTDIR . "/" . "mucscore.ref.$pid";
    $MUCSCOREHYP = $OUTPUTDIR . "/" . "mucscore.hyp.$pid";
    $MUCSCORECFG = $OUTPUTDIR . "/" . "mucscore.cfg.$pid";
    $MUCSCORERPT = $OUTPUTDIR . "/" . "mucscore.rpt.$pid";
    $MUCSCORESCR = $OUTPUTDIR . "/" . "mucscore.scr.$pid";
    $TAGBYTAG    = $OUTPUTDIR . "/" . "tagbytag.$pid";
    $SCORES      = $OUTPUTDIR . "/" . "scores.$pid";
}

##############################

# Validate the globals produced by the defaults and the command line.
#
#  - $UTFREF, $UTFHYP, $OUTPUTDIR, $GLM and $DTD must be non-empty;
#    if any is missing the usage text is printed and the script exits 1.
#  - $UTFREF, $UTFHYP, $GLM and $DTD must be readable.
#  - $OUTPUTDIR and $ALDISTSMDIR must be directories.  -d follows symbolic
#    links, so a link to a directory is accepted, while a dangling link or
#    a link to a plain file is not.
#  - every pipeline component must be executable.
#
# All problems of the last three kinds are reported on STDERR before the
# script exits with status 1, so the user can fix them in one go.
sub check_global_variables{
    my $var;
    my $hadproblems = 0;

    # required values: an undefined or empty one means the command line was
    # incomplete
    foreach $var ($UTFREF,
		  $UTFHYP,
		  $OUTPUTDIR,
		  $GLM,
		  $DTD){
	if( ! defined($var) or $var eq "" ){
	    &print_usage();
	    exit(1);
	}
    }

    # check non-executable files
    foreach $var ($UTFREF,
		  $UTFHYP,
		  $GLM,
		  $DTD){
	if( ! -r $var ){
	    print STDERR "$script: file '$var' cannot be read\n";
	    $hadproblems = 1;
	}
    }

    # check directories
    foreach $var ($OUTPUTDIR,
		  $ALDISTSMDIR){
	if( ! -d $var ){
	    print STDERR "$script: '$var' is not a directory\n";
	    $hadproblems = 1;
	}
    }

    # check executables
    foreach $var ($UTFFILT,
		  $CSRFILT,
		  $SCLITE,
		  $TALDWRAP,
		  $MSWRAP,
		  $MUCSCORER,
		  "$ALDISTSMDIR/tald3e_sm_export",
		  $SGML2TALD
		  ){
	if(! -x $var){
	    print STDERR "$var cannot be executed\n";
	    $hadproblems = 1;
	}
    }

    if( $hadproblems ){
	exit(1);
    }
}

##############################

# Run $command with system() and return normally when it succeeds.
#
# On failure a diagnostic is written to STDERR and the script exits with
# status 1.  The wait status is decoded so the message shows what actually
# happened:
#   - system() returned -1: the command could not be started, and $! says why;
#   - low 7 bits set:       the command was killed by that signal;
#   - otherwise:            the command's own exit status (status >> 8).
# $! is only reported in the first case, because it is not set by a command
# that merely exits non-zero.
sub call_sys{
    my ($command) = @_;

    my $sysstatus = system( $command );
    # Capture errno right away, before any further I/O can overwrite it.
    my $oserror = "$!";

    return if $sysstatus == 0;

    print STDERR "command '$command' failed\n";
    if( $sysstatus == -1 ){
	print STDERR "could not execute command: '$oserror'\n";
    }
    elsif( $sysstatus & 127 ){
	printf STDERR "command died with signal %d\n", $sysstatus & 127;
    }
    else {
	printf STDERR "exit status was: %d\n", $sysstatus >> 8;
    }
    exit(1);
}



# Remove the given intermediate files unless --nocleanup was requested.
#
# Files are deleted with unlink rather than by spawning "rm -f" through the
# shell, so names containing spaces or shell metacharacters are handled
# correctly and no extra process is started per file.  As with "rm -f", a
# file that does not exist is silently skipped; any other failure produces
# a warning but does not stop the pipeline.
sub clean_up{
    my @fnames = @_;

    return if $NOCLEANUP;

    foreach my $fname (@fnames){
	next unless defined $fname and $fname ne "";
	next if unlink $fname;

	# Keep the reason before the file tests below can disturb $!.
	my $oserror = "$!";
	if( -e $fname or -l $fname ){
	    warn "could not remove '$fname': $oserror\n";
	}
    }
}

# Print the command-line synopsis on the currently selected output handle.
sub print_usage{
    # One entry per output line.  Trailing tabs and spaces that are part of
    # the text are spelled out so they cannot be lost in an editor.
    my @usage_lines = (
        "usage: $script <OPTIONS>",
        '',
        '  where <OPTIONS> are:',
        '',
        '    --ref    <reference utf file>',
        '    --hyp    <hypothesis utf file>',
        '    --outdir <output-directory>',
        '    [--glm   <glm-file>]',
        '    [--dtd   <utf dtd>]',
        "    [--utffilt <utf_filt program>]\t",
        '    [--csrfilt <csrfilt program>]',
        '    [--sclite  <sclite program>]',
        '    [--aldistsmdir <directory containing tald3e_sm_export program>]',
        '    [--mucscorer   <MUC_scorer program>]',
        '    [--nocleanup]',
        "    [--no_pretty_report]\t",
        '    [--mucmetric]' . (' ' x 4),
        '    [--sgml2tald  <sclite output conversion program>]',
        '    [--text_mode]',
    );

    print join("\n", @usage_lines) . "\n";
}

# Tell the user where the tag-by-tag and score reports were written and
# that the run is complete.
sub report_results{
    my $summary = "\n"
        . "Tag-by-tag comparison report is in $TAGBYTAG\n"
        . "Score report is in $SCORES\n"
        . "$script complete.\n"
        . "\n";

    print $summary;
}
