#!/usr/bin/env bash
#
# BioCTS Complete Hadoop Script
#
# Encodes local ANSI/NIST-ITL (*.an2) files as base64, loads them into HDFS,
# packs them into a SequenceFile, runs the BioCTS Mono mapper over it with
# Hadoop Streaming, and copies the results back to the local file system.
#
# Environment:
#   BIOCTS_ROOT  Optional. Root of the local BioCTS_Hadoop_Complete tree
#                (default: /home/cloudera/BioCTS_Hadoop_Complete).

# Only wipe the screen when writing to a terminal, so redirected output
# (log files, cron mail) is not polluted with control sequences.
if [ -t 1 ]; then
    clear
fi

# Every Local* folder below hangs off this single root.
BioCTSRoot="${BIOCTS_ROOT:-/home/cloudera/BioCTS_Hadoop_Complete}"

# Local working folders.
LocalAnInputFolder="$BioCTSRoot/Input_Files_To_Test"
LocalAnBase64Folder="$LocalAnInputFolder/base64"
LocalSequenceFileJavaFolder="$BioCTSRoot/SequenceFileGenerator"
LocalSequenceFileJavaClassFolder="$LocalSequenceFileJavaFolder/SmallFilesToSequenceFileConverter_classes"
LocalStreamingResultsFolder="$BioCTSRoot/Output_Files_From_Hadoop"
LocalBioCTSSourceFolder="$BioCTSRoot/BioCTS_Hadoop_Map"

# HDFS locations; the /an/... paths are appended to HadoopFolder when used.
HadoopFolder='/user/cloudera'
HadoopBase64Folder='/an/base64'
HadoopSequenceFileFolder='/an/sequencefile'
HadoopStreamingResults='/an/StreamingResults'

echo ""
echo "*******************************************************************************"
echo "*                        BioCTS Complete Hadoop Script                        *"
echo "*******************************************************************************"
echo ""

# Creates a local working folder when it does not exist yet and announces it.
# Arguments:
#   $1 - folder to create; missing parent folders are created as well
#   $2 - banner title
#   $3 - banner note telling the user what the folder is for
# Returns:
#   0 when the folder already exists, otherwise the exit status of mkdir.
ensure_local_folder() {
    local status

    if [ -d "$1" ]; then
        return 0
    fi

    echo ""
    echo "*******************************************************************************"
    echo "$2"
    echo "$3"
    echo "*******************************************************************************"
    # -p: do not fail just because a parent folder is missing as well.
    mkdir -p -- "$1"
    status=$?
    echo ""
    return "$status"
}

ensure_local_folder "$LocalAnInputFolder" \
    "Creating ANSI/NIST-ITL Input Folder" \
    "-- Need to add sample files to it"

ensure_local_folder "$LocalAnBase64Folder" \
    "Creating Base64 folder" \
    "-- This folder will be populated with base64 encoded files"



# Rebuilds the local base64 folder from the *.an2 files of the input folder.
# Arguments:
#   $1 - folder holding the ANSI/NIST-ITL (*.an2) input files
#   $2 - folder receiving the <name>.an2.base64 files
# Returns:
#   1, without touching anything, when either folder argument is empty
#   (an empty value would otherwise turn the globs below into /*.base64).
refresh_base64_files() {
    local input_dir=$1
    local base64_dir=$2
    local rule="*******************************************************************************"
    local encoded

    if [ -z "$input_dir" ] || [ -z "$base64_dir" ]; then
        echo "refresh_base64_files: input and base64 folders must be set" >&2
        return 1
    fi

    echo ""
    echo "$rule"
    echo "Removing Previous Base64 Files"
    echo "$rule"
    rm -f -- "$base64_dir"/*.base64
    echo ""

    echo ""
    echo "$rule"
    echo "Converting ANSI/NIST-ITL Input files to base64 versions"
    echo "$rule"
    # The file name is handed to the inner shell as $1 instead of being pasted
    # into the command text, so spaces and shell metacharacters are harmless.
    # -maxdepth 1: only top-level files are collected by the copy step below,
    # so encoding files in sub-folders would just leave stray .base64 files.
    find "$input_dir" -maxdepth 1 -name '*.an2' \
        -exec bash -c 'base64 "$1" > "$1.base64"' bash {} \;
    echo ""

    echo ""
    echo "$rule"
    echo "Moving Base64 Files to Base64 Folder"
    echo "$rule"
    for encoded in "$input_dir"/*.base64; do
        # An unmatched glob stays literal; skip it instead of failing in cp.
        [ -e "$encoded" ] || continue
        cp -- "$encoded" "$base64_dir"/
    done
    echo ""

    echo ""
    echo "$rule"
    echo "Removing Base64 Files from ANSI/NIST-ITL Input Folder"
    echo "$rule"
    rm -f -- "$input_dir"/*.base64
    echo ""
}

refresh_base64_files "$LocalAnInputFolder" "$LocalAnBase64Folder"

# Replaces the base64 files stored in HDFS with the freshly encoded local ones.
# Arguments:
#   $1 - local folder holding the *.base64 files
#   $2 - HDFS folder that receives them
publish_base64_to_hdfs() {
    local local_dir=$1
    local hdfs_dir=$2
    local rule="*******************************************************************************"

    echo ""
    echo "$rule"
    echo "Removing Base64 Files from Hadoop Base64 Folder"
    echo "$rule"
    # The pattern is quoted so that HDFS, not the local shell, expands it.
    # Only the informational stdout is discarded; errors still reach stderr.
    hadoop fs -rm -f "$hdfs_dir/*.base64" >/dev/null
    echo ""

    echo ""
    echo "$rule"
    echo "Putting Base64 Files into Hadoop Base64 Folder"
    echo "$rule"
    # Make sure the target exists; -put of several files needs a directory.
    hadoop fs -mkdir -p "$hdfs_dir"
    hadoop fs -put "$local_dir"/*.base64 "$hdfs_dir"
    echo ""
}

publish_base64_to_hdfs "$LocalAnBase64Folder" "$HadoopFolder$HadoopBase64Folder"

echo ""
echo "*******************************************************************************"
echo "Compiling Java SequenceFile Generator"
echo "*******************************************************************************"
# The directory given to javac -d has to exist before compiling.
mkdir -p -- "$LocalSequenceFileJavaClassFolder"
# The classpath is quoted on purpose: the trailing /* entries are classpath
# wildcards that javac expands itself; the shell must not try to glob them.
javac -cp '/usr/lib/hadoop/*:/usr/lib/hadoop/client-0.20/*' \
    -d "$LocalSequenceFileJavaClassFolder/" \
    "$LocalSequenceFileJavaFolder"/*.java
echo ""

echo ""
echo "*******************************************************************************"
echo "Creating SmallFilesToSequenceFileConverter.jar"
echo "*******************************************************************************"
# Drop the verbose (-v) listing printed on stdout; errors still reach stderr.
jar -cvf "$LocalSequenceFileJavaFolder/SmallFilesToSequenceFileConverter.jar" \
    -C "$LocalSequenceFileJavaClassFolder/" . >/dev/null
echo ""

echo ""
echo "*******************************************************************************"
echo "Copying SmallFilesToSequenceFileConverter.jar"
echo "*******************************************************************************"
# The jar is copied into the current directory, where "hadoop jar" looks for it.
cp -- "$LocalSequenceFileJavaFolder"/*.jar .
echo ""

echo ""
echo "*******************************************************************************"
echo "Clearing any Previous SequenceFile output"
echo "*******************************************************************************"
# -f: a missing folder (e.g. on the very first run) is not reported as an error.
hadoop fs -rm -r -f "$HadoopFolder$HadoopSequenceFileFolder"
echo ""

echo ""
echo "*******************************************************************************"
echo "Run MapReduce Job to Create Sequence File from Directory of Base64 files"
echo "*******************************************************************************"
# Arguments after the main class: HDFS input folder, then HDFS output folder.
hadoop jar SmallFilesToSequenceFileConverter.jar \
    SmallFilesToSequenceFileConverter \
    "$HadoopFolder$HadoopBase64Folder" \
    "$HadoopFolder$HadoopSequenceFileFolder"
echo "*******************************************************************************"
echo "Finished MapReduce Job"
echo "*******************************************************************************"
echo ""

echo ""
echo "*******************************************************************************"
echo "Compiling BioCTS Mono Streaming Mapper"
echo "*******************************************************************************"
# The build log on stdout is discarded, so a failed build would otherwise pass
# unnoticed; report it based on the exit status instead.
if ! xbuild "$LocalBioCTSSourceFolder/BioCTS_Hadoop_Map.sln" >/dev/null; then
    echo "WARNING: xbuild failed; the BioCTS files copied below may be stale or missing" >&2
fi
echo ""

# Files shipped with the streaming job; they are refreshed in the current directory.
BioCTSBuildOutputFolder="$LocalBioCTSSourceFolder/BioCTS_Hadoop_Map/bin/Debug"

echo ""
echo "*******************************************************************************"
echo "Removing Previous BioCTS Files"
echo "*******************************************************************************"
# -f: nothing to remove on the first run is not an error.
rm -f -- \
    GOV.CSD.ITL.NIST.CTS.AN2K11.dll \
    GOV.CSD.ITL.NIST.AN_CTS.dll \
    GOV.CSD.ITL.NIST.Library.dll \
    BioCTS_Map.exe
echo ""

echo ""
echo "*******************************************************************************"
echo "Copying new BioCTS Files"
echo "*******************************************************************************"
for BioCTSArtifact in \
    BioCTS_Map.exe \
    GOV.CSD.ITL.NIST.CTS.AN2K11.dll \
    GOV.CSD.ITL.NIST.AN_CTS.dll \
    GOV.CSD.ITL.NIST.Library.dll
do
    cp -- "$BioCTSBuildOutputFolder/$BioCTSArtifact" .
done
echo ""

echo ""
echo "*******************************************************************************"
echo "Clearing any Previous BioCTS Streaming Results"
echo "*******************************************************************************"
# -f: a missing folder (e.g. on the very first run) is not reported as an error.
hadoop fs -rm -r -f "$HadoopFolder$HadoopStreamingResults"
echo ""

echo ""
echo "*******************************************************************************"
echo "Running BioCTS Mono MapReduce Streaming Job on Sequence File"
echo "*******************************************************************************"
# Map-only streaming job (-reducer NONE): the mapper is the Mono executable,
# submitted together with the three DLLs via -file. All four files are taken
# from the current directory.
hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-*.jar \
    -inputformat org.apache.hadoop.mapred.SequenceFileAsTextInputFormat \
    -input "$HadoopFolder$HadoopSequenceFileFolder" \
    -output "$HadoopFolder$HadoopStreamingResults" \
    -mapper "mono BioCTS_Map.exe" \
    -reducer NONE \
    -file "BioCTS_Map.exe" \
    -file "GOV.CSD.ITL.NIST.AN_CTS.dll" \
    -file "GOV.CSD.ITL.NIST.CTS.AN2K11.dll" \
    -file "GOV.CSD.ITL.NIST.Library.dll"
echo ""

echo ""
echo "*******************************************************************************"
echo "Clearing Previous Local Streaming Results"
echo "*******************************************************************************"
# ${...:?} aborts the script instead of letting an empty variable turn the
# glob into /*. The folder is created first so the download below has a
# target, and -f keeps an already empty folder from being reported as an error.
mkdir -p -- "${LocalStreamingResultsFolder:?}"
rm -rf -- "${LocalStreamingResultsFolder:?}"/*
echo ""

echo ""
echo "*******************************************************************************"
echo "Copying Streaming Results from Hadoop to Local"
echo "*******************************************************************************"
# The pattern is quoted so that HDFS, not the local shell, expands it.
hadoop fs -get "$HadoopFolder$HadoopStreamingResults/p*" "$LocalStreamingResultsFolder/"
echo ""

# Splits the downloaded part* files into one log file per "BEGIN LOG" marker.
# Arguments:
#   $1 - folder holding the part* files; logFile<N>.log files are written there
# Output files:
#   logFile1.log, logFile2.log, ... each start at a line containing "BEGIN LOG";
#   lines that precede the first marker go to logFile.log.
# Returns:
#   1 when the folder argument is empty, otherwise the exit status of awk.
split_streaming_logs() {
    local results_dir=$1
    local status

    if [ -z "$results_dir" ]; then
        echo "split_streaming_logs: results folder must be set" >&2
        return 1
    fi

    echo ""
    echo "*******************************************************************************"
    echo "Splitting Local Log Files"
    echo "*******************************************************************************"
    # The target name is built in a variable (an unparenthesised concatenation
    # after ">" is not portable) and each finished file is closed so that a
    # large number of logs cannot exhaust awk's open-file limit.
    awk -v dir="$results_dir" '
        BEGIN       { out = dir "/logFile.log" }
        /BEGIN LOG/ { close(out); out = dir "/logFile" (++n) ".log" }
                    { print > out }
    ' "$results_dir"/part*
    status=$?
    echo ""
    return "$status"
}

split_streaming_logs "$LocalStreamingResultsFolder"

# Closing banner: marks the end of the pipeline run.
printf '%s\n' \
    "*******************************************************************************" \
    "*                    End BioCTS Complete Hadoop Script                        *" \
    "*******************************************************************************"