#!/usr/bin/env bash
#-----------------------------------------------------------------------
#  makes tcl files for concatenation. This script is the 1st step of the
#  concatenation. Location of the output files will be defined at the 2nd
#  step - see cdfopr/scripts/submit_prod_concat
#
#  call: make_prod_concat_tcl output_dataset input_dir output_dir name_pattern [verbose]
#
#  examples:
#
#  make_prod_concat_tcl bewk2d fcdfdata034:/cdf/scratch/cdfopr/datasets/bhmu0d "" ewkm
#  make_prod_concat_tcl sopr00 "" "" ewke
#
#  arguments:
#    $1 output_dataset  dataset name written into the TCL files
#    $2 input_dir       [host:]directory holding the input files
#                       ("" or "." selects the default below)
#    $3 output_dir      where the TCL files will go
#                       ("" or "." selects the default below)
#    $4 name_pattern    grep pattern selecting the input files; when empty
#                       it is derived from the dataset name
#    $5 verbose         optional; any value other than 0 enables diagnostics
#
#  CAF_CURRENT should be set to CAF
#
#  output directory for the concatenated files is defined by -o in the
#  cdfopr/scripts/submit_prod_concat
#
#  datasets:
#  ---------
#  electrons: bhel0d -> bewk0d
#  muons:     bhmu0d -> bewk2d
#
#  defaults (defaults assume that the script is run on FCDFLNX3):
#  --------------------------------------------------------------
#  input_dir   fcdfdata034.fnal.gov:/cdf/scratch/cdfopr/datasets/$book/$dataset
#  output_dir  fcdflnx3.fnal.gov:/cdf/opr2/cdfopr/datasets/$book/$dataset/requests
#-----------------------------------------------------------------------

# A single input file above this size is handled by the "large file" branch.
readonly LARGE_FILE_BYTES=1500000000
# A request is closed once adding the next file would exceed this total.
readonly MAX_REQUEST_BYTES=1600000000

book=cdfpewk
dataset=$1
host=""
INPUT_DIR=$2
export OUTPUT_DIR=$3
name_pattern=$4
verbose=${5:-0}

# Derive the default pattern from the dataset name: character 1, character 6,
# the literal "*.*", then characters 2-5 (e.g. bewk2d -> bd*.*ewk2).
if [[ -z "$name_pattern" ]]; then
  name_pattern=$(echo "$dataset" \
    | awk '{print substr($1,1,1)substr($1,6,1)"*.*"substr($1,2,4)}')
fi

# Split "host:dir" into its two parts; a plain directory leaves host empty.
input_path=$(echo "$INPUT_DIR" | awk -F : '{print $2}')
if [[ -n "$input_path" ]]; then
  host=$(echo "$INPUT_DIR" | awk -F : '{print $1}')
  INPUT_DIR=$input_path
fi

if [[ -z "$INPUT_DIR" || "$INPUT_DIR" == "." ]]; then
  host=fcdfdata034.fnal.gov
  export INPUT_DIR=/cdf/scratch/cdfopr/datasets/$book/$dataset
fi

# The original tested `[ $verbose ]`, which is true even for "0"; use the
# same "!= 0" convention as every other verbose check in this script.
if [[ "$verbose" != 0 ]]; then
  echo "host      = $host"
  echo "INPUT_DIR = $INPUT_DIR"
fi
#-----------------------------------------------------------------------
#  this is where the tcl files will go
#-----------------------------------------------------------------------
if [[ -z "$OUTPUT_DIR" || "$OUTPUT_DIR" == "." ]]; then
  export OUTPUT_DIR=/cdf/opr2/cdfopr/datasets/$book/$dataset/requests
fi

if [[ ! -d "$OUTPUT_DIR" ]]; then mkdir -p -- "$OUTPUT_DIR"; fi

input_list_file=$PWD/.input_list_file

echo ".. emoe .host=.$host. .OUTPUT_DIR.=.$OUTPUT_DIR."

# Build an `ls -l`-style listing of the matching input files. The pattern is
# quoted so the shell cannot glob-expand it before grep sees it.
if [[ -z "$host" ]]; then
  ls -l -- "$INPUT_DIR" | grep -- "$name_pattern" > "$input_list_file"
elif [[ "$host" == "fcdfsgi2.fnal.gov" ]]; then
  rsh fcdfsgi2 ls -l "$INPUT_DIR" | grep -- "$name_pattern" > "$input_list_file"
else
  echo "cafhostdir $host $INPUT_DIR"
  export CAF_CURRENT=caf
  cafhostdir "$host" "$INPUT_DIR" | grep -- "$name_pattern" > "$input_list_file"
fi

cat "$input_list_file"

# Column 9 of the listing is the file name; the first '*' on each line is
# removed before the columns are read.
list_of_files=$(sed 's/\*//' "$input_list_file" \
  | awk '{print $9}' \
  | grep -- "$name_pattern")

printf 'list_of_files=%s\n' "${list_of_files//$'\n'/ }"

REQUEST=$(date +%Y_%m_%d.%H_%M_%S)
export REQUEST
export IND=1
export SIZE=0
export CONCAT_LIST=
#-----------------------------------------------------------------------
#  write_request: write one TCL request file for the pending files
#
#  Globals read:    OUTPUT_DIR REQUEST IND SIZE CONCAT_LIST host dataset
#  Globals written: IND (incremented), CONCAT_LIST (cleared), SIZE (reset to 0)
#  Output file:     $OUTPUT_DIR/$REQUEST.<IND as 3 digits>
#  Returns:         0 without writing anything when no files are pending
#
#  NOTE(review): the line breaks inside the TCL here-documents below were
#  reconstructed from a whitespace-collapsed copy of this script - compare
#  against a known-good request file before relying on the exact layout.
#-----------------------------------------------------------------------
write_request() {
  local output_file fn

  # An empty pending list would produce a request with no "include file"
  # lines, so there is nothing useful to write.
  if [[ -z "${CONCAT_LIST// /}" ]]; then return 0; fi

  if [[ ! -d "$OUTPUT_DIR" ]]; then mkdir -p -- "$OUTPUT_DIR"; fi

  output_file=$OUTPUT_DIR/$REQUEST.$(printf "%03i" "$IND")
  if [[ -e "$output_file" ]]; then rm -- "$output_file"; fi

  echo "[write_request] : IND=$IND output_file=$output_file"

  export IND=$(( IND + 1 ))

  {
    cat <<EOF
#-----------------------------------------------------------------------
# size = $SIZE
#-----------------------------------------------------------------------
module input DHInput
talk DHInput
EOF

    # CONCAT_LIST is a space-separated list, so word splitting is intended.
    # shellcheck disable=SC2086
    for fn in $CONCAT_LIST; do
      if [[ -z "$host" ]]; then
        echo " include file $fn"
      else
        echo " include file root://${host}/${fn}"
      fi
    done

    cat <<EOF
 show include
exit
path create NULL_PATH
talk FileOutput
 dhCache set KAHUNA
 output stream AA $dataset AA
 fileSize set 1700000
 dataSetId set $dataset
 dataSetBook set enemoe
 abortOnDBFailure set false
 compression set t
 show
exit
output paths AA NULL_PATH
exit
cont
exit
EOF
  } > "$output_file"

  export CONCAT_LIST=""
  export SIZE=0
}
#-----------------------------------------------------------------------
#  Group the input files into requests.
#  The "1.1 GB" / "1.7 GB" figures in the verbose messages are kept as they
#  were; the limit actually applied is MAX_REQUEST_BYTES.
#-----------------------------------------------------------------------
while IFS= read -r file; do
  if [[ -z "$file" ]]; then continue; fi

  # Exact match on the name column: a regex grep on the name could match
  # several lines (a.1 also matches a.10) and return more than one size.
  sz=$(sed 's/\*//' "$input_list_file" \
    | awk -v f="$file" '$9 == f { print $5; exit }')
  if [[ ! "$sz" =~ ^[0-9]+$ ]]; then
    echo "warning: no numeric size found for $file, assuming 0" >&2
    sz=0
  fi

  if [[ "$verbose" != 0 ]]; then printf " new file %s %12i %12i \n" "$file" "$sz"; fi

  if (( sz > LARGE_FILE_BYTES )); then
#-----------------------------------------------------------------------
#  file size .gt. 1.5 GB
#-----------------------------------------------------------------------
    s1=$(( SIZE + sz ))
    if [[ "$verbose" != 0 ]]; then printf " total size %12i \n" "$s1"; fi

    if (( s1 > MAX_REQUEST_BYTES )); then
      if [[ "$verbose" != 0 ]]; then
        echo "large file, total size $s1 gt 1.1 GB, write request file"
      fi
      # Close the pending request first, then start a new one with this file.
      write_request
      export CONCAT_LIST=$INPUT_DIR/$file
      export SIZE=$sz
    else
      if [[ "$verbose" != 0 ]]; then
        echo "large file, total size $s1 lt 1.1 GB, write request file"
      fi
      # The file still fits: append it and close the request immediately.
      export CONCAT_LIST="$CONCAT_LIST $INPUT_DIR/$file"
      export SIZE=$s1
      write_request
    fi
  else
#-----------------------------------------------------------------------
#  file size .le. 1.5 GB
#-----------------------------------------------------------------------
    if [[ "$verbose" != 0 ]]; then printf " total size %12i \n" "$SIZE"; fi
    s1=$(( SIZE + sz ))

    if (( s1 > MAX_REQUEST_BYTES )); then
      if [[ "$verbose" != 0 ]]; then
        echo "--------------------------------------------------"
        echo "small file, total size gt 1.7 GB, write request file"
      fi
      # write_request clears CONCAT_LIST, so the new list starts with this file.
      write_request
      export CONCAT_LIST="$CONCAT_LIST $INPUT_DIR/$file"
      export SIZE=$sz
    else
      export CONCAT_LIST="$CONCAT_LIST $INPUT_DIR/$file"
      export SIZE=$s1
    fi
  fi
done <<< "$list_of_files"

# Flush whatever is still pending. SIZE already holds the correct total for
# the pending list, and write_request does nothing when the list is empty.
write_request