#! perl -w

# written by troy sometime in september 2005

$documentation =  
"to compare jobsets over a run range with those in PROD_PHYSICS_CDF:

   1) select either:

     -n  PROCESS_NAME  latest version for each run is used
     -j  JOBSET        static jobset 
     
      with options:

     -u  switches PROCES_NAME checking off (faster for large run range)
     -m  if -n is used only check when a more recent version exists (much faster!)

   2) specify PROD_PHYSICS_CDF version (latest is default) by either:

     -p  PASSNAME
     -i  PASS_INDEX

   3) specify a start run (180000 is default) or a range

     -r LORUN           
     -r LORUN HIRUN      


to compare two jobsets:

   -j JOBSET JOBSET

to list the table content of a jobset

   -j JOBSET 

to list all the PASS_INDEX's for a given pass: 

   -l PASSNAME      

comments/bugs: let me know at tvine\@fnal.gov
";

$short_doc =
"quick usage (-h for more options/details)
-n PROCESS_NAME 
-j JOBSET
-p PASSNAME
-i PASS_INDEX
-r LORUN (HIRUN)
";

use DBI;
use Getopt::Std;

use lib '/cdf/scratch/cdfopr/cdfcalib/perl_libs';
require "db_queries.pl";

# check all the options are valid
if (!getopts('n:j:p:i:r:l:umh')) { die $short_doc }

if ($Getopt::Std::opt_h) 
{ 
    print $documentation; 
    exit; 
}


use DBI;
$dbh = DBI->connect('dbi:Oracle:cdfonprd','cdf_reader','reader') 
	or die "Error connecting $DBI::errstr\n";

# first compare two jobsets or list pass_index's if specified
if (!$Getopt::Std::opt_n 
    && !$Getopt::Std::opt_i 
    && !$Getopt::Std::opt_r
    && !$Getopt::Std::opt_p
    && !$Getopt::Std::opt_h
    && !$Getopt::Std::opt_u
    && !$Getopt::Std::opt_m)
{
    if ($Getopt::Std::opt_j)
    {
	my $js = $Getopt::Std::opt_j;

	if ($#ARGV == 0) #ie if there is one argument
	{
	    &print_diff_all_tables($dbh,$js,$ARGV[0]);   
	    exit;
	}
	else 
	{ 
	    print "jobset (name version run): ",&check_jobset($dbh,$js),"\n";
	    print &list_tables($dbh,$js);
	    exit;
	}
    }
    if ($Getopt::Std::opt_l)
    {
	if ($#ARGV < 0) #ie if there is one argument
	{
	    print &get_all_pass_index($dbh,$Getopt::Std::opt_l);
	    exit;    
	}
	else 
	{ 
	    die "to list all the PASS_INDEX's for a given pass use -l PASSNAME\n";
	}
    }
    
    die $short_doc;    
}

# this is a list of parameters that need to be set from the different options

my $process_name;  
my $jobset;        
my $txt = '';             
my $start_run;  
my $end_run;


# get the start and end run if they are 
if ($Getopt::Std::opt_r) 
{
    if ($#ARGV < 0 ) 
    { 
	$start_run = $Getopt::Std::opt_r;
	$end_run   = &get_max_run($dbh); 
	$run_txt = "from run $start_run to most recent ($end_run)";
    }
    elsif ($#ARGV == 0) 
    { 
	$start_run = $Getopt::Std::opt_r;
	$end_run    = $ARGV[0];
	$run_txt = "from run $start_run to $end_run";
    }
    else
    {
	die "to set run range use -r LORUN_RUN (HIGH_RUN)\n";
    }
}
elsif ($#ARGV >= 0) # if additional arguments are specified
{
    print $short_doc;    
    exit;
}
else  # set the default run range
{
    $start_run = 180000;
    $end_run   = &get_max_run($dbh);
    $run_txt = "from run $start_run to most recent ($end_run)";
}




if ($Getopt::Std::opt_u && !$Getopt::Std::opt_n)
{
    die "option -u is for use with option -n\n";
}


# now get the process_name 
if ($Getopt::Std::opt_n && !$Getopt::Std::opt_j)
{
    $process_name = $Getopt::Std::opt_n;
    
    unless ($Getopt::Std::opt_u)
    {
	$found = &check_process_name($dbh,$process_name,$start_run,$end_run);
	{
	    if ($found ne $process_name)
	    {
		die "process_name $process_name not in USED_SETS in run range $start_run to $end_run\n";
	    }
	}
    }
    $txt = "comparing latest version of $process_name";

    if ($Getopt::Std::opt_m)
    {
	$txt .= " for which a more than on versions exists";
    }
}
elsif ($Getopt::Std::opt_j && !$Getopt::Std::opt_n)
{
    $jobset = $Getopt::Std::opt_j;

    my $js = &check_jobset($jobset);
    unless ($js)
    {
	print "jobset $jobset does not exit in used_sets table\n";
	exit;
    } 
    
    $txt = "comparing jobset (name version run): ($js)";
    if ($Getopt::Std::opt_m)
    {
	die "option -m is for use with -n process_name"; 
    }

}
else
{
    die $short_doc;
}


# check and assign options -p and -i

my $passname = 'latest';  
my $pass_index;           # not set if 'latest' is specified

if (!$Getopt::Std::opt_p && !$Getopt::Std::opt_i)
{
    $txt .= ' with tables in latest version of PROD_PHYSICS_CDF for runs that are in pass 10A (all physics runs)';
    $pass_index = &get_pass_index($dbh, '10A');
}
elsif ($Getopt::Std::opt_p && !$Getopt::Std::opt_i)
{
    $passname = $Getopt::Std::opt_p;
    $pass_index = &get_pass_index($dbh, $passname);
    
    unless ($pass_index)
    {
	print "pass $passname not found in PASSES table\n";
	exit;
    }
    $txt .= " with tables in pass $passname (pass_index $pass_index) version of PROD_PHYSICS_CDF";
}
elsif ($Getopt::Std::opt_i && !$Getopt::Std::opt_p)
{
    $pass_index = $Getopt::Std::opt_i;
    my $sth = $dbh->prepare("select passname,retired from passes 
                             where pass_index =  ?");

    $sth->execute($pass_index);

    my @row = $sth->fetchrow_array;
    my $status = 'current';

    if ($row[1] =~ /YES/ ) {$status = 'RETIRED'}
    unless ($passname = $row[0])
    {
	print "PASS_INDEX $pass_index not found in PASSES table\n";
	exit;
    }
    
    $txt .= " with tables in the $status pass $passname (PASS_INDEX $pass_index) version of $row[2]";

}



print "$txt $run_txt\n";


# get runs to loop over
if ($Getopt::Std::opt_m)  # get runs that have version 2 or greater
{
    my $sql = "select used_sets.process_run
               from used_sets,passcalibs 
               where used_sets.process_run = passcalibs.lorun
               and used_sets.process_name = ?
               and passcalibs.pass_index = ?
               and used_sets.proc_calib_version = 2
               and used_sets.process_run between ? and ?" ;

    $sth = $dbh->prepare($sql) or die "Error reading DBI::errstr\n";
    $sth->execute($process_name,$pass_index,$start_run,$end_run);
}
else
{
    my $sql = "select lorun from passcalibs where pass_index = ?
               and lorun between ? and ?";

    $sth = $dbh->prepare($sql) or die "Error reading DBI::errstr\n";
    $sth->execute($pass_index,$start_run,$end_run);

}


while (my $run = $sth->fetchrow)
{
    if ($passname =~ /latest/i) 
    {
	$cdf_version = &get_latest_version($dbh,$run,'PROD_PHYSICS_CDF'); 
    }
    else
    {
	$cdf_version = &get_pass_version($dbh,$run,$pass_index); 
    }
    
    my $latest_version = &get_latest_version($dbh,$run,$process_name); 
    my $latest_jobset  = &get_jobset($dbh,$run,$process_name,$latest_version); 
    my $cdf_jobset  = &get_jobset($dbh,$run,'PROD_PHYSICS_CDF',$cdf_version); 
    
    my $diff = &diff_tables($dbh,$latest_jobset,$cdf_jobset);

    if ($diff)
    { 
	print "run $run missing updated tables:\n$diff\n";
    }
}
print "finished checking\n";


$dbh->disconnect();

exit;


sub print_diff_all_tables
{
# arguments 1) database handle 2) jobset 3)jobset
    
# this takes two jobsets and ckecks them and prints tables with cids
# not common to both 
    
    my ($dbh, @jobsets) = @_;
    
    
    my $txt = "jobsets (name version run): ";
    
    foreach my $jobset (@jobsets)
    {
	my $js = &check_jobset($dbh,$jobset);
	
	if ($js)
	{
	    $txt .= " $js  ";
	}
	else
	{
	    die "jobset $jobset does not exit in the USED_SETS table\n";
	}
    }
    
    $diff = &diff_all_tables($dbh,$jobsets[0],$jobsets[1]);
    print $txt, "\nhave following different (or absent) table cids:\n$diff" ;
}

# --------------------------------------------

sub get_all_pass_index
{
# arguments 1) database handle 2) pass_name
# prints all the pass_index's associated with a specific pass

    my ($dbh,$pass_name) = @_;
    
    my $txt = "listing all pass index's for pass $pass_name\n";
    my $sth = $dbh->prepare("select pass_index,retired from passes where passname like ?");
    $sth->execute($pass_name);	    
    while(@row = $sth->fetchrow_array)  {$txt .= "$row[0]    $row[1]\n"}

    $txt =~ s/YES/RETIRED/g;    

    return $txt;
}

# ---------------------------------------------------------------

sub check_process_name
{
#arguments 1) database handle 2)process_name 3) start run 4) end run
    my ($dbh,$process_name,$start_run,$end_run) = @_;
    
    my $sth = $dbh->prepare("select distinct process_name from used_sets 
                             where process_name like ?
                             and process_run between ? and ?");

    
    $sth->execute($process_name,$start_run,$end_run);

    my $txt =  $sth->fetchrow;
    return $txt;
}
# ---------------------------------------------------------------
sub check_jobset
# arguments 1) database handle 2)jobset
# returns a strig of the process_name, proc_calib_version and the run
{
    my ($dbh,$jobset) = @_;
    
    my $sth = $dbh->prepare("select process_name,proc_calib_version,process_run 
                             from used_sets where jobset = ? ");

    $sth->execute($jobset);

    if (my @row = $sth->fetchrow_array)
    {
	return "$row[0] $row[1] $row[2]";
    }

    return;
}
# ------------------------------------------------------------------
 sub get_all_process_name
{
#arguments 1) database handle 2) start run 3) end run

# returns a string of all the valid process_names in the run range specified

    my ($dbh,$start_run,$end_run) = @_;
    
    my $sth = $dbh->prepare("select distinct process_name from used_sets where process_run between ? and ?");
    
    $sth->execute($start_run,$end_run); 
    
    my $txt = '';
    while($row = $sth->fetchrow)  {$txt .= "$row\n"}

    return $txt;
}

# -----------------------------------------------------

