#! perl -w

# written by troy sometime in september 2005

my $documentation =  
"to compare jobsets over a run range with those in PROD_PHYSICS_CDF:

   1) select either:

     -n  PROCESS_NAME 

           additional optionsi with option -n:
           -u   unchecked name:  switches PROCESS_NAME checking off
           -m   modified only:   only check when a more recent version exist
      
     -j  JOBSET       
     -p  PASSNAME
     -i  PASS_INDEX
     

   2) optionally specify PROD_PHYSICS_CDF version (latest is default) :

     -p  PASSNAME
     -i  PASS_INDEX
     
     note that if the chosed option is the same as step 1 then 
     do not specify it again

3) optionally specify a run or run range (entire range is defult)
      
     -r LORUN           
     -r LORUN-HIRUN  

also the content of two jobsets can be compared:

     -j JOBSET JOBSET

to list the table content of a jobset or a passname

   -lj JOBSET 
   -lp PASSNAME      

comments/bugs: let me know at tvine\@fnal.gov
";

my $short_doc =
"
  n PROCESS_NAME 
  j JOBSET       (JOBSET)
  p PASSNAME     (PASSNAME)
  i PASS_INDEX   (PASS_INDEX)
  r LORUN        (HIRUN)
  
  h : for help
  m : only print when a PROCESS_NAME has 2 or more version
  u : do not check PROCESS_NAME
  l : list the contents of a jobset or a pass

  NOTE: perl is a total pile of rubbish and you will have to specify the run range first before any values that are not preceeded by an option
";

use strict;
use DBI;
use Getopt::Std;

use lib '/cdf/scratch/cdfopr/cdfcalib/perl_libs';
require "db_queries.pl";

# check all the options are valid
my %opt;
getopts('n:j:p:i:r:lumh',\%opt) or die $short_doc;

die $documentation if $opt{h};

use DBI;
my $dbh = DBI->connect('dbi:Oracle:cdfonprd','cdf_reader','reader') 
    or die "Error connecting $DBI::errstr\n";


die $short_doc if ($#ARGV > 0); # there is never more than one extra argument

# first call all the subroutines that take  option l

if ($opt{l})
{
  if ( $#ARGV < 0 )  # no extra optons
  {
    if ($opt{j}) 
    { 
      print &check_jobset($dbh,$opt{j}),"\n";
      print &list_tables($dbh,$opt{j});
      exit;
    }
    if ($opt{p}) 
    { 
      print &get_all_pass_index($dbh,$opt{p});
      exit
    }
  }
  die $short_doc;    
}


# parse the options for pass or jobset  comparison
if ($#ARGV == 0)
{
  if ( ($opt{r} && keys(%opt) == 2) || keys(%opt) == 1 )
  {
    if ($opt{p}) 
    {
      my $pass_indexA = &get_pass_index($dbh,$opt{p});
      my $pass_indexB = &get_pass_index($dbh,$ARGV[0]);
      print_pass_diff($dbh,$pass_indexA,$pass_indexB,$opt{r});
      exit;
    }
    if ($opt{i})
    {
      print_pass_diff($dbh, $opt{i}, $ARGV[0], $opt{r} );
      exit;
    }
    if ($opt{j} && !$opt{r})
    {
      print &check_jobset($dbh,$opt{j}),"\n";
      print &check_jobset($dbh,$ARGV[0]),"\n";

      my $diff = &diff_all_tables($dbh,$opt{j},$ARGV[0]);
      if ($diff)
      {
	print "have following different (or absent) table cids:\n$diff\n" ;
	exit;
      }
      print "are identical\n";
      exit;
    }
  }
# there are no other combinations that have an extra argument
  die $short_doc;
}


if ($opt{i} && $opt{p})
{
  my $pass_index = &get_pass_index($dbh,$opt{p});
  print_pass_diff($dbh,$opt{i},$pass_index,$opt{r});
  exit;
}


die $short_doc unless ( $opt{r} && keys(%opt) <= 3 || keys(%opt) <= 2 );

if ($opt{j})
{
  my $pass_index = &get_pass_index($dbh, '0-JUNK');

  if    ($opt{i}) { $pass_index = $opt{i}; }
  elsif ($opt{p}) { $pass_index = &get_pass_index($dbh,$opt{p}); }

  my @runs = ();
  print "comparing " . &check_jobset($dbh,$opt{j}) . " with ";
  print &check_pass_index($dbh, $pass_index);
  print " version of PROD_PHYSICS_CDF "; 
  print &get_runs($dbh, $pass_index, \@runs, $opt{r}) . "\n";
  &print_jobset_cdf_diff($dbh, $opt{j}, \@runs);
  exit;
}


if ($opt{n})
{
  unless ($opt{u}) { &check_process_name($dbh,$opt{n},$opt{r}); }

  my $pass_index = &get_pass_index($dbh, '0-JUNK');

  if    ($opt{i}) { $pass_index = $opt{i}; }
  elsif ($opt{p}) { $pass_index = &get_pass_index($dbh,$opt{p}); }

  print "comparing latest version of $opt{n} with ";

  if ($opt{i} || $opt{p})
  {
    print &check_pass_index($dbh, $pass_index);
    print " version of PROD_PHYSICS_CDF ";
  }
  else
  {
    print " latest version of PROD_PHYSICS_CDF ";
  }

  my @runs = ();

  if ($opt{m})
  {
    print " for which a more than one version of $opt{n}  exists ";
    print &get_modified_runs($dbh, $pass_index, \@runs, $opt{r}) . "\n";;
  }
  else
  {
    print &get_runs($dbh, $pass_index, \@runs, $opt{r}) . "\n";;
  }

  if ($opt{i} || $opt{p})
  {
    &print_pass_cdf_diff($dbh, $pass_index, $opt{n}, \@runs);
    exit;
  }

  &print_latest_cdf_diff($dbh, $opt{n}, \@runs);
  exit;
}

if ($opt{i} || $opt{p})
{
  my $pass_index = $opt{i};
  
  $pass_index = &get_pass_index($dbh,$opt{p}) if $opt{p};

  print "comparing " .  &check_pass_index($dbh, $pass_index);
  print " with latest version of PROD_PHYSICS_CDF ";

  my @runs = ();
  print &get_runs($dbh, $pass_index, \@runs, $opt{r}) . "\n";;
  &print_pass_cdf_diff($dbh, $pass_index, 'PROD_PHYSICS_CDF', \@runs);
  exit;
}

die $short_doc;

# ---------------------------------------------------------------
sub check_process_name
{
#arguments 1) database handle 2)process_name 3) opt_r

  my ($dbh,$process_name,$opt_r) = @_;

  my ($start_run,$end_run);

  my $run_txt = &get_run_range($dbh, \$start_run, \$end_run, $opt_r);

  my $sth = $dbh->prepare("select distinct process_name from used_sets 
    where process_name like ?
      and process_run between ? and ?");


  $sth->execute($process_name,$start_run,$end_run);

  my $txt = $sth->fetchrow;

  return $txt if ($txt);

  # print a list of all valid names
  
  $sth = $dbh->prepare("select distinct process_name from used_sets where process_run between ? and ?");

  $sth->execute($start_run,$end_run); 

  while(my $row = $sth->fetchrow)  {$txt .= "$row\n"}

  die "process_name $process_name not in USED_SETS $run_txt\nvalid names:\n$txt";
}
# ------------------------------------------------------------------
sub get_modified_runs
{
  # get the runs in the range for which the process_name has
  # got two or more versions

  my ($dbh, $process_name, $pass_index, $run_ref, $opt_r) = @_;

  my ($start_run, $end_run);
  
  my $txt = &get_run_range($dbh, \$start_run, \$end_run, $opt_r);
  my $sql = 
  "select used_sets.process_run
  from used_sets,passcalibs 
  where used_sets.process_run = passcalibs.lorun
    and used_sets.process_name = ?
    and passcalibs.pass_index = ?
    and used_sets.proc_calib_version = 2
    and used_sets.process_run between ? and ? 
    order by used_sets.process_run";

  my $sth = $dbh->prepare($sql);
  $sth->execute($process_name,$pass_index,$start_run,$end_run);

  while (my $run = $sth->fetchrow) { push (@{$run_ref}, $run); }

  return $txt;
}
# -----------------------------------------------------
sub get_runs
{

  my ($dbh, $pass_index, $run_ref, $opt_r) = @_;

  my ($start_run, $end_run);
  
  my $txt = &get_run_range($dbh, \$start_run, \$end_run, $opt_r);

  my $sql = "select lorun from passcalibs where pass_index = ?
      and lorun between ? and ? order by lorun";

  my $sth = $dbh->prepare($sql);
  $sth->execute($pass_index,$start_run,$end_run);

  while (my $run = $sth->fetchrow) { push(@{$run_ref}, $run); }

  return $txt;
}
#--------------------------------------------------------
sub print_pass_cdf_diff
{
  # 1) db handle 2) pass_index 3) jobset name 4) run reference
  
  # print the difference in, or report missing, tables in the latest
  # specified jobset name and specified pass version of PROD_PHYSICS_CDF

  my ($dbh, $pass_index, $process_name, $runs_ref) = @_;


  # loop over runs
  foreach my $run ( sort @$runs_ref)
  {
    my $cdf_version = &get_pass_version($dbh,$run,$pass_index); 
    unless ($cdf_version)
    {
      print "run $run not in pass_index $pass_index\n";
      next;
    }

    my $latest_version = &get_latest_version($dbh,$run,$process_name); 
    unless ($latest_version)
    {
      print "run $run has no process_name $process_name\n";
      next;
    }

    my $latest_jobset  = &get_jobset($dbh,$run,$process_name,$latest_version); 
    my $cdf_jobset  = &get_jobset($dbh,$run,'PROD_PHYSICS_CDF',$cdf_version); 

    my $diff;

    if ($process_name =~ /PROD_PHYSICS_CDF/)
    {
      $diff = &diff_tables($dbh,$latest_jobset,$cdf_jobset);
    }
    else
    {
      $diff = &diff_all_tables($dbh,$latest_jobset,$cdf_jobset);
    }


    if ($diff)
    { 
      print "run $run: ($process_name v$latest_version PROD_PHYSICS_CDF v$cdf_version):\n$diff\n";
    }
  }
  print "finished checking\n";
}
#--------------------------------------------------------
sub print_jobset_cdf_diff
{

  # 1) db handle 2) jobset 3) run array reference
  # 
  # print the difference in, or report missing, tables in either the latest version
  # of a jobset specified by name or the jobset specified by its number
  # and latest version of PROD_PHYSICS_CDF

  my ($dbh, $jobset, $runs_ref) = @_;


  # loop over the runs
  foreach my $run (@$runs_ref)
  {
    my $cdf_version = &get_latest_version($dbh,$run,'PROD_PHYSICS_CDF'); 
    my $cdf_jobset  = &get_jobset($dbh,$run,'PROD_PHYSICS_CDF',$cdf_version); 

    my $diff = &diff_tables($dbh,$jobset,$cdf_jobset);

    if ($diff)
    { 
      print "run $run: (jobset $jobset PROD_PHYSICS_CDF v$cdf_version):\n$diff\n";
    }
  }

print "finished checking\n";

}
#--------------------------------------------------------
sub print_latest_cdf_diff
{

  # 1) db handle 2) jobset name 3) run array reference
  # 
  # print the difference in, or report missing, tables in the
  # latest version of a jobset specified by name 
  # and latest version of PROD_PHYSICS_CDF

  my ($dbh, $name, $runs_ref) = @_;

  # loop over the runs
  foreach my $run (@$runs_ref)
  {
    my $cdf_version = &get_latest_version($dbh,$run,'PROD_PHYSICS_CDF'); 
    my $cdf_jobset  = &get_jobset($dbh,$run,'PROD_PHYSICS_CDF',$cdf_version); 

    my $version = &get_latest_version($dbh,$run,$name); 

    if ($version)
    {
      my $jobset  = &get_jobset($dbh,$run,$name,$version); 

      my $diff = &diff_tables($dbh,$jobset,$cdf_jobset);

      if ($diff)
      { 
	print "run $run: ($name v$version PROD_PHYSICS_CDF v$cdf_version):\n$diff\n";
      }
    }
  }

print "finished checking\n";

}
#-------------------------------------------------------
sub print_pass_diff
{
  # 1) db handle 2) pass_index 3) opt_r
  # print the difference in, or report missing, tables in two passes

  my ($dbh, $pass_indexA, $pass_indexB, $opt_r) = @_;

  my ($start_run, $end_run);

  print "comparing " . &check_pass_index($dbh,$pass_indexA);
  print " with " . &check_pass_index($dbh,$pass_indexB);
  print " " .  &get_run_range($dbh, \$start_run, \$end_run, $opt_r) . "\n";

  my $sth = $dbh->prepare("select lorun,proc_calib_version 
    from passcalibs where pass_index = ?
      and lorun between ? and ?");

  
  my %passA = ();
  $sth->execute( $pass_indexA, $start_run, $end_run );
  while ( my @row = $sth->fetchrow_array ) { $passA{$row[0]} = $row[1] }


  my %passB = ();
  $sth->execute( $pass_indexB, $start_run, $end_run );
  while ( my @row = $sth->fetchrow_array ) { $passB{$row[0]} = $row[1] }

  $sth->finish();


  my $pn = 'PROD_PHYSICS_CDF';

  for my $run (sort keys %passA)
  {
    my $versionA = $passA{$run};
    my $versionB = $passB{$run};
    if ($versionB)
    {
      if($versionB != $versionA) 
      {
	my $dateA = &get_version_create_date($dbh, $run, $versionA, $pn );
	my $dateB = &get_version_create_date($dbh, $run, $versionB, $pn );

	my $jobsetA = &get_jobset($dbh,$run,$pn,$versionA); 
	my $jobsetB = &get_jobset($dbh,$run,$pn,$versionB); 
	my $diff =  &diff_all_tables($dbh,$jobsetA,$jobsetB);

	if ($diff)
	{
	  print " \nrun $run v$versionA ($dateA) v$versionB ($dateB)\n";
	  print $diff;
	  print "\n";
	}
      }
      delete $passB{$run};
    }
    else
    {
      my $dateA = &get_version_create_date($dbh, $run, $versionA, $pn );
      print "run $run v$versionA ($dateA)  MISSING\n"
    }
  }

  for my $run (sort keys %passB)
  { 
    my $version = $passB{$run};
    my $date = &get_version_create_date($dbh, $run, $version, $pn );
    print "run $run MISSING v$version ($date)\n"
  }

  print "finished\n";

}

# ---------------------------------------------------------------
sub check_jobset
# arguments 1) database handle 2)jobset
# returns a strig of the process_name, proc_calib_version and the run or exits
{
  my ($dbh,$jobset) = @_;

  my $sth_us = $dbh->prepare("select process_name,proc_calib_version,process_run 
    from used_sets where jobset = ? ");

  my $sth_vs = $dbh->prepare("select process_name from valid_sets where jobset = ? ");
  $sth_us->execute($jobset);

  if (my @row = $sth_us->fetchrow_array)
  {
    return "jobset $jobset (used_sets) $row[0] version $row[1] run $row[2]";
  }
  else
  {
    $sth_vs->execute($jobset);
    if (my $row = $sth_vs->fetchrow)
    {
      return "jobset(valid_sets) $row";
    }
  }
  die "jobset $jobset not found in USED_SETS or VALID_SETS table";
}
#----------------------------------------------------------------------------
sub check_pass_index
{
  # checks the pass_index and returns an info string or exits if not found

  my ($dbh, $pass_index) = @_;

  my $sth = $dbh->prepare("select passname,retired from passes 
    where pass_index =  ?");

  $sth->execute($pass_index);

  my @row = $sth->fetchrow_array;
  my $status = 'current';

  if ($row[1]) 
  {
    $status = 'RETIRED';
  }

  if ($row[0])
  {
    return "$status pass $row[0] (pass_index $pass_index)"; 
  }
  die "PASS_INDEX $pass_index not found in PASSES table\n";
}

