// -------------------------------------------------------------
//
// File mergeLuminosityLogs.cc
//
// Created 6 January 2005
// Author: W. David Dagenhart
// Brandeis University
//
// Major update June 2006 to accommodate the good run lists
// which include segment ranges in addition to only run
// numbers. After this update this program can process
// both the old and new types of good run list properly.
//
// This program processes a set of rsl files used in the luminosity
// calculation. It merges the runs and segments listed in the
// rsl files into one new rsl file, sorts the run/segment list,
// removes and reports any duplicate segments, removes runs and
// segments not on a good run list, and outputs a single text file
// named "newFile.rsl". That file can be processed using the command
//
//   "lumsum.pl offline newFile.rsl"
//
// on one of the central systems to calculate the luminosity.
// (Don't forget to multiply by 1.019 afterwards).
//
// This is a small standalone C++ program. It is NOT run under the
// AC++ framework. To compile and link this program:
//
//   "g++ -o mergeLuminosityLogs mergeLuminosityLogs.cc"
//
// To run this program:
//
//   "./mergeLuminosityLogs rsl/lum_ 1 123 goodRunList.txt"
//
// Parameters:
//   Base rsl filename and path
//   Index of first rsl file (0 to 9999)
//   Last index (>= first index and in range 0 to 9999)
//   Good run list file name
//
// RSL filenames are assumed to have a format like "lum_0045.rsl".
// The first part is the base filename (the first argument)
// concatenated with an index (in a 4 character field padded
// to the left with 0's if necessary)
// concatenated with ".rsl" at the end.
//
// Usually the index is the segment number from the CAF job,
// although it does not have to be. It is easiest if you
// use the TCL file in the CAF jobs to generate rsl filenames
// with the format required by this script.
//
// The good run list is a simple text file with three numbers
// on each line. First the run number, then the lowest good
// segment, then the highest good segment. If the highest
// good segment number is -1, that means the segments are
// good all the way up to the highest segment. These
// files can be made by just copying from the DQM web site.
// Note the older format of good run list files will also
// work. If there is only one number on a line, that is
// interpreted as a run number and it is assumed that all
// segments in that run are good.
//
// If the last argument is not provided a warning is printed
// and all runs are assumed good. If the good run list file
// cannot be opened, an error message is printed out and the
// program aborts.
//
// If some rsl files are not present the process continues with
// the next file. A warning will be printed for each file missing
// in the sequence with a summary of the total number at the end.
// You need to look for errors in the output. Also check the output
// to verify the correct number of files were found, opened, etc ...
// (This can vary depending on how you number your CAF segments
// and do bookkeeping. Personally I use segment numbers at the top
// of the sequence when recovering CAF segments that failed. In this
// case, I expect there to be rsl files missing in the sequence and
// will keep track of how many I expect to be missing. I will make
// sure the number of missing rsl files equals the number of failed
// CAF segments.)
//
// If you have more than 10,000 rsl files the naming scheme above
// does not work because the filename format only allows 4 characters.
// Change the value of the variable "fieldWidth" below to
// extend the width to 5 or more characters, which will allow
// 100,000 or more rsl files.
//
// If your rsl filenames do not follow the required format, there are
// about three lines of code below where the variable "inputFileName"
// gets its string formed.
// A little editing there will make this
// program recognize a wider variety of rsl file naming conventions.
//
// -------------------------------------------------------------

// NOTE(review): the header names and all template argument lists in this
// file were reconstructed from how the code uses them -- confirm against
// the original source if it is available.
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// Width of the zero-padded index field in the rsl filenames.
const int fieldWidth = 4;

// An inclusive range of segment numbers [s1, s2] within one run.
//
// Two sentinel values are used by the code below:
//   s2 == -1            (good run list only) the range has no upper bound
//   s1 == s2 == -2      the range was merged into a neighbour and must
//                       be ignored ("deleted")
//
// Ordering compares the lower bound only, so ranges with the same s1 are
// considered equivalent by std::sort.
class SegmentRange {

public:

  SegmentRange(): _s1(0), _s2(0) {}
  SegmentRange(int s1, int s2): _s1(s1), _s2(s2) {}

  // Copy construction, copy assignment and destruction are left to the
  // compiler; the class only holds two ints.

  int s1() const { return _s1; }
  int s2() const { return _s2; }

  void set_s1(int value) { _s1 = value; }
  void set_s2(int value) { _s2 = value; }

  bool operator<(const SegmentRange& rhs) const { return _s1 < rhs._s1; }

private:

  int _s1;
  int _s2;
};

namespace {

typedef std::vector<SegmentRange> SegmentRangeVector;
typedef std::map<int, SegmentRangeVector> RunSegmentMap;

// Upper bound value meaning "up to the highest segment" in a good run list.
const int openEndedSegment = -1;

// Value stored in both bounds of a range that has been merged away.
const int deletedSegment = -2;

// Parses a non-negative decimal integer from a command line argument.
// Returns false if the text is not a number, has trailing non-blank
// characters, does not fit in an int, or is negative.
bool parseIndex(const char* text, int& value)
{
  std::istringstream stream(text);
  stream >> value;
  if (stream.fail()) {
    return false;
  }
  // Any further non-whitespace character means the argument was not a
  // plain integer (e.g. "12abc").
  char trailing = 0;
  if (stream >> trailing) {
    return false;
  }
  return value >= 0;
}

// Inserts value into an already sorted vector unless it is present,
// keeping the vector sorted and free of duplicates.
void insertSortedUnique(std::vector<int>& sortedValues, int value)
{
  std::vector<int>::iterator position =
    std::lower_bound(sortedValues.begin(), sortedValues.end(), value);
  if (position == sortedValues.end() || *position != value) {
    sortedValues.insert(position, value);
  }
}

// Sanity check: prints message once for every element that is not
// strictly greater than its predecessor (the first element is compared
// against -1).
void reportIfNotStrictlyIncreasing(const std::vector<int>& runs,
                                   const char* message)
{
  int previousRun = -1;
  for (std::vector<int>::const_iterator run = runs.begin();
       run != runs.end(); ++run) {
    if (*run <= previousRun) {
      std::cout << message;
    }
    previousRun = *run;
  }
}

// Reads one rsl file. Lines starting with "R " set the current run,
// lines starting with "S " add a segment range to the current run.
// Lines of two characters or fewer, and all other lines, are ignored.
// Returns false (after printing an error) if the file is malformed.
bool readRslFile(std::istream& inputFile,
                 std::vector<int>& runVector,
                 RunSegmentMap& runSegmentMap)
{
  std::string line;
  int currentRun = 0;

  // Testing the result of getline (rather than eof()) also terminates
  // the loop when the stream fails for a reason other than end of file.
  while (std::getline(inputFile, line)) {

    if (line.length() <= 2) {
      continue;
    }
    std::istringstream lineStream(line);

    // Look for lines with run numbers
    if (line[0] == 'R' && line[1] == ' ') {
      char tag = 0;
      lineStream >> tag >> currentRun;
      if (lineStream.fail()) {
        std::cout << "\n*** ERROR: Failed reading run number from "
                     "rsl file\n\n";
        return false;
      }
      continue;
    }

    // Look for lines with segment ranges
    if (!(line[0] == 'S' && line[1] == ' ')) {
      continue;
    }

    char tag = 0;
    int s1 = 0;
    int s2 = 0;
    lineStream >> tag >> s1 >> s2;
    if (lineStream.fail()) {
      std::cout << "\n*** ERROR: Failed reading segment numbers from "
                   "rsl file\n\n";
      return false;
    }

    if (currentRun <= 0) {
      std::cout << "*** ERROR: Segments in input with nonpositive "
                   "current run. ***\n\n";
      return false;
    }
    if (s1 < 0 || s2 < 0) {
      std::cout << "*** ERROR: Negative segment numbers. ***\n\n";
      return false;
    }
    if (s1 > s2) {
      std::cout << "*** ERROR: Segment number in input out of order. "
                   "***\n\n";
      return false;
    }

    // Record the run (once) and append the range to that run's list
    insertSortedUnique(runVector, currentRun);
    runSegmentMap[currentRun].push_back(SegmentRange(s1, s2));
  }
  return true;
}

// Reads the good run list. Lines of five characters or fewer are
// ignored. A line holding "run low high" adds that range; a line from
// which low and high cannot both be read is treated as the old format
// (whole run good) and counted in linesWithOnlyRunNumber.
// Returns false (after printing an error) if the file is malformed.
bool readGoodRunList(std::istream& inputFile,
                     std::vector<int>& goodRunVector,
                     RunSegmentMap& goodRunSegmentMap,
                     int& linesWithOnlyRunNumber)
{
  std::string line;

  while (std::getline(inputFile, line)) {

    if (line.length() <= 5) {
      continue;
    }
    std::istringstream lineStream(line);

    int run = 0;
    lineStream >> run;
    if (lineStream.fail()) {
      std::cout << "\n***ERROR: failed to read run number from"
                   " good run list file.\n\n";
      return false;
    }
    insertSortedUnique(goodRunVector, run);

    int segmentLow = 0;
    int segmentHigh = 0;
    lineStream >> segmentLow >> segmentHigh;

    if (lineStream.fail()) {
      // Deal with old format good run list which just has a
      // run number with no segment ranges.
      segmentLow = 0;
      segmentHigh = openEndedSegment;
      ++linesWithOnlyRunNumber;
    }
    else if (segmentLow > segmentHigh && segmentHigh != openEndedSegment) {
      std::cout << "\n***ERROR: Reading good run list and segmentLow"
                << " is greater than segmentHigh\n";
      std::cout << "Run = " << run
                << " segmentLow = " << segmentLow
                << " segmentHigh = " << segmentHigh << "\n\n";
      return false;
    }
    else if (segmentLow < 0) {
      std::cout << "\n***ERROR: Reading good run list and segmentLow"
                << " is negative\n\n";
      return false;
    }

    goodRunSegmentMap[run].push_back(SegmentRange(segmentLow, segmentHigh));
  }
  return true;
}

// Sorts the ranges of every run read from the rsl files and makes
// neighbouring ranges disjoint. Each overlap other than one consisting
// only of segment 0 is printed. Returns the number of overlapping
// segments, not counting segment 0.
int removeRslOverlaps(RunSegmentMap& runSegmentMap)
{
  int overlapTotal = 0;

  for (RunSegmentMap::iterator runEntry = runSegmentMap.begin();
       runEntry != runSegmentMap.end(); ++runEntry) {

    SegmentRangeVector& ranges = runEntry->second;
    std::sort(ranges.begin(), ranges.end());

    // Compare every range with its successor in sorted order
    for (SegmentRangeVector::size_type i = 0; i + 1 < ranges.size(); ++i) {

      SegmentRange& current = ranges[i];
      SegmentRange& next = ranges[i + 1];

      if (current.s2() < next.s1()) {
        continue;  // disjoint, nothing to do
      }

      const int overlapStart = next.s1();
      int overlapEnd = current.s2();
      if (current.s2() > next.s2()) {
        // current extends past next: let next carry the far end
        overlapEnd = next.s2();
        next.set_s2(current.s2());
      }

      if (current.s1() == next.s1()) {
        // Set to invalid values, equivalent to deleting it
        current.set_s1(deletedSegment);
        current.set_s2(deletedSegment);
      }
      else {
        current.set_s2(next.s1() - 1);
      }

      // Segment 0 is excluded from the count
      if (overlapStart != 0 && overlapEnd != 0) {
        overlapTotal += (overlapEnd - overlapStart + 1);
      }
      else if (overlapStart == 0 && overlapEnd != 0) {
        overlapTotal += overlapEnd;
      }

      // Overlaps of segment 0 are common,
      // Only print out the other overlaps
      // I think run segments numbered 0 have zero
      // luminosity always.
      if (overlapStart != 0 || overlapEnd != 0) {
        std::cout << "Overlap: R " << runEntry->first
                  << " S " << overlapStart << " " << overlapEnd << "\n";
      }
    }
  }
  return overlapTotal;
}

// Sorts the ranges of every run in the good run list and makes
// neighbouring ranges disjoint, treating an upper bound of -1 as
// unbounded. Returns the number of neighbouring pairs that overlapped.
int removeGoodRunOverlaps(RunSegmentMap& goodRunSegmentMap)
{
  int nRangesWithOverlap = 0;

  for (RunSegmentMap::iterator runEntry = goodRunSegmentMap.begin();
       runEntry != goodRunSegmentMap.end(); ++runEntry) {

    SegmentRangeVector& ranges = runEntry->second;
    std::sort(ranges.begin(), ranges.end());

    for (SegmentRangeVector::size_type i = 0; i + 1 < ranges.size(); ++i) {

      SegmentRange& current = ranges[i];
      SegmentRange& next = ranges[i + 1];

      const bool disjoint = current.s2() != openEndedSegment &&
                            current.s2() < next.s1();
      if (disjoint) {
        continue;
      }
      ++nRangesWithOverlap;

      // Let next carry the larger upper bound (unbounded beats anything)
      if (next.s2() != openEndedSegment &&
          (current.s2() == openEndedSegment || current.s2() > next.s2())) {
        next.set_s2(current.s2());
      }

      if (current.s1() == next.s1()) {
        // Set to invalid values, equivalent to deleting it
        current.set_s1(deletedSegment);
        current.set_s2(deletedSegment);
      }
      else {
        current.set_s2(next.s1() - 1);
      }
    }
  }
  return nRangesWithOverlap;
}

// Writes the merged run/segment list. Without a good run list every
// run and every surviving range is written. With one, only the
// intersections of rsl ranges with good ranges are written, and a run
// header is written only if the run has at least one such intersection.
void writeMergedRsl(std::ostream& outputFile,
                    const std::vector<int>& runVector,
                    const RunSegmentMap& runSegmentMap,
                    bool goodRunListExists,
                    const std::vector<int>& goodRunVector,
                    const RunSegmentMap& goodRunSegmentMap)
{
  for (std::vector<int>::const_iterator run = runVector.begin();
       run != runVector.end(); ++run) {

    // Every run in runVector was added together with a map entry
    const RunSegmentMap::const_iterator runEntry = runSegmentMap.find(*run);
    if (runEntry == runSegmentMap.end()) {
      continue;
    }
    const SegmentRangeVector& segmentRanges = runEntry->second;

    // Deal with the case where there is no good run list
    if (!goodRunListExists) {
      outputFile << "R " << *run << "\n";
      for (SegmentRangeVector::const_iterator segment = segmentRanges.begin();
           segment != segmentRanges.end(); ++segment) {
        if (segment->s1() != deletedSegment) {
          outputFile << "S " << segment->s1() << " " << segment->s2() << "\n";
        }
      }
      continue;
    }

    if (!std::binary_search(goodRunVector.begin(), goodRunVector.end(),
                            *run)) {
      continue;  // run is not on the good run list
    }
    const RunSegmentMap::const_iterator goodEntry =
      goodRunSegmentMap.find(*run);
    if (goodEntry == goodRunSegmentMap.end()) {
      continue;
    }
    const SegmentRangeVector& goodRanges = goodEntry->second;

    bool printedRunNumber = false;
    for (SegmentRangeVector::const_iterator segment = segmentRanges.begin();
         segment != segmentRanges.end(); ++segment) {

      if (segment->s1() == deletedSegment) {
        continue;
      }
      for (SegmentRangeVector::const_iterator good = goodRanges.begin();
           good != goodRanges.end(); ++good) {

        if (good->s1() == deletedSegment) {
          continue;
        }
        const bool goodIsOpenEnded = (good->s2() == openEndedSegment);
        const bool rangesOverlap =
          (segment->s1() <= good->s2() || goodIsOpenEnded) &&
          segment->s2() >= good->s1();
        if (!rangesOverlap) {
          continue;
        }

        // Intersection of the rsl range with the good range
        const int low = std::max(segment->s1(), good->s1());
        const int high = goodIsOpenEnded
                           ? segment->s2()
                           : std::min(good->s2(), segment->s2());

        if (!printedRunNumber) {
          outputFile << "R " << *run << "\n";
          printedRunNumber = true;
        }
        outputFile << "S " << low << " " << high << "\n";
      }
    }
  }
}

}  // namespace

// Program entry point.
//
// Arguments: rsl_basename first_index last_index [goodRunList]
//
// Reads every rsl file in the index range (missing files only produce a
// warning), optionally reads the good run list, removes overlapping
// segment ranges, and writes the result to "newFile.rsl".
//
// Returns 0 on success and 1 on a usage error, a malformed input file,
// an unreadable good run list, or a failure to write the output file.
int main(int argc, char* argv[])
{
  std::cout << "\nBegin process mergeLuminosityLogs\n\n";

  if (argc != 4 && argc != 5) {
    std::cerr << "Aborting \nError: Incorrect Number of Arguments\n"
                 "Usage: ./mergeLuminosityLogs rsl_basename first_index "
                 "last_index [goodRunList]\n\n";
    return 1;
  }

  std::cout << "Parsing arguments:\n";
  const std::string baseName(argv[1]);

  // Reject non-numeric, negative or reversed indices instead of silently
  // processing a different set of files than the user intended.
  int firstIndex = 0;
  int lastIndex = 0;
  if (!parseIndex(argv[2], firstIndex) ||
      !parseIndex(argv[3], lastIndex) ||
      lastIndex < firstIndex) {
    std::cerr << "Aborting \nError: first_index and last_index must be "
                 "non-negative integers with first_index <= last_index\n\n";
    return 1;
  }

  std::cout << " rsl base filename = " << baseName << "\n";
  std::cout << " First rsl file index = " << firstIndex << "\n";
  std::cout << " Last rsl file index = " << lastIndex << "\n";

  std::string goodRunList;
  bool goodRunListExists = false;
  if (argc == 5) {
    goodRunList.assign(argv[4]);
    goodRunListExists = true;
    std::cout << " Good run list filename = " << goodRunList << "\n\n";
  }
  else {
    std::cout << " *** Warning: no good run list. "
                 "Assuming all runs are good. *** \n\n";
  }

  // Holds the run numbers that appear in the rsl files
  std::vector<int> runVector;

  // Each map entry is keyed by the run number
  // and holds a vector of segment ranges from the input
  // rsl files.
  RunSegmentMap runSegmentMap;

  int nSuccessfulOpens = 0;

  // Loop over rsl input files
  for (int index = firstIndex; index <= lastIndex; ++index) {

    // Build the input filename
    std::ostringstream inputFilename;
    inputFilename << baseName
                  << std::setw(fieldWidth) << std::setfill('0') << index
                  << ".rsl";
    const std::string inputPath = inputFilename.str();

    // Open the file; it is closed when inputFile goes out of scope
    std::ifstream inputFile(inputPath.c_str());
    if (!inputFile) {
      std::cout << "*** WARNING: Failed to open input file "
                << inputPath << " ***\n\n";
      continue;
    }
    ++nSuccessfulOpens;

    if (!readRslFile(inputFile, runVector, runSegmentMap)) {
      return 1;
    }
  }

  // Just double check that the runVector is sorted and has no
  // duplicates. Unless there is a bug above this check will
  // never fail.
  reportIfNotStrictlyIncreasing(
    runVector, "ERROR: runVector out of order or duplicates\n\n");

  // ---------------------------------------------------------
  // Read the good run list file
  // ---------------------------------------------------------

  // Holds the run numbers that appear in the good run list
  std::vector<int> goodRunVector;

  // Each map entry is keyed by the run number
  // and holds a vector of segment ranges from the
  // good run list
  RunSegmentMap goodRunSegmentMap;

  int linesWithOnlyRunNumber = 0;

  if (goodRunListExists) {
    std::ifstream goodRunFile(goodRunList.c_str());
    if (!goodRunFile) {
      std::cout << "*** ERROR: Failed to open good run list file "
                << goodRunList << " ***\n\n";
      return 1;
    }
    if (!readGoodRunList(goodRunFile, goodRunVector, goodRunSegmentMap,
                         linesWithOnlyRunNumber)) {
      return 1;
    }
  }

  // A sanity check. Note the first element is compared against -1, so a
  // good run list containing a negative run number also triggers it.
  reportIfNotStrictlyIncreasing(
    goodRunVector, "ERROR: runVector out of order or duplicates\n");

  // Now sort runs/segments and remove duplicates
  // First in the container filled from the rsl files
  const int overlapTotal = removeRslOverlaps(runSegmentMap);
  std::cout << "\nTotal number of overlap segments in rsl files: "
            << overlapTotal << " (excluding segment 0)\n";

  // Repeat for the container holding the good run segments
  const int nRangesWithOverlap = removeGoodRunOverlaps(goodRunSegmentMap);
  std::cout << "Number of segment ranges that overlap in good run file = "
            << nRangesWithOverlap << "\n\n";

  // Create the output rsl file by looping over the
  // runs/segments in the map and printing out the
  // necessary information.
  std::ofstream outputFile("newFile.rsl");
  if (!outputFile) {
    std::cout << "*** ERROR: Failed to open output file newFile.rsl ***\n\n";
    return 1;
  }
  writeMergedRsl(outputFile, runVector, runSegmentMap,
                 goodRunListExists, goodRunVector, goodRunSegmentMap);
  outputFile.close();
  if (outputFile.fail()) {
    std::cout << "*** ERROR: Failed writing output file newFile.rsl ***\n\n";
    return 1;
  }

  std::cout << "Succeeded in opening " << nSuccessfulOpens
            << " rsl files.\n";
  std::cout << "Failed in opening "
            << (lastIndex - firstIndex + 1 - nSuccessfulOpens)
            << " rsl files.\n";
  std::cout << "Lines in the good run list file with only a run number"
            << " = " << linesWithOnlyRunNumber << "\n\n";

  return 0;
}