// program : extract
// written by Volker Kr"atzig in august '97  
// This program was written to analyse the Amanda 'log'-files coming from the pole.
// It is called by two arguments  list
//                                output file
// where 'list' is an ASCII file consisting of the files (-names) to read
// the expected format is 
//                                 'first filename'
//                                 'second filename'
//                                   :
//                                   :
//                                 'last filename'
// as produced by 'ls ab_*-ext-log >> list'. 
// The second argument is simply the name of the output file.
//
// It should be easy to change this program in order to cope with a new or extended
// 'log'-file. In most cases you only have to change about three lines for each
// new keyword the file should be scanned for. 
//                                 1.) Add a new line like
//                                         'const int newkeyword_col =  22;'
//                                     that indicates the column where the value is written to
//                                     the output file.
//                                 2.) Increase the constant 'columns'  by one.
//                                 3.) Add a new line like
//                                         '{func_1, "NewKey",     "default", newkeyword_col}};' 
//                                     in the 'key'-array below.
//                                     For example the above line would tell the program to search
//                                     the file for the string "NewKey" and write the result found
//                                     with the function 'func_1' to the column 'newkeyword_col'
//                                     (i.e. 22). If 'NewKey' couldn't be found the string "default"
//                                     is written to this column.
//                                 4.) You might then change the 'header' text and insert your new
//                                     column.
//                                 5.) Change existing PAW-kumacs. 

#include<fstream.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>

// Program-wide constants.
const int FALSE = 0;                                 // logical 'no'
const int TRUE  = 1;                                 // logical 'yes'
const int row_length = 128;                          // size of the fixed text buffers in this program

// One row of the keyword table: describes a keyword to look for in a log
// file, how to extract its value and where that value is stored.
struct key_type
{
  int   func;                                        // selects the extraction routine (compared against func_1 / func_2)
  char  word[row_length];                            // keyword searched for in every line of the log file
  char  def_value[row_length];                       // text used for the column when .word is never found
  int   col;                                         // index into the global line_out array, which is also
};                                                   // the column the value appears in within out_file


//*******************************************    
// defines the format of a line in the output-file


// Column layout of one output line. Every constant is the zero-based index
// of a value inside line_out and therefore its position in the output file.
// The type is spelled out because a declaration without a type
// ("implicit int") is not valid ISO C++.

const int run_year_col = 0;                     // column where the year of the run appears
const int run_day_col  = 1;                     //                  day
const int run_no_col   = 2;                     //                  number
const int run_file_col = 3;                     //                  file

const int A_total_col =  4;                     // Amanda A: total
const int A_1time_col =  5;                     //           first time
const int A_detim_col =  6;                     //           delta time
const int A_spase_col =  7;                     //           Spase
const int A_resyn_col =  8;                     //           resync

const int B_total_col =  9;                     // Amanda B: total
const int B_1time_col = 10;                     //           first time
const int B_detim_col = 11;                     //           delta time
const int B_spase_col = 12;                     //           Spase
const int B_resyn_col = 13;                     //           resync

const int rawcoin_col = 14;                     // raw coincidence counter
const int    coin_col = 15;                     // coincidence counter
const int updwcnt_col = 16;                     // up-down counter
const int evtswdp_col = 17;                     // events w/dup
const int overlap_col = 18;                     // overlap A-B
const int gaspcnt_col = 19;                     // GASP counter

const int check_col   = 20;                     // quality entry computed by the program itself

// number of entries in line_out (= columns in out_file); must be one more
// than the largest column index above
const int columns = 21;

// Holds the text of every column for the file currently being processed.
// Entry i is what write_array() emits as column i of the output line; the
// entries are only pointers, the text itself lives elsewhere.
char* line_out[columns];

// User-defined keywords to search for, together with their default values.

// Selectors stored in key_type::func; extract_data_from_file() uses them to
// pick the extraction routine for a keyword.
const int func_1 = 1;                                // handled by extract_func_1
const int func_2 = 2;                                // handled by extract_func_2


// Keyword table. Each row reads:
//   extraction selector, keyword searched for, default text, output column
key_type key[] = {
  {func_1, "A-total",    "-1", A_total_col},    // A-total
  {func_1, "First time", "-1", A_1time_col},    // first time of Amanda A
  {func_1, "Delta time", "-1", A_detim_col},    // A-delta-time
  {func_1, "Spase-A",    "-1", A_spase_col},    // Spase-A
  {func_1, "A resync",   "-1", A_resyn_col},    // A resync
  {func_1, "B-total",    "-1", B_total_col},    // B-total
  {func_2, "First time", "-1", B_1time_col},    // first time of Amanda B
  {func_2, "Delta time", "-1", B_detim_col},    // B-delta-time
  {func_1, "Spase-B",    "-1", B_spase_col},    // Spase-B
  {func_1, "B resync",   "-1", B_resyn_col},    // B resync
  {func_1, "RawCoinCnt",  "0", rawcoin_col},    // Raw Coincidence Counter
  {func_1, "CoinCnt",     "0",    coin_col},    // Coin Counter
  {func_1, "UpDownCnt",   "0", updwcnt_col},    // Up-Down Counter
  {func_1, "Evts w/dup",  "0", evtswdp_col},    // Events w/dup
  {func_1, "Overlap",     "0", overlap_col},    // Overlap A-B
  {func_1, "GaspCnt",     "0", gaspcnt_col}     // Gasp Counter
};

// The Size[Byte] of the 'key'-array divided by the Size[Byte] of 'key_type' 
// equals the number of entrys in key.
const key_no = sizeof(key) / sizeof(key_type);

//*******************************************

// This header is written to the output file before the real data is sent. It gives a
// short explanation of the columns listed. The numbers in the text are 1-based, i.e.
// the value of the matching '*_col' constant plus one:
//   A_detim_col = 6 -> column 7,  A_spase_col = 7 -> column 8,
//   B_total_col .. B_resyn_col = 9 .. 13 -> columns 10 .. 14.
// The entries point at string literals and are therefore declared const.

const char* header[]={"/**********************************************************************\n",
                "/* A short list explaining in which column of this file the variables *\n",
                "/* from the                                                           *\n",
                "/* 'log' files  the variables are stored.                             *\n",
                "/*  1) year		 2) day                  3) run number         *\n",
                "/*  4) file in the run 	 	  			       *\n",
                "/*  5) A-total 		 6) A first time	 7) A Delta time       *\n",
                "/*  8) A Spase 		 9) A resync				       *\n",
                "/* 10) B-total 		11) B first time        12) B Delta time       *\n",
                "/* 13) B Spase 		14) B resync				       *\n",
                "/* 15) raw coinc. cnt.	16) coinc. counter	17) up down counter    *\n",
                "/* 18) events w/dp	19) overlap		20) GASP counter       *\n",
                "/* 21) check - an entry to measure the quality of the given file -    *\n",
                "/*             curently  not supported                                *\n",
                "/**********************************************************************\n",
                "\n"};

// Number of header lines, derived from the array itself.
const int header_lines = sizeof(header) / sizeof(header[0]);

//*******************************************

// The single output stream of the program: opened in start(), written by
// start() and write_array(), closed in end().
ofstream out_file;                                   // the general output-file

//*******************************************

// Reports a fatal error and terminates the program.
//   mess : text printed after the "ERROR: " prefix on the error stream.
//          Taken as 'const char*' because every caller passes a string
//          literal, which must not be bound to a plain 'char*'.
// Does not return: the process ends with exit status EXIT_FAILURE.
void error(const char *mess)                         // fatal error 
{
  cerr << "ERROR: " << mess << " \n";
  exit(EXIT_FAILURE);
}

//*******************************************

void start(char* out_name)                           // basic startup
{
  out_file.open(out_name);                           // opens the output-file and append the data 
  if (!out_file) error ("Can't open output-file!");

  for (int line=0; line<header_lines; line++)        // Comment header of output-file
    {
      out_file << header[line]; 
    }
  return; 
}

//*******************************************

// name_check is a quite simple routine and may be enhanced to allow
// a more sophisticated data compression. There are about 50 files a day and thus 
// about 17500 files a year. ( Options like '<', '=' and '>' would be useful.)

// Checks whether a file name has the form "ab_N_N-N-N-ext-log", where every
// N is a run of one or more decimal digits.
//   name   : zero-terminated file name to test (not modified)
//   return : 1 if the whole name matches the pattern, 0 otherwise
//
// Every '*' in the mask must be covered by at least one digit and by digits
// only. The previous version skipped the first character under a '*'
// without looking at it, so it accepted a non-digit there and stepped over
// the terminating zero of names that end right at a wildcard (e.g. "ab_").
int name_check(char* name)
{
  const char mask[] = "ab_*_*-*-*-ext-log";          // '*' means: one or more digits

  if (!name) return 0;

  int mask_pos = 0;
  int name_pos = 0;

  while (mask[mask_pos] != '\0')
    {
      if (mask[mask_pos] == '*')
        {
          // Consume the digit run; the scan stops at the terminating zero
          // because that is not a digit.
          int digits = 0;
          while (name[name_pos] >= '0' && name[name_pos] <= '9')
            {
              name_pos++;
              digits++;
            }
          if (digits == 0) return 0;
          mask_pos++;
        }
      else
        {
          // Literal part of the mask: characters have to agree exactly.
          if (name[name_pos] != mask[mask_pos]) return 0;
          name_pos++;
          mask_pos++;
        }
    }

  // The mask is used up; the name must end here as well.
  return name[name_pos] == '\0' ? 1 : 0;
}

//*******************************************

// Extracts the run information from a file name taken from the list-file
// and stores it in line_out.
//   name : file name; it is copied first because strtok() writes into the
//          string it splits.
// The name is cut at the characters "abextlog_-". The first four pieces go,
// in this order, to the run number, day, year and file columns. A piece
// that does not exist is replaced by "-1" so that no entry is left NULL.
//
// The copy has to outlive this call because line_out points into it. It is
// kept in a static pointer and released on the next call, when all four
// entries are replaced anyway, so the program no longer leaks one copy per
// processed file.
void extract_filename(char* name)                    
{
  const char delimiters[] = "abextlog_-";
  static char  missing[] = "-1";                      // stand-in for an absent piece
  static char* help = NULL;                           // copy owned by this function

  free(help);                                         // drop the copy of the previous file
  help = strdup(name);
  if (!help) error("Memory allocation failed.");

  char* token = strtok(help, delimiters);
  line_out[run_no_col]   = token ? token : missing;
  token = strtok(NULL, delimiters);
  line_out[run_day_col]  = token ? token : missing;
  token = strtok(NULL, delimiters);
  line_out[run_year_col] = token ? token : missing;
  token = strtok(NULL, delimiters);
  line_out[run_file_col] = token ? token : missing;
}

//*******************************************

// Separator set handed to strtok() by the extract_func_* routines: blank,
// some punctuation and all ASCII letters are skipped so that the pieces left
// over are the numbers. Note that '-' is a separator as well, so a leading
// minus sign is not kept.
const char delimiters[]=" =(),;-/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

// Extracts the first number after the keyword.
//   keyword : points at the keyword inside the current line; the text from
//             there to the end of the line is searched
//   col     : index of the line_out entry that receives the number
// If nothing is left once the separators are skipped, line_out[col] is not
// touched, so the default value stays in place instead of a NULL pointer.
// On success the copy is deliberately not freed: line_out[col] points into it.
void extract_func_1(char* keyword, int col)     
{
  char* help = strdup(keyword);                       // 'strtok' changes the string !
  if (!help) error("Memory allocation failed.");   

  char* number = strtok(help, delimiters);
  if (number == NULL)
    {
      free(help);                                     // nothing refers to the copy
      return;
    }
  line_out[col] = number;
}

// Extracts the second number after the keyword.
//   keyword : points at the keyword inside the current line; the text from
//             there to the end of the line is searched
//   col     : index of the line_out entry that receives the number
// If the line holds fewer than two numbers, line_out[col] is not touched, so
// the default value stays in place instead of a NULL pointer.
// On success the copy is deliberately not freed: line_out[col] points into it.
void extract_func_2(char* keyword, int col)      
{
  char* help = strdup(keyword);                       // 'strtok' changes the string !
  if (!help) error("Memory allocation failed.");    

  char* number = NULL;
  if (strtok(help, delimiters) != NULL)               // skip the first number
    {
      number = strtok(NULL, delimiters);
    }
  if (number == NULL)
    {
      free(help);                                     // nothing refers to the copy
      return;
    }
  line_out[col] = number;
}


// This little routine is the essential.
void extract_data_from_file(char* in_name)
{
  // Write the default values to the output line.
  for(int i=0; i<key_no; i++)                               
    {
      line_out[key[i].col]=key[i].def_value;
    }
  ifstream in_file(in_name);    
  if(!in_file) error("Can't open run-file.");
  char line_str[row_length];
  // Now I search every line in the given file for all of the predefined keywords. 
  // If a word is found the proper ( taken from the array ) function is called.
  in_file.getline(line_str,row_length,'\n');
  while (!in_file.eof())
    {
      in_file.getline(line_str,row_length,'\n');
      for(int j=0; j<key_no; j++)
  	{
	  char* keyword = strstr(line_str, key[j].word);
  	  if (keyword!=NULL)
	    {	      
	      switch(key[j].func)
		{
		case func_1:  {extract_func_1(keyword, key[j].col); break;} 
		case func_2:  {extract_func_2(keyword, key[j].col); break;}
		default:      {error ("Unknown extraction routine."); break;}
		}
	    }
	}
    }

  return;
}

//*******************************************

// Computes an entry representing the quality of the data in the file and
// stores it, as text, in line_out[check_col].
// '0' stands for the best quality; the value is the number of other entries
// that are missing or whose text starts with '-'.
//
// The counter is an int because it is printed with "%d". The text buffer is
// static because line_out keeps pointing at it after this function has
// returned; a plain local array would be gone by the time it is written out.
void data_check(void)
{
  int check = 0;

  for (int i = 0; i < columns; i++)
    {
      if (i == check_col) continue;                  // It would be absurd to check the entry itself.

      if (line_out[i] == NULL || line_out[i][0] == '-')
        {
          check++;
        }
    }

  static char help[row_length];
  sprintf(help, "%d", check);
  line_out[check_col] = help;
}

// Writes a line to out_file: first lets data_check() fill in the quality
// entry, then emits all entries of line_out, each followed by two blanks,
// and ends the line.
// An entry that is still NULL is written as "-1", because a null pointer
// must not be handed to the stream's operator<<.
void write_array()                             
{
  data_check();
  for (int i = 0; i < columns; i++)
    {
      const char* text = line_out[i] ? line_out[i] : "-1";
      out_file << text << "  ";
    }
  out_file << '\n';
}

//*******************************************


// Ends the program's output: closes the global output file.
void end(void)
{
  out_file.close();
}

//*******************************************


int main(int argc, char *argv[])
{
  // We need exactly 2 arguments.
  if (argc==3)
  {                                            
    start(argv[2]);
    ifstream list_file(argv[1]);
    if (!list_file) error("Can't open list of files.");
    char file_name[row_length];
    list_file.getline(file_name,row_length,'\n');
    // For each line of the list try to open a input file.
    while (!list_file.eof())
      {
	if (name_check(file_name))
	  {
	    cout << "Extract: " << file_name << '\n';
 	    extract_filename(file_name);
	    extract_data_from_file(file_name);
	    write_array();
	  }
	else 
	  {
	    cout << endl << "The string '" << file_name << "' doesn't match the pattern." << '\n';
	    cout << "Press a key to continue." << endl;
	    int help=cin.get();
	  }
	list_file.getline(file_name,row_length,'\n');
      }
    end();
  }
  else error("\nUsage: extract file-list output-file.");
  return (EXIT_SUCCESS);
}