/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                      Copyright (c) 1995,1996                          */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify, distribute this software and its    */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                 Authors:  Simon King                                  */
/*                 Date   :  July 1995                                   */
/*-----------------------------------------------------------------------*/
/*                 EST_Ngrammar test program                             */
/*                                                                       */
/*=======================================================================*/
#include "EST.h"
#include "EST_Ngrammar.h"

int main(int argc, char **argv)
{
    //int order;
    EST_StrList files,script;
    EST_Option al, op;
    EST_String wordlist_file, script_file, in_file, format;
    EST_String prev_tag, prev_prev_tag, last_tag;
    EST_TBI *p;
    //EST_Ngrammar::representation_t representation = 
    //EST_Ngrammar::dense;

    EST_StrList wordlist;
    EST_Ngrammar ngrammar;
    bool per_file_stats=false;
    bool raw_stats=false;
    bool brief=false;
    EST_String input_format;

    double raw_entropy,count,entropy,perplexity,total_raw_H,total_count;
    total_count = 0;
    total_raw_H = 0;

    parse_command_line2(argc, argv, 
	 EST_String("Usage:\n")+
         "test_ngram -g <grammar filename> <options> <input files>\n"+
	 " (use \"-\" to make input/output files stdin/out)\n"+
	 "-g <ifile>   grammar file (required)\n"+
	 "-w <ifile>   filename containing word list (required for some grammar formats)\n"+
	 "-S <ifile>   script file\n"+
         "-raw_stats   print unnormalised entropy and sample count\n"+
         "-brief       print results in brief format\n"+
         "-f           print stats for each file\n"+
   	 "\n"+
	 "-input_format <string>\n"+
         "             format of input data (default sentence_per_line)\n"+
	 "             may also be sentence_per_file, or ngram_per_line.\n"+
         "\n"+
	 "Pseudo-words :\n"+
	 "-prev_tag <string>\n"+
         "             tag before sentence start\n"+
	 "-prev_prev_tag <string>\n"+
         "             all words before 'prev_tag'\n"+
	 "-last_tag <string>\n"+
         "             after sentence end\n"+
	 "-default_tags\n"+
         "             use default tags of "+SENTENCE_START_MARKER+
			","+SENTENCE_END_MARKER+" and "+SENTENCE_END_MARKER+"\n"+
         "             respectively\n",
			files, al);


    if (al.present("-w"))
	wordlist_file = al.val("-w");
    else{
	wordlist_file = "";
    }

    if (al.present("-f"))
	per_file_stats = true;
    if (al.present("-input_format"))
	input_format = al.val("-input_format");
    else
	input_format = "";

    if ( al.present("-raw_stats") || al.present("-r"))
	raw_stats = true;

    if ( al.present("-brief") || al.present("-b") )
	brief = true;


    if (al.present("-default_tags"))
    {	
	prev_tag = SENTENCE_START_MARKER;
	prev_prev_tag = SENTENCE_END_MARKER;
	last_tag = SENTENCE_END_MARKER;
    }
    
    if (al.present("-prev_tag"))
    {
	if (al.present("-default_tags"))
	    cerr << "test_ngram: WARNING : -prev_tag overrides -default_tags"
		<< endl;
	prev_tag = al.val("-prev_tag");
    }

    if (al.present("-prev_prev_tag"))
    {
	if (al.present("-default_tags"))
	    cerr << "test_ngram: WARNING : -prev_prev_tag overrides -default_tags" << endl;
	prev_prev_tag = al.val("-prev_prev_tag");
    }	
    
    if (al.present("-last_tag"))
    {
	if (al.present("-default_tags"))
	    cerr << "test_ngram: WARNING : -last_tag overrides -default_tags" << endl;
	last_tag = al.val("-last_tag");
    }

    if (   ( (prev_tag=="") ||  (prev_prev_tag=="") || (last_tag=="") )
	&& ( (prev_tag!="") ||  (prev_prev_tag!="") || (last_tag!="") )   )
    {
	cerr << "test_ngram: ERROR : if any tags are given, ALL must be given" << endl;
	exit(1);
    }


    // script
    if (al.present("-S"))
    {
	script_file = al.val("-S");
    
	if(load_StrList(script_file,script) != format_ok)
	{
	    cerr << "test_ngram: Could not read script from file " 
		<< script_file << endl;
	    exit(1);
	}
    }

    if (al.present("-g"))
	in_file = al.val("-g");
    else
    {
	cerr << "test_ngram: Must give a grammar filename using -g" << endl;
	exit(1);
    }

    // plus any files on command line
    // except file "-" unless there is no script
    if(script.head()==NULL)
	script += files;
    else
	for(p=files.head();p!=0;p=next(p))
	    if(files(p) != "-")
		script.append(files(p));

    if(script.head() == NULL)
    {
	cerr << "test_ngram: No test files given" << endl;
	exit(1);
    }

    if (wordlist_file != "")
    {
	// load wordlist
	if (load_StrList(wordlist_file,wordlist) != format_ok)
	{
	    cerr << "test_ngram: Could not read wordlist from file " << wordlist_file
		<< endl;
	    exit(1);
	}
    
	// load grammar using wordlist
	if (ngrammar.load(in_file,wordlist) != format_ok)
	{
	    cerr << "test_ngram: Failed to load grammar" << endl;
	    exit(1);
	}
    }
    else
    {
	if (ngrammar.load(in_file) != format_ok)
	{
	    cerr << "test_ngram: Failed to load grammar" << endl;
	    exit(1);
	}
    }

    if (!brief)
    {
	cout << "Ngram Test Results" << endl;
	cout << "==================" << endl;
    }

    for (p = script.head(); p; p = next(p))
    {
	// test each file
	if(ngrammar.stats(script(p),
			  raw_entropy,count,
			  entropy,perplexity,
			  prev_tag,
			  prev_prev_tag,
			  last_tag,
			  input_format))
	{
	    total_raw_H += raw_entropy;
	    total_count += count;
	    
	    if(per_file_stats)
	    {
		if (brief)
		    cout << basename(script(p)) << " \t";
		else
		    cout << script(p) << endl;

		if(raw_stats)
		{
		    if (brief)
			 cout << raw_entropy << " " << count << " ";
		    else
		    {
			cout << " raw entropy " << raw_entropy << endl;
			cout << " count       " << count << endl;
		    }
		}
		
		if (brief)
		    cout << entropy << " " << perplexity << endl;
		else
		{
		    cout << " entropy     " << entropy << endl;
		    cout << " perplexity  " << perplexity << endl << endl;
		}
	    }
	}
	else
	{
	    cerr << "test_ngram: WARNING : file '" << script(p)
		<< "' could not be processed" << endl;
	}
	
    }
    if (total_count > 0)
    {
	if (!brief)
	    cout << "Summary for grammar " << in_file << endl;
	else
	    if (per_file_stats)
		cout << "summary \t";

	if(raw_stats)
	{
	    if (brief)
		cout << total_raw_H << " " << total_count << " ";
	    else
	    {
		cout << " raw entropy " << total_raw_H << endl;
		cout << " count       " << total_count << endl;
	    }
	}
	if (brief)
	{
	    cout << total_raw_H / total_count;
	    cout << " " << pow(2.0,total_raw_H / total_count);
	    cout << endl;
	}
	else
	{
	    cout << " entropy     " << total_raw_H / total_count << endl;
	    cout << " perplexity  " <<  pow(2.0,total_raw_H / total_count);
	    cout << endl;
	}
    }
    else
    {
	cerr << "test_ngram: No data processed" << endl;
    }
    
    // everything went okay
    return 0;
}


// Deliberately empty: this program has no library options to override.
// Both arguments are referenced only so that they do not trigger
// unused-parameter warnings.
void override_lib_ops(EST_Option &a_list, EST_Option &al)
{
    static_cast<void>(a_list);
    static_cast<void>(al);
}
