/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                      Copyright (c) 1995,1996                          */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, and modify this software and its            */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                   Author :  Paul Taylor                               */
/*                   Date   :  April 1994                                */
/*************************************************************************/

#include "EST.h"
#include "EST_pda.h"
#include "srpd.h"

// Routines whose definitions are not in the visible part of this file.
void init_pda(void);
int read_next_wave_segment (EST_Wave &sig, struct Srpd_Op *paras, SEGMENT_ *p_seg);

// Helpers defined further down in this file, declared here so that the
// public entry points can call them before their definitions.
EST_Track sub_do_srpd_fz(EST_Wave &sig, Srpd_Op &srpd_op);
struct Srpd_Op *default_srpd_op(struct Srpd_Op *srpd);
void parse_srpd_list(EST_Option &a_list, struct Srpd_Op *srpd);

// Run pitch detection on `sig` and return the resulting F0 track.
//
// `method` names the detector to use.  When it is empty the "pda_method"
// entry of `op` is consulted, and if that is absent as well the srpd
// detector is used; "srpd" is the only method implemented here.  When
// `op` contains "pda_result_shift" the track is resampled with that
// value before being returned.
//
// For an unrecognised method an error is written to cerr and an empty
// track is returned immediately, as icda() does, instead of running the
// post-processing calls on a track that was never filled in.
EST_Track pda(EST_Wave &sig, EST_Option &op, EST_String method)
{
    EST_Track fz;

    // An empty method defers to the option list, if it names one.
    if ((method == "") && op.present("pda_method"))
	method = op.val("pda_method");

    // srpd is both the default and the only supported detector.
    if ((method == "") || (method == "srpd"))
	fz = do_srpd_fz(sig, op);
    else
    {
	cerr << "Error: Unknown pda \"" << method << "\"" << endl;
	return fz;
    }

    fz.set_contour_type(EST_ContourType::F0);
    if (op.present("pda_result_shift"))
	fz.sample(op.fval("pda_result_shift"));

    fz.set_field_name("F0", 0);

    return fz;
}

// Intonation contour detection algorithm.
//
// Obtains a raw F0 track from the pitch detector and passes it, together
// with `speech` and `op`, through smooth_phrase().  `method` selects the
// detector: when empty, the "pda_method" entry of `op` is used if present,
// otherwise srpd.  An unrecognised method is reported on cerr and an empty
// track is returned.  When `op` contains "icda_result_shift" the smoothed
// track is resampled with that value.
EST_Track icda(EST_Wave &sig, EST_Track &speech, EST_Option &op, EST_String method)
{
    EST_Track raw_contour, contour;

    // An empty method defers to the option list, if it names one.
    if ((method == "") && op.present("pda_method"))
	method = op.val("pda_method");

    // srpd is both the default and the only supported detector.
    const int use_srpd = (method == "") || (method == "srpd");
    if (!use_srpd)
    {
	cerr << "Error: Unknown pda: " << method << endl;
	return raw_contour;
    }

    raw_contour = do_srpd_fz(sig, op);
    contour = smooth_phrase(raw_contour, speech, op);

    contour.set_contour_type(EST_ContourType::F0);
    if (op.present("icda_result_shift"))
	contour.sample(op.fval("icda_result_shift"));

    contour.set_field_name("F0", 0);

    return contour;
}

// Option-driven entry point for the srpd pitch detector.
//
// The srpd parameters start from the built-in defaults and are then
// overridden by whatever matching entries `op` holds.  The detector is
// run and its output is tagged as an F0 contour with "FIXED" spacing and
// "MANY" breaks.
//
// Note that `sig` is taken by reference: if the "do_low_pass" option is
// "true" the caller's waveform is the one handed to the low pass filter.
EST_Track do_srpd_fz(EST_Wave &sig, EST_Option &op)
{
    Srpd_Op params;

    // default_srpd_op() hands back the pointer it was given, so the
    // defaults are filled in before the option overrides are applied.
    // This should be modified to account for missing key/vals
    parse_srpd_list(op, default_srpd_op(&params));

    if (op.val("do_low_pass", 0) == "true")
    {
	const int cutoff = op.ival("lpf_cutoff", 1);
	const int order = op.ival("lpf_order", 1);
	FIRlowpass_filter(sig, cutoff, order);
    }

    EST_Track contour = sub_do_srpd_fz(sig, params);

    contour.set_contour_type(EST_ContourType::F0);
    contour.set_space_type("FIXED");
    contour.set_break_type("MANY");
    contour.set_field_name("F0", 0);

    return contour;
}

// Run the srpd detector on `sig` with a caller-supplied parameter block.
// The track is returned exactly as the detector produced it; no contour,
// spacing or break type is set here.
EST_Track do_srpd_fz(EST_Wave &sig, Srpd_Op &srpd_op)
{
    EST_Track contour = sub_do_srpd_fz(sig, srpd_op);
    return contour;
}

// Run the srpd detector on `sig` using only the built-in default
// parameters.
EST_Track do_srpd_fz(EST_Wave &sig)
{
    Srpd_Op defaults;

    // default_srpd_op() fills the block in and returns its address.
    return sub_do_srpd_fz(sig, *default_srpd_op(&defaults));
}

// Core driver for the srpd pitch detector: walks through `sig` one
// analysis segment at a time and returns a single channel track holding
// one pitch value per frame, with frames that are not voiced marked as
// breaks.
//
// srpd_op is modified: its sample_freq is overwritten with the sample
// rate of `sig`, and it is handed to the set-up routines below.
EST_Track sub_do_srpd_fz(EST_Wave &sig, Srpd_Op &srpd_op)
{
    int i, rns, tracklen, j = 0;	// j counts the frames written so far
    SEGMENT_ segment;
    CROSS_CORR_ cc;
    STATUS_ pda_status, held_status;
    // The waveform's own sample rate always replaces any configured value.
    srpd_op.sample_freq = sig.sample_rate();
    float min, max;
    min = srpd_op.min_pitch; // must store as set up routines corrupt
    max = srpd_op.max_pitch;

    initialise_structures (&srpd_op, &segment, &cc);
    initialise_status (&srpd_op, &pda_status);
    initialise_status (&srpd_op, &held_status);

//    cout << "Srpd Min " << srpd_op.min_pitch << endl;
//    cout << "Srpd Min " << srpd_op.max_pitch << endl;

    // Frame count estimated from the signal length and the segment
    // length and shift filled in by initialise_structures().
    // NOTE(review): nothing below checks j against tracklen, and a signal
    // shorter than one segment is not handled specially - confirm that
    // the estimate is always large enough.
    tracklen = (sig.num_samples() - segment.length) / segment.shift + 1;

    EST_Track fz(tracklen, 1);
    // Record the pitch range as it was requested, using the saved copies.
    fz.amin = min;
    fz.amax = max;

    // A return value of 0 from read_next_wave_segment() ends the loop.
    while ((rns = read_next_wave_segment (sig, &srpd_op, &segment)) != 0) 
    {
	// A return value of 2 skips the analysis for this segment: the
	// cross correlation coefficients are zeroed and the status is
	// reset.  Any other non-zero value runs the detector.
	if (rns == 2) 
	{
	    for (i = 0; i < cc.size; cc.coeff[i++] = 0.0);
	    initialise_status (&srpd_op, &pda_status);
	}
	else
	    super_resolution_pda (&srpd_op, segment, &cc, &pda_status);
	// HOLD: park this estimate in held_status and write no frame on
	// this pass; what is written for it is decided on the next pass.
	if (pda_status.s_h == HOLD) 
	{
	    held_status.pitch_freq = pda_status.pitch_freq;
	    held_status.v_uv = VOICED;
	    held_status.s_h = HELD;
	    held_status.cc_max = pda_status.cc_max;
	    held_status.threshold = pda_status.threshold;
	    continue;
	}
	// A parked estimate is written before the current one.  If the
	// current estimate is a break, the parked one becomes a break too.
	if (held_status.s_h == HELD) 
	{
	    if (pda_status.pitch_freq == BREAK_NUMBER) 
	    {
		held_status.pitch_freq = BREAK_NUMBER;
		held_status.v_uv = UNVOICED;
	    }
	    held_status.s_h = SENT;
	    if (held_status.v_uv != VOICED) 
		fz.set_break(j);
	    fz.a(j++) = held_status.pitch_freq;
	}
	// Write the current frame, flagged as a break unless it is voiced.
	if (pda_status.v_uv != VOICED) 
	    fz.set_break(j);
	fz.a(j++) = pda_status.pitch_freq;
    }
    // An estimate still parked when the input runs out is written as a
    // break.
    if (held_status.s_h == HELD) 
    {
	held_status.pitch_freq = BREAK_NUMBER;
	held_status.v_uv = UNVOICED;
	fz.set_break(j);
	fz.a(j++) = held_status.pitch_freq;
    }
    end_structure_use (&segment, &cc);
    float shift;
    shift = srpd_op.shift / 1000.0;
    // Set the frame count to the number of frames actually written.
    fz.set_num_frames(j);
//    cout << "shift = " << shift << endl;
    fz.fill_time(shift, 0); // because srpd works in ms.

    return fz;
}
// This is a temporary low pass filtering routine that calls the audlab
// program afb.  The signal is first written out as an audlab file, afb
// is run on it to produce a second file, that file is loaded back into
// `sig`, and finally both temporary files are removed.
//
// cutoff and order are passed to afb as its -U and -s arguments.  The
// temporary file names are built from the process id.

void low_pass_filter(EST_Wave &sig, int cutoff, int order)
{
    char unfiltered_path[1000];
    char filtered_path[1000];
    char command[1000];
    const int pid = (int)getpid();

    sprintf(unfiltered_path,"/tmp/aud_%d", pid);
    sprintf(filtered_path,"/tmp/lpf_%d", pid);
    sig.save(unfiltered_path, "audlab");

    sprintf(command, "/cstr/bin/afb -i %s -o %s -U %d -s %d", 
	    unfiltered_path, filtered_path, cutoff, order );
    strcat(command, " -D -t lp -F -w Blackman -I 1 -O 1");
    printf("%s\n", command);
    system(command); // do lp filtering

    sig.load(filtered_path);

    sprintf(command, "/bin/rm %s; /bin/rm %s", unfiltered_path, filtered_path);
    system(command); // remove temp files
}

// Fill `srpd` with the compiled-in default srpd parameters and return
// the same pointer, so that the call can be nested inside another.
// The Nmax and Nmin members are left untouched: they cannot be
// initialised here.
struct Srpd_Op *default_srpd_op(struct Srpd_Op *srpd)
{
    Srpd_Op &p = *srpd;

    // Signal and framing.
    p.sample_freq = DEFAULT_SF;
    p.L = DEFAULT_DECIMATION;
    p.shift = DEFAULT_SHIFT;
    p.length = DEFAULT_LENGTH;

    // Pitch search range.
    p.min_pitch = DEFAULT_MIN_PITCH;
    p.max_pitch = DEFAULT_MAX_PITCH;

    // Thresholds.
    p.Tsilent = DEFAULT_TSILENT;
    p.Tmin = DEFAULT_TMIN;
    p.Tmax_ratio = DEFAULT_TMAX_RATIO;
    p.Thigh = DEFAULT_THIGH;
    p.Tdh = DEFAULT_TDH;

    // Optional behaviour, off by default.
    p.make_ascii = 0;
    p.peak_tracking = 0;

    return srpd;
}

// Override members of `srpd` with the values of any matching entries in
// the option list `al`.  Members whose key is absent are left as they
// are, so the caller normally fills in defaults first.
//
//   decimation              -> L
//   min_pitch, max_pitch    -> min_pitch, max_pitch
//   pda_frame_shift         -> shift   (value multiplied by 1000)
//   pda_frame_length        -> length  (value multiplied by 1000)
//   noise_floor             -> Tsilent
//   v2uv_coef_thresh        -> Thigh
//   min_v2uv_coef_thresh    -> Tmin
//   v2uv_coef_thresh_ratio  -> Tmax_ratio
//   anti_doubling_thresh    -> Tdh
//   peak_tracking           -> peak_tracking
//   sample_frequency        -> sample_freq
void parse_srpd_list(EST_Option &al, struct Srpd_Op *srpd)
{ 
    if (al.present("decimation"))
	srpd->L = al.ival("decimation");
    if (al.present("min_pitch"))
	srpd->min_pitch = al.fval("min_pitch");
    if (al.present("max_pitch"))
	srpd->max_pitch = al.fval("max_pitch");    
    if (al.present("pda_frame_shift"))
	srpd->shift = al.fval("pda_frame_shift") * 1000.0;
    if (al.present("pda_frame_length"))
	srpd->length = al.fval("pda_frame_length") * 1000.0;
    if (al.present("noise_floor"))
	srpd->Tsilent = al.ival("noise_floor");
    // The key tested must be the key read.  It used to be spelled
    // "v2uv_coeff_thresh" in the test only, so a value stored under
    // "v2uv_coef_thresh" (the name default_pda_options() uses) was
    // never applied.
    if (al.present("v2uv_coef_thresh"))
	srpd->Thigh = al.dval("v2uv_coef_thresh");
    if (al.present("min_v2uv_coef_thresh"))
	srpd->Tmin = al.dval("min_v2uv_coef_thresh");
    if (al.present("v2uv_coef_thresh_ratio"))
	srpd->Tmax_ratio = al.dval("v2uv_coef_thresh_ratio");
    if (al.present("anti_doubling_thresh"))
	srpd->Tdh = al.dval("anti_doubling_thresh");
    if (al.present("peak_tracking"))
	srpd->peak_tracking = al.ival("peak_tracking");
    if (al.present("sample_frequency"))
	srpd->sample_freq = al.ival("sample_frequency");
}

// Add the default pitch detection settings to the option list `al`,
// under the key names that parse_srpd_list() and the low pass filter
// options use.  Entries are added in a fixed order.
void default_pda_options(EST_Option &al)
{
    // DEFAULT_LENGTH is divided by 1000 to match the scaling that
    // parse_srpd_list() applies when it reads this key back.
    const double default_frame_length = DEFAULT_LENGTH / 1000.0;

    // Pitch range.
    al.add_item("min_pitch", "40.0");
    al.add_item("max_pitch", "900.0");

    // Framing.
    al.add_item("pda_frame_shift", "0.005");
    al.add_fitem("pda_frame_length", default_frame_length);

    // Low pass filter.
    al.add_item("lpf_cutoff", "600");
    al.add_item("lpf_order", "49");

    // srpd parameters.
    al.add_fitem("decimation", DEFAULT_DECIMATION);
    al.add_fitem("noise_floor", DEFAULT_TSILENT);
    al.add_fitem("min_v2uv_coef_thresh", DEFAULT_TMIN);
    al.add_fitem("v2uv_coef_thresh_ratio", DEFAULT_TMAX_RATIO);
    al.add_fitem("v2uv_coef_thresh", DEFAULT_THIGH);
    al.add_fitem("anti_doubling_thresh", DEFAULT_TDH);
    al.add_iitem("peak_tracking", 0);
}

// Return the usage text for the general pitch detection command line
// options, one option per line.
EST_String options_pda_general(void)
{
    return
	EST_String("")+
        "-c <int>         Select channel from input data (starts from 0)\n"+
        "-n <int>         Number of channels of raw data\n"+
	"-L               perform low pass filtering on input\n" + 
	"-P               perform peak tracking\n" +
	"-R               don't perform low pass filtering on input\n" +
	"-fmin <int>      minimum F0 value\n" +
	"-fmax <int>      maximum F0 value\n" +
	"-j <string>      <\"on\" \"off\"> join points in output\n" +
	"-l <float>       frame length in seconds.\n" +
	"-forder <int>    lp filter order (as an exponent of 2, eg 5)\n" +
	"-s <float>       frame shift in seconds.\n" +
	"-srpd            use srpd method\n" +
	"-u <int>         lp filter cutoff (eg 600)\n";
}

// Return the usage text for the command line options that are specific
// to the srpd pitch detector, one option per line.
EST_String options_pda_srpd(void)
{
    return
	EST_String("")+
	"-d <float>      decimation threshold (srpd)\n" +
	"-n <float>      noise floor\n" +
	"-H <float>      unvoiced to voiced coeff threshold\n" +
	"-m <float>      min voiced to unvoiced coeff threshold\n" +
	"-r <float>      voiced to unvoiced coeff threshold-ratio\n" +
	"-t <float>      anti pitch doubling/halving threshold\n";
}


