// T1M_MM1PARAM.CPP

// Copyright (C) 2001 Tommi Hassinen.

// This program is free software; you can redistribute it and/or modify it
// under the terms of the license (GNU GPL) which comes with this package.

/*################################################################################################*/

#include "mpi++.h"

#include "notice.h"

#include "mm1eng9.h"
#include "mm1tab9.h"

#include <iomanip>
#include <strstream>
#include <algorithm>
using namespace std;

/*################################################################################################*/

// Entry point of the mm1 parameter-fitting driver.
//
// The function first passes cout to print_full_copyright_notice() and then
// runs whichever work stage is currently enabled in the source. The stages
// are kept below as block-commented sections, each dated by its author:
//
//   2001-08-28  generate inputs and a run script for STAGE1 (geometry optimization)
//   2001-10-29  generate inputs and a run script for STAGE3 (single point)
//   2001-10-02  read STAGE1 outputs and write out the STAGE1 force field
//   2001-10-29  generate randomly distorted structures for STAGE3
//   2001-10-30  generate structures with twisted torsions for STAGE3
//   2001-11-12  the actual parameter fitting (the only stage enabled right now)
//
// A disabled stage starts with a line holding only a block-comment opener and
// ends with a line holding two slashes followed by a block-comment closer.
// Prefixing the opener line with two more slashes (as is done for the last
// stage) turns both delimiter lines into line comments and enables the stage.
// The stages declare identically named locals at function scope, so they are
// meant to be enabled one at a time.
//
// The list files read by the disabled stages are scanned until a line that
// starts with '#' is seen (the "#end" marker); there is no separate
// end-of-file check.
//
// argc and argv are not used.
//
// Returns 0 when the enabled stage runs to completion. The disabled stages
// additionally return -1 or call exit(EXIT_FAILURE) on their error paths.
int main(int argc, char * argv[])
{
	print_full_copyright_notice(std::cout);
	
/*##############################################*/
/*##############################################*/
/*
	// 2001-08-28 : generate inputs and a script for STAGE1 calculations (geometry optimization).
	
	const char ipath[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files";
	const char opath[] = "/home/thassine/DATABASE-mm1/organic_molecules/output_files/STAGE1";
	
	const char ifile1_name[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files/STAGE1_files.txt";
	
	ifstream ifile1; ifile1.open(ifile1_name, ios::in);
	ofstream ofile1; ofile1.open("run_STAGE1.sh", ios::out); ofile1 << "#! /bin/sh" << endl;
	
	char buffer[65536];
	
	while (true)
	{
		if (ifile1.peek() == '#') break;	// #end
		
		char filename1[1024];
		
		// first token of the line is the file name; the rest of the line is discarded.
		ifile1 >> filename1;
		ifile1.getline(buffer, sizeof(buffer));
		
		cout << "reading file " << filename1 << endl;
		
		char filename2[1024];
		ostrstream str2(filename2, sizeof(filename2));
		str2 << ipath << "/" << filename1 << ends;
		
		mm1_mdl * mdl = new mm1_mdl(& cout, * console_class_factory::GetInstance());
		
		ifstream ifile2;
		ifile2.open(filename2, ios::in);
		if (ifile2.fail())
		{
			cout << "file operation failed!!!" << endl;
			ifile2.close(); delete mdl; return -1;
		}
		
		mdl->ReadStream(ifile2);
		ifile2.close();
		
		// cut filename1 at its last '.' (drops the extension).
		i32s search = strlen(filename1);
		while (search >= 0 && filename1[search] != '.') search--;
		if (search < 0) { cout << "search failed!!!" << endl; return -1; }
		filename1[search] = 0;
		
		// short_fn1 points just past the last '/' of filename1.
		const char * short_fn1 = filename1;
		for (i32s sfn = 0;sfn < strlen(filename1);sfn++)
		{
			if (filename1[sfn] == '/') short_fn1 = & filename1[sfn + 1];
		}
		
	const char stage[] = "S1";
	const char date[] = "oct2001";
	
//	const char procedure[] = "OPT_MP2";	// gaussian...
//	const char i_ext[] = ".g_in";
//	const char o_ext[] = ".g_out";
	const char procedure[] = "OPT_HF";	// mpqc...
	const char i_ext[] = ".m_in";
	const char o_ext[] = ".m_out";
		
		char filename3[1024];
		ostrstream str3(filename3, sizeof(filename3));
		str3 << opath << "/" << short_fn1 << "-" << stage << "-" << procedure << "-" << date << i_ext << ends;
		
	bool optimize = true;
	bool get_energy = true;
	bool get_gradient = true;
		
	//	gaussian_io_handler handler(* mdl);	// make gaussian inputs...
		mpqc_io_handler handler(* mdl);		// make mpqc inputs...
		
		handler.WriteInput(filename3, optimize, get_energy, get_gradient);
		delete mdl;
		
	//	ofile1 << "g98 < ";		// make a gaussian-script...
		ofile1 << "mpqc -f ";		// make an mpqc-script...
	
		ofile1 << filename1 << "-" << stage << "-" << procedure << "-" << date << i_ext;
		ofile1 << " > " << filename1 << "-" << stage << "-" << procedure << "-" << date << o_ext << endl;
	}
	
	ifile1.close();
	ofile1.close();
	
	cout << "generate files for STAGE1 OK" << endl;
//*/	
/*##############################################*/
/*##############################################*/
/*
	// 2001-10-29 : generate inputs and a script for STAGE3 calculations (single point).
	
	const char ipath[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files/STAGE3";
	const char opath[] = "/home/thassine/DATABASE-mm1/organic_molecules/output_files/STAGE3";
	
	const char ifile1_name[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files/STAGE3.txt";
	
	ifstream ifile1; ifile1.open(ifile1_name, ios::in);
	ofstream ofile1; ofile1.open("run_STAGE3.sh", ios::out); ofile1 << "#! /bin/sh" << endl;
	
	char buffer[65536];
	
	while (true)
	{
		if (ifile1.peek() == '#') break;	// #end
		
		char filename1[1024];
		
		// first token of the line is the file name; the rest of the line is discarded.
		ifile1 >> filename1;
		ifile1.getline(buffer, sizeof(buffer));
		
		cout << "reading file " << filename1 << endl;
		
		char filename2[1024];
		ostrstream str2(filename2, sizeof(filename2));
		str2 << ipath << "/" << filename1 << ends;
		
		mm1_mdl * mdl = new mm1_mdl(& cout, * console_class_factory::GetInstance());
		
		ifstream ifile2;
		ifile2.open(filename2, ios::in);
		if (ifile2.fail())
		{
			cout << "file operation failed!!!" << endl;
			ifile2.close(); delete mdl; return -1;
		}
		
		mdl->ReadStream(ifile2);
		ifile2.close();
		
		// cut filename1 at its last '.' (drops the extension).
		i32s search = strlen(filename1);
		while (search >= 0 && filename1[search] != '.') search--;
		if (search < 0) { cout << "search failed!!!" << endl; return -1; }
		filename1[search] = 0;
		
	const char stage[] = "S3";
	const char date[] = "oct2001";
	
	const char procedure[] = "SP_MP2";	// gaussian...
	const char i_ext[] = ".g_in";
	const char o_ext[] = ".g_out";
//	const char procedure[] = "SP_HF";	// mpqc...
//	const char i_ext[] = ".m_in";
//	const char o_ext[] = ".m_out";
	
		char filename3[1024];
		ostrstream str3(filename3, sizeof(filename3));
		str3 << opath << "/" << filename1 << "-" << stage << "-" << procedure << "-" << date << i_ext << ends;
		
	bool optimize = false;
	bool get_energy = true;
	bool get_gradient = true;
		
		gaussian_io_handler handler(* mdl);	// make gaussian inputs...
	//	mpqc_io_handler handler(* mdl);		// make mpqc inputs...
		
		handler.WriteInput(filename3, optimize, get_energy, get_gradient);
		delete mdl;
		
		ofile1 << "g98 < ";		// make a gaussian-script...
	//	ofile1 << "mpqc -f ";		// make an mpqc-script...
	
		ofile1 << filename1 << "-" << stage << "-" << procedure << "-" << date << i_ext;
		ofile1 << " > " << filename1 << "-" << stage << "-" << procedure << "-" << date << o_ext << endl;
	}
	
	ifile1.close();
	ofile1.close();
	
	cout << "generate files for STAGE3 OK" << endl;
//*/	
/*##############################################*/
/*##############################################*/
/*
	// 2001-10-02 : read in outputs to collect data in STAGE1; write out the STAGE1 force field.
	
	const char files_stage1[] = "/home/thassine/DATABASE-mm1/organic_molecules/output_files/STAGE1.txt";
	
	const char forcefield_path[] = "/home/thassine/DATABASE-mm1/organic_molecules/parameters";
	
	const char ipath1[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files/ALLFILES";
	const char ipath2[] = "/home/thassine/DATABASE-mm1/organic_molecules/output_files/STAGE1";
	
	ifstream ifile1;
	
	char buffer[65536];
	
	// read in and append STAGE1 data; determine valid atomtypes and sort them, and determine valid terms.
	
	prmfit_tables * tab = NULL;	// calling ff.ValidateAtomTypesSTAGE1() will validate atomtypes.txt.out file...
	
	// three passes over the same file list; the tables object is created before
	// passes 0 and 2 and deleted after passes 1 and 2, so passes 0 and 1 share one object.
	for (i32s loop = 0;loop < 3;loop++)
	{
		if (loop == 0 || loop == 2)
		{
			cout << "now creating object : prmfit_tables..." << endl;
			tab = new prmfit_tables(forcefield_path);
		}
		
		if (loop == 0)
		{
			cout << "reading in the initial atomtypes..." << endl;
			tab->InitSTAGE1();
		}
		
		ifile1.open(files_stage1, ios::in);
		
		while (true)
		{
			if (ifile1.peek() == '#') break;	// #end
			
			char filename1[1024];
			
			ifile1 >> filename1;
			ifile1.getline(buffer, sizeof(buffer));
			
			cout << "reading in " << filename1 << " : ";
			
			// short_fn1 points just past the last '/' of filename1.
			const char * short_fn1 = filename1;
			for (i32s sfn = 0;sfn < strlen(filename1);sfn++)
			{
				if (filename1[sfn] == '/') short_fn1 = & filename1[sfn + 1];
			}
			
			// project file name : everything up to '#', then '#' and the two characters after it, then ".gpr".
			// NOTE(review): the scan below has no end-of-string check; a name without '#' runs past the buffer.
			char filename2[1024];
			ostrstream str2(filename2, sizeof(filename2)); str2 << ipath1 << "/";
			i32s fn2 = 0; while (short_fn1[fn2] != '#') str2 << short_fn1[fn2++];
			str2 << short_fn1[fn2 + 0] << short_fn1[fn2 + 1] << short_fn1[fn2 + 2];
			str2 << ".gpr" << ends;
			
			mm1_mdl * mdl = new mm1_mdl(& cout, * console_class_factory::GetInstance());
			
			ifstream ifile2;
			ifile2.open(filename2, ios::in);
			if (ifile2.fail())
			{
				cout << "could not open project file!!!" << endl;
				ifile2.close(); delete mdl; return -1;
			}
			
			mdl->ReadStream(ifile2);
			ifile2.close();
			
			i32s search = strlen(filename1);
			while (search >= 0 && filename1[search] != '.') search--;
			if (search < 0) { cout << "search failed!!!" << endl; return -1; }
			
			// the first character of the extension selects the output reader.
			// NOTE(review): neither mdl nor handler is deleted in this loop; presumably
			// prmfit_tables keeps using them after AddCase... -- confirm ownership.
			file_io_handler * handler = NULL;
			switch (filename1[search + 1])
			{
				case 'g':	cout << "gaussian";
				handler = new gaussian_io_handler(* mdl);
				break;
				
				case 'm':	cout << "mpqc";
				handler = new mpqc_io_handler(* mdl);
				break;
				
				default:
				cout << "unknown output type id " << filename1[search + 1] << endl;
				return -1;
			}
			
			char filename3[1024];
			ostrstream str3(filename3, sizeof(filename3));
			str3 << ipath2 << "/" << filename1 << ends;
			
			// the endl is written in both cases; a failed read is only reported,
			// the handler is still passed to AddCase... below.
			bool success = handler->ReadOutput(filename3, true, true, true);
			if (!success) cout << " FAILED!!!"; cout << endl;
			
			switch (loop)
			{
				case 0:
				tab->AddCaseSTAGE1a(handler);
				break;
				
				case 1:
				tab->AddCaseSTAGE1b(handler);
				break;
				
				case 2:
				tab->AddCaseSTAGE1c(handler);
				break;
			}
		}
		
		ifile1.close();
		
		if (loop == 0)
		{
			cout << "now validating the atomtypes..." << endl;
			tab->ValidateAtomTypesSTAGE1a();
		}
		
		if (loop == 1)
		{
			cout << "now validating the atomtypes..." << endl;
			tab->ValidateAtomTypesSTAGE1b();
		}
		
		if (loop == 2)
		{
			cout << "now calculating STAGE1 parameters..." << endl;
			tab->CalcParamSTAGE1();
			
			cout << "now saving the initial param files..." << endl;
			tab->WriteAtomTypes(); tab->WriteParamFiles();
		}
		
		if (loop == 1 || loop == 2)
		{
			cout << "now deleting object : prmfit_tables..." << endl;
			delete tab; tab = NULL;
		}
	}
	
	cout << "STAGE 1 parameter set OK" << endl;
//*/
/*##############################################*/
/*##############################################*/

// stage2???

/*##############################################*/
/*##############################################*/
/*
	// 2001-10-29 : generate randomly distorted structures for STAGE3.
	
	const char forcefield_path[] = "/home/thassine/DATABASE-mm1/organic_molecules/parameters";
	
	const char ipath[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files";
	const char opath[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files/STAGE3";
	
	const char ifile1_name[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files/STAGE1_files.txt";
	
	char buffer[65536];
	
	prmfit_tables * tab = new prmfit_tables(forcefield_path);
	
	ifstream ifile1;
	ifile1.open(ifile1_name, ios::in);
	
	while (true)
	{
		if (ifile1.peek() == '#') break;	// #end
		
		char filename1[1024];
		
		ifile1 >> filename1;
		ifile1.getline(buffer, sizeof(buffer));
		
		cout << "reading in " << filename1 << " : ";
		
		char filename2[1024];
		ostrstream str2(filename2, sizeof(filename2));
		str2 << ipath << "/" << filename1 << ends;
		
		// short_fn1 points just past the last '/' of filename1.
		const char * short_fn1 = filename1;
		for (i32s sfn = 0;sfn < strlen(filename1);sfn++)
		{
			if (filename1[sfn] == '/') short_fn1 = & filename1[sfn + 1];
		}
		
		for (i32s n1 = 0;n1 < 10;n1++)		// the number of distorted structures.
		{
			// the project file is read again for every distorted copy.
			mm1_mdl * mdl = new mm1_mdl(& cout, * console_class_factory::GetInstance());
			
			ifstream ifile2;
			ifile2.open(filename2, ios::in);
			if (ifile2.fail())
			{
				cout << "could not open project file!!!" << endl;
				ifile2.close(); delete mdl; return -1;
			}
			
			mdl->ReadStream(ifile2);
			ifile2.close();
			
			cout << n1 << " " << endl;
			
			// now, make an eng-object, and modify the parameters randomly;
			// then run geomopt, and save the resulting distorted structure.
			
			mm1_eng_prmfit * eng = new mm1_eng_prmfit(* mdl, * tab);
			CopyCRD(mdl, eng, 0); tab->DistortStructureSTAGE3(eng);
			
			mm1_geomopt * opt = new mm1_geomopt(eng, 100, 0.025);
			
			for (int n2 = 0;n2 < 1000;n2++)		// 1000 steps is enough???
			{
				opt->TakeCGStep(conjugate_gradient::Newton2An);
				
				cout << "step = " << (n2 + 1) << "   ";
				cout << "energy = " << opt->optval << "   ";
				cout << "step length = " << opt->optstp << endl;
			}
			
			delete opt;
			
			CopyCRD(eng, mdl, 0);
			delete eng;
			
			// output name : text before '#', then "#d", the two characters after '#',
			// the zero-padded copy number, then ".gpr".
			// NOTE(review): the scan for '#' has no end-of-string check.
			char filename3[1024];
			ostrstream str3(filename3, sizeof(filename3)); str3 << opath << "/";
			i32s fn3 = 0; while (short_fn1[fn3] != '#') str3 << short_fn1[fn3++];
			str3 << "#d" << short_fn1[fn3 + 1] << short_fn1[fn3 + 2];
			str3 << setw(2) << setfill('0') << n1;
			str3 << ".gpr" << ends;
			
			ofstream ofile1; ofile1.open(filename3, ios::out);
			mdl->WriteStream(ofile1); ofile1.close();
			
			delete mdl;
		}
	}
	
	ifile1.close();
	delete tab;
	
	cout << "stage3 file generation #1 OK" << endl;
//*/
/*##############################################*/
/*##############################################*/
/*
	// 2001-10-30 : generate structures with torsions twisted for STAGE3.
	
new ic-based system, not properly tested yet!!! check the output files to see that it works correctly...
new ic-based system, not properly tested yet!!! check the output files to see that it works correctly...
new ic-based system, not properly tested yet!!! check the output files to see that it works correctly...
	
	const char forcefield_path[] = "/home/thassine/DATABASE-mm1/organic_molecules/parameters";
	
	const char ipath[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files";
	const char opath[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files/STAGE3";
	
	const char ifile1_name[] = "/home/thassine/DATABASE-mm1/organic_molecules/input_files/STAGE1_torsions.txt";
	
	char buffer[65536];
	
	prmfit_tables * tab = new prmfit_tables(forcefield_path);
	
	ifstream ifile1;
	ifile1.open(ifile1_name, ios::in);
	
	while (true)
	{
		if (ifile1.peek() == '#') break;	// #end
		
		// per line : two atom indices, torsion increment, step count, id character, file name.
		i32s ind[2]; f64 inc; i32s cnt; char id; char filename1[1024];
		
		ifile1 >> ind[0] >> ind[1] >> inc >> cnt >> id >> filename1;
		ifile1.getline(buffer, sizeof(buffer));
		
		cout << "reading in " << filename1 << " : ";
		
		// NOTE(review): ipath and "/" are written twice here (once on each of the next
		// two statements), which yields ipath/ipath/filename1 -- looks unintended; confirm.
		char filename2[1024];
		ostrstream str2(filename2, sizeof(filename2)); str2 << ipath << "/";
		str2 << ipath << "/" << filename1 << ends;
		
		// short_fn1 points just past the last '/' of filename1.
		const char * short_fn1 = filename1;
		for (i32s sfn = 0;sfn < strlen(filename1);sfn++)
		{
			if (filename1[sfn] == '/') short_fn1 = & filename1[sfn + 1];
		}
		
	// here we might make multiple copies of some of the conformations, both in S1 and S3 datasets.
	// but in S1 datasets they are geometry optimized structures though. difficult to avoid...
	// it boils down to what data use in fitting at all. do not use S1 there?!?!?!
		
		mm1_mdl * mdl = new mm1_mdl(& cout, * console_class_factory::GetInstance());
		
		ifstream ifile2;
		ifile2.open(filename2, ios::in);
		if (ifile2.fail())
		{
			cout << "could not open project file!!!" << endl;
			ifile2.close(); delete mdl; return -1;
		}
		
		mdl->ReadStream(ifile2);
		ifile2.close();
		
	// find a set of atoms that define the torsion. this set is not ambiguous, but we don't
	// have to worry about that if we use the lock_local_structure option when setting constraints...
	// try to select atoms with highest atomic number if possible -> main chain atoms in most cases?
		
		mm1_atom * atmr[4];
		
		// atmr[1] and atmr[2] are the atoms whose index matches ind[0] and ind[1].
		mdl->UpdateIndex();
		for (i32s n2 = 0;n2 < 2;n2++)
		{
			iter_mm1al it1 = mdl->GetAtomsBegin();
			while (it1 != mdl->GetAtomsEnd())
			{
				if ((* it1).index == ind[n2]) break;
				else it1++;
			}
			
			if (it1 == mdl->GetAtomsEnd())
			{
				cout << "invalid atom index was found!!!" << endl;
				exit(EXIT_FAILURE);
			}
			
			atmr[n2 + 1] = & (* it1);
		}
		
		i32s atnum;
		iter_mm1cl iter;
		
		// atmr[0] : entry of atmr[1]->cr_list, other than atmr[2], with the highest atomic number (last one wins on ties).
		for (atnum = -1, iter = atmr[1]->cr_list.begin();iter != atmr[1]->cr_list.end();iter++)
		{
			if ((* iter).atmr == atmr[2]) continue;
			if ((* iter).atmr->el.GetAtomicNumber() < atnum) continue;
			
			atmr[0] = (* iter).atmr; atnum = atmr[0]->el.GetAtomicNumber();
		}
		
		// atmr[3] : the same selection on atmr[2]->cr_list, skipping atmr[1].
		for (atnum = -1, iter = atmr[2]->cr_list.begin();iter != atmr[2]->cr_list.end();iter++)
		{
			if ((* iter).atmr == atmr[1]) continue;
			if ((* iter).atmr->el.GetAtomicNumber() < atnum) continue;
			
			atmr[3] = (* iter).atmr; atnum = atmr[3]->el.GetAtomicNumber();
		}
		
	// see mm1_docv::DoEnergyPlot1D() for a similar example...
	// see mm1_docv::DoEnergyPlot1D() for a similar example...
	// see mm1_docv::DoEnergyPlot1D() for a similar example...
		
		mdl->GatherGroups();		// for internal coordinates...
		
		const i32s molnum = 0; const i32s in_crdset = 0;
		mm1_intcrd * ic = new mm1_intcrd((* mdl), molnum, in_crdset);
		i32s ict1 = ic->FindTorsion(atmr[1]->crd_vector[in_crdset].data, atmr[2]->crd_vector[in_crdset].data);
	if (ict1 < 0) { cout << "could not find ic tor1." << endl; exit(EXIT_FAILURE); }
		
		// oldt1 : torsion value computed from the current coordinates of atmr[0..3].
		v3d<fGL> v1a(atmr[1]->crd_vector[in_crdset].data, atmr[0]->crd_vector[in_crdset].data);
		v3d<fGL> v1b(atmr[1]->crd_vector[in_crdset].data, atmr[2]->crd_vector[in_crdset].data);
		v3d<fGL> v1c(atmr[2]->crd_vector[in_crdset].data, atmr[3]->crd_vector[in_crdset].data);
		f64 oldt1 = v1a.tor(v1b, v1c);
		
		// NOTE(review): the check after FindTorsion below tests ict1 again although fft1
		// was just assigned and the message talks about "ff tor1"; it presumably should
		// test fft1 -- confirm before re-enabling this stage.
		mm1_eng_prmfit * eng = new mm1_eng_prmfit(* mdl, * tab);
		i32s fft1 = eng->FindTorsion(atmr[0], atmr[1], atmr[2], atmr[3]);
	if (ict1 < 0) { cout << "could not find ff tor1." << endl; exit(EXIT_FAILURE); }
		
		f64 tor1 = oldt1;
		for (i32s n1 = 0;n1 < (cnt + 1);n1++)		// the number of structures.
		{
			ic->SetTorsion(ict1, tor1 - oldt1);
			ic->UpdateCartesian();
			
			CopyCRD(mdl, eng, 0);	// lock_local_structure needs coordinates!!!
			eng->SetTorsionConstraint(fft1, tor1, 5000.0, true);
			
			mm1_geomopt * opt = new mm1_geomopt(eng, 100, 0.025);
			
			for (int n2 = 0;n2 < 500;n2++)		// 500 steps is enough???
			{
				opt->TakeCGStep(conjugate_gradient::Newton2An);
				
				cout << "step = " << (n2 + 1) << "   ";
				cout << "energy = " << opt->optval << "   ";
				cout << "step length = " << opt->optstp << endl;
			}
			
			delete opt;
			
			CopyCRD(eng, mdl, 0);
			
			// output name : text before '#', then "#t", the id character, the two characters
			// after '#', the zero-padded structure number, then ".gpr".
			// NOTE(review): the scan for '#' has no end-of-string check.
			char filename3[1024];
			ostrstream str3(filename3, sizeof(filename3)); str3 << opath << "/";
			i32s fn3 = 0; while (short_fn1[fn3] != '#') str3 << short_fn1[fn3++];
			str3 << "#t" << id << short_fn1[fn3 + 1] << short_fn1[fn3 + 2];
			str3 << setw(2) << setfill('0') << n1;
			str3 << ".gpr" << ends;
			
			ofstream ofile1; ofile1.open(filename3, ios::out);
			mdl->WriteStream(ofile1); ofile1.close();
			
			tor1 += inc;
		}
		
		delete ic;
		delete eng;
		delete mdl;
	}
	
	ifile1.close();
	delete tab;
	
	cout << "stage3 file generation #2 OK" << endl;
//*/
/*##############################################*/
/*##############################################*/
///*
	// 2001-11-12 : the actual parameter fitting.
	// (currently the only enabled stage)
	
	const char forcefield_path[] = "/home/thassine/DATABASE-mm1/organic_molecules/parameters";
	prmfit_cg_optim opt(forcefield_path);
	
	// alternative diagnostic runs, kept for reference; enable one in place of the
	// SetDebugLevel(0) / Check(1) pair below.
//	opt.SetDebugLevel(1); opt.GetValue();	// look for failed cases....
//	opt.SetDebugLevel(2); opt.GetValue();
//	opt.SetDebugLevel(1); opt.GetGradient();
	
	opt.SetDebugLevel(0);
	opt.Check(1);
	
	// printed unconditionally once Check() has returned.
	std::cout << "parameter fitting OK" << std::endl;
//*/
/*##############################################*/
/*##############################################*/
	
	return 0;
}

/*################################################################################################*/

// eof
