/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                        Copyright (c) 1997                             */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify, distribute and sell this software   */
/*  and its documentation for any purpose is  hereby granted without     */
/*  fee, subject to the following conditions:                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                Author :  Alan W Black (awb@cstr.ed.ac.uk)             */
/*                          Richard Caley (rjc@cstr.ed.ac.uk)            */
/*                Date   :  January 1997                                 */
/*-----------------------------------------------------------------------*/
/*                                                                       */
/*  A non-GNU implementation of EST_String class to use with compilers that  */
/*  aren't GNU C++                                                       */
/*  Note this isn't comprehensive and just covers the things we use      */
/*                                                                       */
/*************************************************************************/

#ifndef __EST_STRING_H__
#define __EST_STRING_H__

// For better libg++ compatibility. See the ReadMe for details.

// #define __FSF_COMPATIBILITY__ (1)

// This will cause gsub to use a local table of substitution points
// walloced for each gsub. Otherwise one global one is used, which
// should be faster but is not reentrant.

#define __GSUB_REENTRANT__ (1)

// Warn whenever a writable version of a shared chunk is requested --
// useful for tracking down and minimising copies.

#define __INCLUDE_CHUNK_WARNINGS__ (1)

// Whether or not to inline code for assignment etc.

//  Leaving the string functions out of line significantly reduces the
//  size of the code without (measurably) affecting the speed.
// #define __STRING_INLINE_AGGRESSIVELY__ (1)

// SII(BODY) yields BODY when aggressive inlining has been requested and
// nothing at all otherwise.
#ifdef __STRING_INLINE_AGGRESSIVELY__
#    define SII(BODY) BODY
#else
#    define SII(BODY) /* empty */
#endif

// When defined, gripe about weird arguments such as null pointers.
#define STRING_ARG_GRIPE (1)

// When defined, finding something to gripe about is fatal: we die then
// and there.
#define GRIPE_FATAL (1)

// gripe(WHAT) writes "oops! " followed by WHAT and a newline to cerr.
// WHAT has to be a string literal because it is concatenated with the
// neighbouring literals. The fatal flavour then calls abort().
#ifndef GRIPE_FATAL
#    define gripe(WHAT) (cerr<< ("oops! " WHAT "\n"))
#else
extern "C" void abort(void);
#    define gripe(WHAT) (cerr<< ("oops! " WHAT "\n"),abort())
#endif

// Treat the lvalue EXP as a (non-const) EST_ChunkPtr by casting its address
// and dereferencing the result. The whole expansion is parenthesised so
// that a postfix operator written after the macro applies to the
// dereferenced object rather than to the cast pointer.
#define NON_CONST_CHUNKPTR(EXP)  (*((EST_ChunkPtr *)&(EXP)))

class EST_String;
class EST_Regex;

#include <string.h>
#include "EST_iostream.h"
#include <limits.h>
#include "EST_Chunk.h"
#include "EST_walloc.h"
#include "EST_strcasecmp.h"

// safe_strlen(S): strlen(S) for a non-null S, otherwise 0 (after griping
// when STRING_ARG_GRIPE is defined). S is evaluated more than once.
//
// CHECK_STRING_ARG(S): statement that gripes when S is null and
// STRING_ARG_GRIPE is defined; otherwise it does nothing. Both variants
// are wrapped in do { } while (0) so the macro behaves as one statement
// and cannot capture an `else` that follows it.
#if defined(STRING_ARG_GRIPE)
#   define safe_strlen(S) ((S)?strlen(S):(gripe("null strlen"),0))
#   define CHECK_STRING_ARG(S) do { if (!(S)) gripe("null string arg"); } while (0)
#else
#   define safe_strlen(S) ((S)?strlen(S):0)
#   define CHECK_STRING_ARG(S) do { } while (0)
#endif

extern EST_String EST_String_nullString;

// EST_String: a character string stored as an EST_ChunkPtr (`memory`)
// together with an explicit length (`size`). The constructors visible in
// this header leave `memory` NULL whenever the length is zero.
//
// Member bodies wrapped in SII(...) are only compiled inline when
// __STRING_INLINE_AGGRESSIVELY__ is defined; otherwise SII expands to
// nothing and the line is a plain declaration.
class EST_String {
private:
  // Used as the default value for the optional arguments of cat().
  static const EST_String Empty;
  public:
   typedef int EST_string_size;
#  define MAX_STRING_SIZE (INT_MAX)

   static const char *version;

  private:
    EST_ChunkPtr memory;
    EST_string_size size;

    // Make sure this is exactly the same as an EST_String. This is being too
    // clever by half.
    // The copy constructor and copy assignment cast both sides to this
    // struct so the two members can be copied from a const source.
  
   struct EST_dumb_string {
     EST_ChunkPtr memory;
     EST_string_size size;
   } ;

   // Selects which part the chop_internal overloads return; used by
   // before(), at() and after() respectively.
   enum EST_chop_direction {
     Chop_Before = -1,
     Chop_At     = 0,
     Chop_After  = 1
   };

  // private constructor which uses the buffer given.
    EST_String(int len, EST_ChunkPtr cp) {
      size=len;
      memory = cp;
    };

    // Forwards to EST_ChunkPtr::shareing() on our buffer.
    int shareing (void) { return memory.shareing();};

    // Search primitives, defined out of line. The inline callers below
    // treat a non-zero return as "found" and read the match bounds from
    // `start` and `end`.
    int locate(const char *it, int len, int from, int &start, int &end) const;
    int locate(const EST_String &s, int from, int &start, int &end) const
		{ return locate((const char *)s.memory, s.size, from, start, end); };
    int locate(EST_Regex &ex, int from, int &start, int &end, int *starts=NULL, int *ends=NULL) const;

    // Primitives behind contains(), defined out of line.
    int extract(const char *it, int len, int from, int &start, int &end) const;
    int extract(const EST_String &s, int from, int &start, int &end) const
		{ return extract((const char *)s.memory, s.size, from, start, end); };
    int extract(EST_Regex &ex, int from, int &start, int &end) const;

    // Primitives behind before(), at() and after(), defined out of line.
    EST_String chop_internal(const char *s, int length, int pos, EST_chop_direction direction) const;
    EST_String chop_internal(int pos, int length, EST_chop_direction direction) const;
    EST_String chop_internal(EST_Regex &ex, int pos, EST_chop_direction direction) const;

   // Primitives behind the gsub() overloads, defined out of line.
   int gsub_internal(const char *os, int olength, const char *s, int length);
   int gsub_internal(EST_Regex &ex, const char *s, int length);

   // Primitive behind the split() friends; they pass exactly one of the
   // two separator pointers and NULL for the other.
   int split_internal(EST_String result[], int max, const EST_String* s_seperator, EST_Regex *re_separator, char quote) const;

  public:

    // Public creators and destructors. Inlined since they are basically just
    // trivial wrappers around EST_Chunk operations.

    // Empty string: zero length, no buffer.
    EST_String(void) {size=0;memory=NULL;};

    // Copy of the NUL-terminated C string s.
    EST_String(const char *s) SII({
      CHECK_STRING_ARG(s);
      
      size=safe_strlen(s);

       if (size != 0)
	 memory = chunk_allocate(size+1, s, size); 
       else 
	 memory=NULL;
    });


    // Two uses, chosen by whether s is null:
    //   s != NULL: copy len characters of s starting at start_or_fill;
    //              a negative len means "up to the end of s".
    //   s == NULL: build a string of len copies of the character
    //              start_or_fill; a negative len is treated as 0.
    EST_String(const char *s, int start_or_fill, int len) SII({

      if (s)
	{
	  int start= start_or_fill;
	  if (len <0)
	    len=safe_strlen(s)-start;
      
	  size=len;
	  if (size != 0)
	    memory = chunk_allocate(len+1, s+start, len);
	  else
	    memory=NULL;
	}
      else
	{
	  char fill = start_or_fill;
	  if (len<0) len=0;
	  size=len;
	  if (size != 0)
	    {
	      memory = chunk_allocate(len+1);
	      char *p = memory;
	      for(int i=0; i<len;i++)
		p[i] = fill;
	      p[len]='\0';
	    }
	  else
	    memory=NULL;
	}
    });

    // Copy len characters of s starting at start, where s_size is the
    // known length of s; a negative len means s_size-start.
    EST_String(const char *s, int s_size, int start, int len) SII({
      CHECK_STRING_ARG(s);
      
      if (len <0)
	len=s_size-start;

      size=len;
      if (size != 0)
	memory = chunk_allocate(len+1, s+start, len);
      else
	memory=NULL;
    });

    // Substring of another EST_String; a negative len means s.size-start.
    // When the request covers the whole of s the existing chunk pointer
    // is reused instead of allocating a new one.
    EST_String(const EST_String &s, int start, int len) SII({
      if (len <0)
	len=s.size-start;
      
      size=len;

      if (start == 0 && len == s.size)
	memory = NON_CONST_CHUNKPTR(s.memory);
      else if (size != 0)
	memory = chunk_allocate(len+1, s.memory, start, len);
      else
	memory = NULL;
    });

    // we have to declare our own copy constructor to lie to the
    // compiler about the constness of the RHS.
    EST_String(const EST_String &s) SII({
      *(struct EST_dumb_string *)this = *(struct EST_dumb_string *)(&s);
      });

    // Construction from a single character. The switch documented at the
    // top of this file is spelt __FSF_COMPATIBILITY__; the misspelt
    // __FSF_COMPATABILITY__ is still honoured for existing builds.
#if defined(__FSF_COMPATIBILITY__) || defined(__FSF_COMPATABILITY__)
    EST_String(const char c) SII({
      size=1;
      memory= chunk_allocate(2, &c, 1);
      });
#endif

    ~EST_String() {
      size=0;
      memory=NULL;
      };

    // Number of characters in the string.
    int length(void) const { return size; }
    // Size reported by the underlying chunk pointer.
    int space (void) const { return memory.size(); };
    // C-string view; a zero-length string yields "" rather than the
    // (NULL) buffer.
    const char *str(void) const { return size==0?"":(const char *)memory; }
    // Writable view of the buffer. For a zero-length string this is a
    // cast string literal and must not be written through.
    char *updatable_str(void) { return size==0?(char *)"":(char *)memory; }

    // Build a string from one character via a two-element NUL-terminated
    // array.
    static EST_String FromChar(const char c) 
                { const char s[2] = { c, 0 }; return EST_String(s); };

    // before(): chop_internal with Chop_Before, keyed on a position, a
    // C string, an EST_String or a regular expression.
    EST_String before(int pos, int len=0) const
		{ return chop_internal(pos, len, Chop_Before); };
    EST_String before(const char *s, int pos=0) const
		{ return chop_internal(s, safe_strlen(s), pos, Chop_Before); };
    EST_String before(const EST_String &s, int pos=0) const
		{ return chop_internal(s.str(), s.size, pos, Chop_Before); };
    EST_String before(EST_Regex &e, int pos=0) const
		{ return chop_internal(e,  pos, Chop_Before); };

    // specially inlined case for a common use.
    // A negative `from` is taken relative to the end (size+from).
    EST_String at(int from, int len=0) const
		{return EST_String(str(),size,from<0?(size+from):from,len);};
    EST_String at(const char *s, int pos=0) const
		{ return chop_internal(s, safe_strlen(s), pos, Chop_At); };
    EST_String at(const EST_String &s, int pos=0) const
		{ return chop_internal(s.str(), s.size, pos, Chop_At); };
    EST_String at(EST_Regex &e, int pos=0) const
		{ return chop_internal(e,  pos, Chop_At); };

    // after(): chop_internal with Chop_After.
    EST_String after(int pos, int len=1) const
		{ return chop_internal(pos, len, Chop_After); };
    EST_String after(const char *s, int pos=0) const
		{ return chop_internal(s, safe_strlen(s), pos, Chop_After); };
    EST_String after(const EST_String &s, int pos=0) const
		{ return chop_internal(s.str(), s.size, pos, Chop_After); };
    EST_String after(EST_Regex &e, int pos=0) const
		{ return chop_internal(e,  pos, Chop_After); };

    // search(): when locate() succeeds, store the match length
    // (end-start) in mlen and return the start; otherwise return -1 and
    // leave mlen untouched.
    int search(const char *s, int len, int &mlen, int pos=0) const
		{ int start, end;
		  if (locate(s, len, pos, start, end))
		    { mlen=end-start; return start; }
		  return -1;
		};

    int search(const EST_String s, int &mlen, int pos=0) const
		{ int start, end;
		  if (locate(s, pos, start, end))
		    { mlen=end-start; return start; }
		  return -1;
		};

    int search(EST_Regex &re, int &mlen, int pos=0, int *starts=NULL, int *ends=NULL) const
		{ int start, end;
		  if (locate(re, pos, start, end, starts, ends))
		    { mlen=end-start; return start; }
		  return -1;
		};

    // index(): start reported by locate(), or -1 when locate() fails.
    int index(const char *s, int pos=0) const
		{ int start, end; return locate(s, safe_strlen(s), pos, start, end)?start:-1; };
    int index(const EST_String &s, int pos=0) const
		{ int start, end; return locate(s, pos, start, end)?start:-1; };
    int index(EST_Regex &ex, int pos=0) const
		{ int start, end; return locate(ex, pos, start, end)?start:-1; };

    // contains(): returns whatever extract() returns; the match bounds
    // are discarded.
    int contains(const char *s, int pos=-1) const
		{ int start, end; return extract(s, safe_strlen(s), pos, start, end); };
    int contains(const char c, int pos=-1) const
		{ int start, end; char s[2] = {c,0}; return extract(s, 1, pos, start, end); };
    int contains(const EST_String &s, int pos=-1) const
		{ int start, end; return extract(s, pos, start, end); };
    int contains(EST_Regex &ex, int pos=-1) const
		{ int start, end; return extract(ex, pos, start, end); };

    // Defined out of line.
    int matches(const char *e, int pos=0) const;
    int matches(const EST_String &e, int pos=0) const;
    int matches(EST_Regex &e, int pos=0, int *starts=NULL, int *ends=NULL) const;

    // gsub(): thin wrappers that work out the argument lengths and hand
    // over to gsub_internal().
    int gsub(const char *os, const EST_String &s)
      { return gsub_internal(os, safe_strlen(os), s, s.size); };
    int gsub(const char *os, const char *s)
      { return gsub_internal(os, safe_strlen(os), s, safe_strlen(s)); };
    int gsub(const EST_String &os, const EST_String &s)
      { return gsub_internal(os, os.size, s, s.size); };
    int gsub(const EST_String &os, const char *s)
      { return gsub_internal(os, os.size, s, safe_strlen(s)); };

    int gsub(EST_Regex &ex, const EST_String &s)
      { return gsub_internal(ex, s, s.size); };
    int gsub(EST_Regex &ex, const char *s)
      { return gsub_internal(ex, s, safe_strlen(s)); };
    // Passes a NULL replacement and bracket_num in the length slot.
    int gsub(EST_Regex &ex, int bracket_num)
      { return gsub_internal(ex, NULL, bracket_num); };

    // Defined out of line.
    int freq(const char *s) const;
    int freq(const EST_String &s) const;
    int freq(EST_Regex &s) const;

    // Defined out of line.
    EST_String quote(const char quotec) const;
    EST_String quote_if_needed(const char quotec) const;
    EST_String unquote(const char quotec) const;
    EST_String unquote_if_needed(const char quotec) const;

    // Element access straight through the chunk pointer: [] returns a
    // copy of the character, () a reference to it.
    // NOTE(review): unlike str(), there is no special case for a
    // zero-length string whose buffer is NULL -- confirm that
    // EST_ChunkPtr indexing copes with that.
    const char operator [] (int i) const { return memory[i]; }
    char &operator () (int i) { return memory(i); }

    operator const char*() const {return str(); }
    operator char*() { return updatable_str(); }
	
    // Forwards to the global make_updatable() with room for size+1
    // characters.
    void make_updatable(void) { ::make_updatable(memory, size+1);};

    EST_String &operator += (const char *b);
    EST_String &operator += (const EST_String b);

    // Assign from a C string. The existing buffer is overwritten in
    // place when it is not shared and the new text is shorter than the
    // current contents; otherwise a fresh chunk is allocated.
    EST_String &operator = (const char *str) SII({
      CHECK_STRING_ARG(str);
      int len = safe_strlen(str);
      if (!len)
	memory = NULL;
      else if (!shareing() && len < size)
	// str may point into our own buffer, so the regions can overlap.
	memmove((char *)memory, str, len+1);
      else if (len)
	memory = chunk_allocate(len+1, str, len);
      size=len;
      return *this;
    });

    // Assign a single character.
    EST_String &operator = (const char c) SII({
      memory = chunk_allocate(2, &c, 1);
      size=1;
      return *this;
    });

    // Copy assignment; same member-wise trick as the copy constructor.
    EST_String &operator = (const EST_String &s) SII({
      *(struct EST_dumb_string *)this = *(struct EST_dumb_string *)(&s);
	// memory = NON_CONST_CHUNKPTR(s.memory);
	// size=s.size;
      return *this;
    });

    friend EST_String operator + (const char *a, const EST_String &b);
    friend EST_String operator + (const EST_String &a, const EST_String &b);
    friend EST_String operator + (const EST_String &a, const char *b);

    // Equality. The (EST_String, char*) form simply swaps its arguments.
    friend int operator == (const char *a, const EST_String &b);
    friend int operator == (const EST_String &a, const char *b)
		  { return b == a; };
    friend int operator == (const EST_String &a, const EST_String &b);

    friend int operator != (const char *a, const EST_String &b)
	  { return !(a==b); };
    friend int operator != (const EST_String &a, const char *b)
	  { return !(a==b); };
    friend int operator != (const EST_String &a, const EST_String &b)
	  { return !(a==b); };

    // Three-way comparison; the (char*, EST_String) form negates the
    // result of the swapped call.
    friend int compare(const EST_String &a, const EST_String &b);
    friend int compare(const EST_String &a, const char *b);
    friend inline int compare(const char *a, const EST_String &b)
       { return -compare(b,a);  };
    // Comparison taking an optional table. The default value for `table`
    // is supplied by the namespace-scope declaration that follows the
    // class, because a friend declaration which is not a definition may
    // not specify default arguments.
    friend int fcompare(const EST_String a, const EST_String b, 
			const unsigned char *table);

    friend inline int fcompare(const EST_String a, const EST_String b, 
			       const EST_String table) 
       { return fcompare(a, b, (const unsigned char *)(const char *)table); };

    // Ordering operators, all expressed through compare().
    friend inline int operator < (const char *a, const EST_String &b)
       { return compare(a,b) < 0; };
    friend inline int operator < (const EST_String &a, const char *b) 
       { return compare(a,b) < 0; };
    friend inline int operator < (const EST_String &a, const EST_String &b) 
       { return compare(a,b) < 0; };
    friend inline int operator > (const char *a, const EST_String &b) 
       { return compare(a,b) > 0; };
    friend inline int operator > (const EST_String &a, const char *b) 
       { return compare(a,b) > 0; };
    friend inline int operator > (const EST_String &a, const EST_String &b) 
       { return compare(a,b) > 0; };
    friend inline int operator <= (const char *a, const EST_String &b) 
       { return compare(a,b) <= 0; };
    friend inline int operator <= (const EST_String &a, const char *b) 
       { return compare(a,b) <= 0; };
    friend inline int operator <= (const EST_String &a, const EST_String &b) 
       { return compare(a,b) <= 0; };
    friend inline int operator >= (const char *a, const EST_String &b) 
       { return compare(a,b) >= 0; };
    friend inline int operator >= (const EST_String &a, const char *b) 
       { return compare(a,b) >= 0; };
    friend inline int operator >= (const EST_String &a, const EST_String &b) 
       { return compare(a,b) >= 0; };

    // split(): forwards to split_internal() with either a string or a
    // regular-expression separator.
    friend int split(const EST_String & s, EST_String result[], 
		     int max, const EST_String& seperator, char quote=0)
	{ return s.split_internal(result, max, &seperator, NULL, quote); };
    friend int split(const EST_String & s, EST_String result[], int max, 
		     EST_Regex& seperator, char quote=0)
	{ return s.split_internal(result, max, NULL, &seperator, quote); };

    friend EST_String upcase(const EST_String &s);
    friend EST_String downcase(const EST_String &s);

    // Defined out of line; unused trailing arguments default to Empty.
    static EST_String cat(EST_String s1, 
			  EST_String s2 = Empty, 
			  EST_String s3 = Empty, 
			  EST_String s4 = Empty, 
			  EST_String s5 = Empty);

    friend ostream &operator << (ostream &s, const EST_String &str);
    friend class EST_Regex;
}; 

// Redeclaration of the friend above; this is where the default for
// `table` lives.
int fcompare(const EST_String a, const EST_String b, 
	     const unsigned char *table=NULL);

#include "EST_Regex.h"

#endif	
