/*

------------------------------------------------------------------------------

A license is hereby granted to reproduce this software source code and
to create executable versions from this source code for personal,
non-commercial use.  The copyright notice included with the software
must be maintained in all copies produced.

THIS PROGRAM IS PROVIDED "AS IS". THE AUTHOR PROVIDES NO WARRANTIES
WHATSOEVER, EXPRESSED OR IMPLIED, INCLUDING WARRANTIES OF
MERCHANTABILITY, TITLE, OR FITNESS FOR ANY PARTICULAR PURPOSE.  THE
AUTHOR DOES NOT WARRANT THAT USE OF THIS PROGRAM DOES NOT INFRINGE THE
INTELLECTUAL PROPERTY RIGHTS OF ANY THIRD PARTY IN ANY COUNTRY.

Copyright (c) 1995, 1996, John Conover, All Rights Reserved.

Comments and/or bug reports should be addressed to:

    john@johncon.com (John Conover)

------------------------------------------------------------------------------

translit.c, transliterate a page

ssize_t transliterate (unsigned char *page, ssize_t count);

    translate count many characters in page using uppercase as a
    translation table

    note: the sole reason for breaking this module out of searchfile()
    is to provide a means of manipulating the content of a file that
    is being searched-the rules are:

        1) the search area must start at page[0], (but can constitute
        a smaller area of the page data space,) and the search area
        must end with a ' ' character; it is a requirement of bmhsearch(),
        in bmhsearch.c, that the '\0' character is reserved as an end
        of search sentinel in the pattern-failure to observe this rule
        will result in a program that is erratic and either hangs
        forever, or perhaps does a core dump of a very involved data
        structure, that is very difficult to analyze-see also
        uppercase.c and bmhsearch.c

        2) the return value, count, must be the size of the data
        space, in page to be searched, *_NOT_* including the last ' '
        character

In conjunction with uppercase.c, hyphenation, backspace and
underlining, and phrase searching are addressed:

    1) hyphenation could be implemented by omitting a '-' followed by
    any number of white space characters

    2) if the program is used primarily for searching catman pages,
    the backspace and underlining features that are incorporated in
    the man page system can be defeated by deleting the "backspace
    character" sequences from the documents.

    3) phrase searching could be enhanced by translating any number of
    whitespace characters into a single ' ' character-the "\ " search
    phrase would then be interpreted as any number of white space
    characters. See uppercase.c for comments concerning whitespace,
    and locale specific issues.

    Note that main() in rel.c calls transliterate() in translit.c to
    transliterate the query/search criteria-if an exact match is
    specified, both the pattern and the data would be altered in
    exactly the same manner, and appropriate matches found even though
    both were translated, (although additional patterns could
    conceivably be matched, the originals will be found,
    regardless,) for example, the data:

        ... re-engineering ...

    or hyphenated:

        ... re-
        engineering ...

    would become reengineering, which could be found by any of the
    query patterns:

        re
        engineering
        reengineering
        re-engineering

    Likewise for multiple space compression in phrase query patterns.
    Quite probably, such scenarios should be controlled by command
    line options, perhaps via a language selection to avoid
    localization and portability conflicts.

The algorithm is as follows:

    for each character in the page

        replace the character with its equivalent in uppercase[]

Usage is a call with page referencing the first character to be
translated, and count the number of characters to be translated,
for example:

    count = transliterate (page, count + 2);

There are no errors, and the number of characters translated is
returned

To test this module, compile the module source with -DTEST_TRANSLIT

$Revision: 1.2 $
$Date: 1996/09/13 13:47:23 $
$Id: translit.c,v 1.2 1996/09/13 13:47:23 john Exp $
$Log: translit.c,v $
Revision 1.2  1996/09/13 13:47:23  john
Added handling of circularly linked directories and subdirectories in searchpath.c
Cosmetic changes to bmhsearch.c, postfix.c, rel.c, searchfile.c, translit.c, uppercase.c, version.c.

Revision 1.1  1996/02/08 02:55:10  john
Added hyphenation, backspace, and multiple whitespace capability.
Changes to files: uppercase.c translit.c searcfile.c rel.c and version.c-required for hyphenation, backspace, and multiple whitespace capability.

 * Revision 1.0  1995/04/22  05:13:18  john
 * Initial revision
 *

*/

#include "rel.h"

/*

RCS identification strings for this module and for the header it
includes; they are compiled in as file-scope character arrays and are
not referenced by any code in this file-the whole group is omitted
when LINT is defined (presumably to avoid lint's "defined but not
used" complaints; confirm against the project's lint setup)

*/

#ifndef LINT /* include rcsid only if not running lint */

static char rcsid[] = "$Id: translit.c,v 1.2 1996/09/13 13:47:23 john Exp $"; /* module version, RCS keyword string for this source file */
static char rcsid_h[] = TRANSLIT_H_ID; /* module include version, TRANSLIT_H_ID is supplied by the included header */

#endif

/*

Note: the heuristics for addressing hyphenation issues are as follows:

    if a hyphen is found while transliterating the page:

        skip the hyphen, and any following whitespace or other
        hyphens, up to the first character that is not whitespace or
        a hyphen, which will collapse consecutive instances of
        whitespace and hyphens into nothing.

Note: the heuristics for addressing the backspace character are as
follows:

    if a backspace character is found while transliterating the page:

        skip the backspace, and overwrite the character before the
        backspace with the character after the backspace, which will
        instantiate the character of the last instance of
        consecutive backspace/character combinations. This is
        specifically for catman pages which utilize
        underscore/backspace/character combinations for underlining,
        in addition to backspace/character combinations for bold
        representation-note that for this process to be successful,
        the underscore must precede the character in the sequence.

Note: the heuristics for addressing phrase issues are as follows:

    if a whitespace character is found while transliterating the page:

        and if the previous character found while transliterating the
        page is also whitespace, skip the second instance of the
        whitespace character, which will collapse consecutive
        instances of whitespace characters into a single space.

*/

/*

transliterate (), translate and compact a page, in place

    page    first character of the area to be translated; the area is
            rewritten in place, and the compacted result always starts
            at page[0]

    count   number of characters, starting at page[0], to be examined

    returns the number of characters kept, (never more than count;)
            characters of page beyond the returned length are left in
            an unspecified state and are not part of the result

every character is first mapped through the uppercase[] table, and the
mapped value is then handled as follows:

    '-'     the hyphen, and every ' ' or '-' that follows it, is
            dropped; the first character that is neither is kept

    '\b'    the backspace is dropped, and the character that follows
            it replaces the last character that was kept; a backspace
            with no kept character before it, or with no character
            after it inside count, is simply dropped-nothing outside
            of page[0] through page[count - 1] is ever read or written

    ' '     kept only if the previously handled character was not
            also a ' '

    other   kept as is

only a mapped value of ' ' is treated as whitespace here; presumably
uppercase[] maps all whitespace characters to ' ', see uppercase.c

*/

#ifdef __STDC__

ssize_t transliterate (unsigned char *page, ssize_t count)

#else

ssize_t transliterate (page, count)
    unsigned char *page;
    ssize_t count;

#endif

{
    unsigned char last_char = (unsigned char) '\0', /* character handled by the previous pass of the loop */
                  current_char, /* current character, after mapping through uppercase[] */
                  *char_ref = page; /* where the next kept character goes, always page + j */

    ssize_t i, /* index of the character being read, same width as count so large pages are not truncated */
            j = 0; /* number of characters kept so far */

    for (i = 0; i < count; i++) /* for each character in the page */
    {
        /* j never exceeds i, so this store never runs ahead of the character being read */

        current_char = *char_ref = (unsigned char) uppercase[(int) page[i]]; /* convert the character to uppercase */

        switch ((int) current_char) /* what is the current character in the memory page? */
        {

            case (int) '-': /* hyphenation? */

                i++; /* yes, skip the hyphen; next character in the page */

                for (; i < count; i++) /* for each character following the hyphen */
                {
                    current_char = *char_ref = (unsigned char) uppercase[(int) page[i]]; /* convert the character to uppercase */

                    if (current_char != (unsigned char) ' ' && current_char != (unsigned char) '-') /* character neither whitespace nor a hyphen? */
                    {
                        char_ref++; /* yes, keep it; next character */
                        j++; /* increment the character count */
                        break; /* the outer loop's i++ steps past this character */
                    }

                }

                break;

            case (int) '\b': /* backspace? */

                /*

                the overwrite needs a kept character before the
                backspace, (j > 0,) and a character after it that is
                still inside the page, (i + 1 < count;) if either is
                missing, only the backspace itself is dropped, and
                any following character is handled normally by the
                next pass of the loop

                */

                if (j > 0 && i + 1 < count) /* something to overwrite, and something to overwrite it with? */
                {
                    i++; /* yes, skip the backspace; next character in the page */
                    current_char = char_ref[-1] = (unsigned char) uppercase[(int) page[i]]; /* replace the last kept character with the uppercase of the character after the backspace */
                }

                break;

            case (int) ' ': /* space? */

                if (last_char != (unsigned char) ' ') /* yes, last character in memory page not a space? */
                {
                    char_ref++; /* yes, keep the first space of a run; next character */
                    j++; /* increment the character count */
                }

                break;

            default:

                char_ref++; /* keep the character; next character */
                j++; /* increment the character count */
                break;

        }

        last_char = current_char; /* last character in memory page is current character in memory page */
    }

    return (j); /* return the size of the page */
}

#ifdef TEST_TRANSLIT

/*

simple exerciser for testing transliterate (); read lines from stdin,
transliterate each one, and print the result to stdout, one result per
line; input is read with a bounded fgets (), so a line longer than
BUFSIZ - 1 characters is handled as several consecutive pieces, each
transliterated and printed on its own; ignore the:

declared global, could be static
    transliterate       translit.c(xx)

from lint

exit status is 0 on end of input, and 1 if the uppercase array could
not be set up

*/

#ifdef __STDC__

int main (void)

#else

int main ()

#endif

{
    unsigned char buffer[BUFSIZ]; /* buffer to be parsed */

    size_t length; /* length of the input line, less any trailing newline */

    ssize_t i; /* length of transliterated buffer */

    if (make_uppercase () != (unsigned char *) 0) /* setup the uppercase array */
    {

        while (fgets ((char *) buffer, (int) sizeof (buffer), stdin) != (char *) 0) /* input the string to be transliterated, never more than the buffer can hold */
        {
            length = strlen ((char *) buffer); /* how much was read? */

            if (length > 0 && buffer[length - 1] == (unsigned char) '\n') /* fgets () keeps the newline, is there one? */
            {
                length--; /* yes, drop it, so that only the text of the line is transliterated */
                buffer[length] = (unsigned char) '\0';
            }

            i = transliterate (buffer, (ssize_t) length); /* transliterate the buffer */
            buffer[i] = (unsigned char) '\0'; /* terminate the transliterated buffer with an EOS for printing, i is never more than length */
            (void) printf ("%s\n", buffer); /* print the transliterate buffer */
        }

    }

    else
    {
        (void) fprintf (stderr, "error making uppercase array\n"); /* couldn't setup the uppercase array, print the error */
        exit (1); /* and exit */
    }

    exit (0); /* return success */

#ifdef LINT /* include only if running lint */

    return (0); /* for LINT formality */

#endif

}

#endif
