/*
 *	cook - file construction tool
 *	Copyright (C) 1991, 1992, 1993, 1994, 1997, 1998 Peter Miller;
 *	All rights reserved.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 2 of the License, or
 *	(at your option) any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 * MANIFEST: functions to perform recipe pattern matching
 *
 * This is in the inner loop, so it must perform well.
 * A free list of match structures is maintained to avoid malloc calls;
 * malloc is only called when this free list is empty.
 *
 * The tough part about designing a pattern matcher for something like cook is
 * that the patterns must be reversible.  That is, it must be possible to use
 * the same string both as a pattern to be matched against and as a template
 * for building a string once a pattern has matched.  Rather like the
 * difference between the left and right sides of an editor search-and-replace
 * command using the same description for both the search pattern and the
 * replace template.  This is why classic regular expressions have not been
 * used.  They tend to be slow to match, too.
 *
 * This matcher has eleven match "fields", referenced as % and %0 to %9.
 * The % character can be escaped as %%.  The % and %1 to %9 forms match any
 * character except '/'.  The %0 form matches all characters, but must be
 * either empty, or have whole path components, including the trailing '/' on
 * each component.  A few examples will make this clearer:
 *	"%.c" matches "fred.c" with %="fred"
 *	"%.c" fails to match "snot/fred.c"
 *	"%1/%2.c" matches "snot/fred.c" with %1="snot" and %2="fred"
 *	"%1/%2.c" fails to match "etc/boo/fred.c"
 *	"%0%5.c" matches "fred.c" with %0="" and %5="fred"
 *	"%0%6.c" matches "snot/fred.c" with %0="snot/" and %6="fred"
 *	"%0%7.c" matches "etc/boo/fred.c" with %0="etc/boo/" and %7="fred"
 *	"/usr/%1/%1%2/%3.%2%4" matches "/usr/man/man1/fred.1x" with %1="man",
 *		%2="1", %3="fred" and %4="x".
 * The %0 behaviour is designed to allow patterns to range over subtrees in a
 * controlled manner.  Note that the use of this sort of pattern in a recipe
 * will result in deeper searches than the naive recipe designer would expect.
 */

#include <ac/ctype.h>
#include <ac/stddef.h>
#include <ac/string.h>
#include <ac/stdlib.h>

#include <error_intl.h>
#include <expr/position.h>
#include <match.h>
#include <mem.h>
#include <str.h>
#include <trace.h>


#define MATCH_CHAR '%'


/*
 * NAME
 *	illegal_pattern - complain
 *
 * SYNOPSIS
 *	void illegal_pattern(const expr_position_ty *pp, char *s, int why);
 *
 * DESCRIPTION
 *	The illegal_pattern function is used to complain about errors in
 *	pattern specifications.
 *
 * RETURNS
 *	void
 */

static void illegal_pattern _((const expr_position_ty *, char *, int));

/*
 * Issue an error message about pattern `s' at position `pp'.
 *
 * `why' selects the complaint:
 *	negative	the %0 field is in an illegal position
 *	0..9		illegal use of the numbered field %N
 *	10 or more	illegal use of the plain % field
 */
static void
illegal_pattern(pp, s, why)
	const expr_position_ty *pp;
	char		*s;
	int		why;
{
	sub_context_ty	*ctx;
	int		misplaced;

	misplaced = (why < 0);
	ctx = sub_context_new();

	/* Name: the offending field, spelled the way it appears in patterns */
	if (misplaced)
		sub_var_set(ctx, "Name", "%c0", MATCH_CHAR);
	else if (why < 10)
		sub_var_set(ctx, "Name", "%c%d", MATCH_CHAR, why);
	else
		sub_var_set(ctx, "Name", "%c", MATCH_CHAR);

	/* Pattern: the complete pattern text */
	sub_var_set(ctx, "Pattern", "%s", s);

	if (misplaced)
	{
		error_with_position
		(
			pp,
			ctx,
		     i18n("illegal position of '$name' in \"$pattern\" pattern")
		);
	}
	else
	{
		error_with_position
		(
			pp,
			ctx,
			i18n("illegal use of '$name' in \"$pattern\" pattern")
		);
	}
	sub_context_delete(ctx);
}


/*
 * NAME
 *	match_alloc - allocate match structure
 *
 * SYNOPSIS
 *	match_ty *match_alloc(void);
 *
 * DESCRIPTION
 *	The match_alloc function is used to allocate a match structure.
 *	The returned structure will be all zeros.
 *
 * RETURNS
 *	match_ty * - a pointer to the match structure in dynamic memory
 *
 * CAVEAT
 *	When finished with it should be disposed of by calling the match_free
 *	function.
 */

static match_ty *match_alloc _((void));

static match_ty *
match_alloc()
{
	match_ty	*mp;
	int		slot;

	mp = mem_alloc(sizeof(*mp));

	/* no field has been seen yet, and no field has a value yet */
	mp->mask = 0;
	slot = 0;
	while (slot < SIZEOF(mp->fill))
	{
		mp->fill[slot] = 0;
		++slot;
	}
	return mp;
}


/*
 * NAME
 *	match_free - dispose of match structure
 *
 * SYNOPSIS
 *	void match_free(match_ty *);
 *
 * DESCRIPTION
 *	The match_free function is used to dispose of a match structure
 *	allocated by the match_alloc function.
 *
 * RETURNS
 *	void
 */

void
match_free(field)
	match_ty	*field;
{
	int		slot;

	trace(("match_free(field = %08X)\n{\n"/*}*/, field));

	/* release every field value that was filled in, then the structure */
	for (slot = 0; slot < SIZEOF(field->fill); slot++)
	{
		if (!field->fill[slot])
			continue;
		str_free(field->fill[slot]);
		field->fill[slot] = 0;
	}
	mem_free(field);
	trace((/*{*/"}\n"));
}


/*
 * NAME
 *	matcher - match pattern to string
 *
 * SYNOPSIS
 *	int matcher(char *op, char *formal_begin, char *formal_end,
 *		char *actual_begin, char *actual_end, match_ty *field,
 *		const expr_position_ty *pp);
 *
 * DESCRIPTION
 *	The matcher function is used to match up a pattern with a string,
 *	filling in the fields as it goes.
 *
 * RETURNS
 *	int: zero if does not match, nonzero if does match.
 *		-1 on error
 *
 * CAVEAT
 *	The field structure is not allocated here.
 */

static int matcher _((char *, char *, char *, char *, char *, match_ty *,
	const expr_position_ty *));

/*
 * Match the pattern text [formal_begin, formal_end) against the string
 * text [actual_begin, actual_end), recording field values in *field.
 *
 *	op		start of the complete pattern; used in error
 *			messages and to look at what precedes a %0
 *	formal_begin, formal_end
 *			the part of the pattern still to be matched
 *	actual_begin, actual_end
 *			the part of the string still to be matched
 *	field		match fields; a field which is already set must
 *			match identically, an unset field is filled in
 *			when a match is found
 *	pp		position handed to the error reporter
 *
 * Returns 1 if the texts match, 0 if they do not, and -1 if the pattern
 * is illegal (the complaint has already been issued).
 *
 * Both ranges are half-open.  Once trailing constants have been
 * stripped, the characters at and beyond the end pointers are real
 * text which is no longer part of the range, so they must not take
 * part in any comparison.
 */
static int
matcher(op, formal_begin, formal_end, actual_begin, actual_end, field, pp)
	char		*op;	/* original pattern */
	char		*formal_begin;
	char		*formal_end;
	char		*actual_begin;
	char		*actual_end;
	match_ty	*field;
	const expr_position_ty *pp;
{
	size_t		index;
	string_ty	*sp;
	int		result;
	char		*q;
	int		sub_result;

	trace(("matcher(op = %08lX, formal_begin = %08lX, actual_begin = %08lX, field = %08X)\n{\n"/*}*/, op, formal_begin, actual_begin, field));
	trace_string(op);
	trace(("formal = \"%.*s\";\n", (int)(formal_end - formal_begin), formal_begin));
	trace(("actual = \"%.*s\";\n", (int)(actual_end - actual_begin), actual_begin));

	/*
	 * Rip any matching constant string off the end of the formal
	 * and actual strings.  Nice, easy rejections here, and they lay
	 * the foundations for an optimization inside the main loop,
	 * avoiding a recursion.
	 */
	for (;;)
	{
		/*
		 * If we have run out of formal string, we must also run
		 * out of actual string.
		 */
		assert(formal_begin <= formal_end);
		if (formal_begin >= formal_end)
		{
			result = (actual_begin >= actual_end);
			goto ret;
		}

		/*
		 * If the last character could possibly be part of a
		 * matching sequence, stop.  We can't actually tell,
		 * because the matching sequence is only meaningful
		 * left-to-right.
		 *
		 * Examples of right-to-left broken-ness: %%0, %%%
		 */
		if (formal_end[-1] == MATCH_CHAR)
			break;
		if
		(
			formal_begin + 1 < formal_end
		&&
			formal_end[-2] == MATCH_CHAR
		&&
			isdigit((unsigned char)formal_end[-1])
		)
			break;

		/*
		 * We are looking at a constant.
		 * See if it matches.
		 */
		if
		(
			actual_begin >= actual_end
		||
			actual_end[-1] != formal_end[-1]
		)
		{
			result = 0;
			goto ret;
		}
		--formal_end;
		--actual_end;
	}
	trace(("formal = \"%.*s\";\n", (int)(formal_end - formal_begin), formal_begin));
	trace(("actual = \"%.*s\";\n", (int)(actual_end - actual_begin), actual_begin));

	for (;;)
	{
		/*
		 * Take care of the end of the string
		 */
		if (formal_begin >= formal_end)
		{
			result = (actual_begin >= actual_end);
			goto ret;
		}

		/*
		 * Take care of literal characters
		 *
		 * The actual text may already be exhausted.  The
		 * character at actual_end is one of the constants
		 * stripped above (or the terminator), so comparing
		 * against it could produce a false match and push
		 * actual_begin beyond actual_end, which would turn the
		 * length calculations below negative.
		 */
		if (*formal_begin != MATCH_CHAR)
		{
			if
			(
				actual_begin >= actual_end
			||
				*formal_begin != *actual_begin
			)
			{
				result = 0;
				goto ret;
			}
			++formal_begin;
			++actual_begin;
			continue;
		}

		/*
		 * take care of quoted match character
		 *
		 * Only look at the character after the match character
		 * if it is still inside the formal range.
		 */
		if
		(
			formal_begin + 1 < formal_end
		&&
			formal_begin[1] == MATCH_CHAR
		)
		{
			if
			(
				actual_begin >= actual_end
			||
				*actual_begin++ != MATCH_CHAR
			)
			{
				result = 0;
				goto ret;
			}
			formal_begin += 2;
			continue;
		}

		/*
		 * The %0 pattern element matches zero or more directory
		 * pieces, including the trailing slashes.
		 */
		if
		(
			formal_begin + 1 < formal_end
		&&
			formal_begin[1] == '0'
		)
		{
			char	*midpoint;

			formal_begin += 2;

			/*
			 * It must appear at the beginning of the
			 * pattern, or immediately following slashes.
			 * It may not appear before a slash.
			 *
			 * (formal_begin has already moved past the %0,
			 * so formal_begin[-3] is the character before
			 * the match character.)
			 */
			if
			(
				(
					formal_begin > op + 2
				&&
					formal_begin[-3] != '/'
				)
			||
				(
					formal_begin < formal_end
				&&
					*formal_begin == '/'
				)
			)
			{
				illegal_pattern(pp, op, -1);
				result = -1;
				goto ret;
			}

			/*
			 * It could have been seen earlier, must be
			 * identical if so.
			 */
			field->mask |= 1;
			sp = field->fill[0];
			if (sp)
			{
				if
				(
					actual_begin + sp->str_length > actual_end
				||
					0 != memcmp(actual_begin, sp->str_text, sp->str_length)
				)
				{
					result = 0;
					goto ret;
				}
				actual_begin += sp->str_length;
				continue;
			}

			/*
			 * Match the largest number of whole directory chunks.
			 */
			midpoint = actual_end;
			for (;;)
			{
				/* back up to just after the previous slash */
				while (midpoint > actual_begin)
				{
					if (midpoint[-1] == '/')
						break;
					--midpoint;
				}

				field->fill[0] =
					str_n_from_c(actual_begin, midpoint - actual_begin);
				trace_string(field->fill[0]->str_text);
				sub_result =
					matcher
					(
						op,
						formal_begin,
						formal_end,
						midpoint,
						actual_end,
						field,
						pp
					);
				if (sub_result < 0)
				{
					result = -1;
					goto ret;
				}
				if (sub_result)
				{
					result = 1;
					goto ret;
				}
				str_free(field->fill[0]);
				field->fill[0] = 0;

				/*
				 * Step back over the slash to try a
				 * shorter prefix.  Test before stepping,
				 * so that midpoint never points before
				 * actual_begin.
				 */
				if (midpoint - actual_begin <= 1)
				{
					result = 0;
					goto ret;
				}
				--midpoint;
			}
		}

		/*
		 * figure index
		 *
		 * %1 to %9 use slots 1 to 9, a plain % uses slot 10.
		 */
		if
		(
			formal_begin + 1 < formal_end
		&&
			isdigit((unsigned char)formal_begin[1])
		)
		{
			index = formal_begin[1] - '0';
			formal_begin += 2;
		}
		else
		{
			index = 10;
			++formal_begin;
		}
		field->mask |= 1 << index;

		/*
		 * see if the field is already set
		 * must be identical if so
		 */
		sp = field->fill[index];
		if (sp)
		{
			if
			(
				actual_begin + sp->str_length > actual_end
			||
				0 != memcmp(actual_begin, sp->str_text, sp->str_length)
			)
			{
				result = 0;
				break;
			}
			actual_begin += sp->str_length;
			continue;
		}

		/*
		 * Fast special case.  This is very common, because we
		 * stripped the constants off the end.
		 */
		if (formal_begin >= formal_end)
		{
			if (memchr(actual_begin, '/', actual_end - actual_begin))
			{
				result = 0;
				goto ret;
			}
			field->fill[index] =
				str_n_from_c(actual_begin, actual_end - actual_begin);
			trace(("index = %ld;\n", (long)index));
			result = 1;
			break;
		}

		/*
		 * The normal % and %N sequences can't match a slash at
		 * all.  This allows for a quick reject, and short
		 * circuits some of the recursion alternatives.
		 */
		q = memchr(actual_begin, '/', actual_end - actual_begin);
		if (q)
		{
			if
			(
				formal_begin < formal_end
			&&
				!memchr(formal_begin, '/', formal_end - formal_begin)
			)
			{
				result = 0;
				break;
			}
		}
		else
			q = actual_end;
		/* not allowed to be empty */
		while (q > actual_begin)
		{
			/* try the longest candidate first, then shorten it */
			field->fill[index] =
				str_n_from_c(actual_begin, q - actual_begin);
			trace(("index = %ld;\n", (long)index));
			trace_string(field->fill[index]->str_text);
			sub_result =
				matcher
				(
					op,
					formal_begin,
					formal_end,
					q,
					actual_end,
					field,
					pp
				);
			if (sub_result < 0)
			{
				result = -1;
				goto ret;
			}
			if (sub_result)
			{
				result = 1;
				goto ret;
			}
			str_free(field->fill[index]);
			field->fill[index] = 0;
			--q;
		}
		result = 0;
		break;
	}
ret:
	trace(("return %d;\n", result));
	trace((/*{*/"}\n"));
	return result;
}


/*
 * NAME
 *	match - attempt to match a pattern against a string
 *
 * SYNOPSIS
 *	match_ty *match(string_ty *pattern, string_ty *string,
 *		const expr_position_ty *pp);
 *
 * DESCRIPTION
 *	The match function is used to match a pattern with a string.
 *	The matching fields are filled in in the returned structure.
 *
 * RETURNS
 *	match_ty *: a pointer to a match structure in dynamic memory with the
 *	match fields set as appropriate.
 *
 *	A NULL pointer is returned if the string does not match the
 *	pattern.
 *
 *	The value MATCH_ERROR will be returned if it was not a valid
 *	pattern; the error message will have been printed already.
 *
 * CAVEAT
 *	The match structure should be released by calling match_free.
 */

match_ty *
match(pattern, string, pp)
	string_ty	*pattern;
	string_ty	*string;
	const expr_position_ty *pp;
{
	match_ty	*mp;
	int		status;
	char		*patn_begin;
	char		*patn_end;
	char		*text_begin;
	char		*text_end;

	trace(("match(pattern = %08lX, string = %08lX)\n{\n"/*}*/, pattern, string));
	trace_string(pattern->str_text);
	trace_string(string->str_text);

	/* the matcher works on [begin, end) character ranges */
	patn_begin = pattern->str_text;
	patn_end = patn_begin + pattern->str_length;
	text_begin = string->str_text;
	text_end = text_begin + string->str_length;

	mp = match_alloc();
	status =
		matcher
		(
			patn_begin,
			patn_begin,
			patn_end,
			text_begin,
			text_end,
			mp,
			pp
		);

	/*
	 * Only a successful match hands the structure to the caller;
	 * otherwise discard it and report why there is none.
	 */
	if (status <= 0)
	{
		match_free(mp);
		if (status < 0)
			mp = MATCH_ERROR;
		else
			mp = 0;
	}
	trace(("return %08lX;\n", mp));
	trace((/*{*/"}\n"));
	return mp;
}


/*
 * NAME
 *	reconstruct - make string from pattern
 *
 * SYNOPSIS
 *	string_ty *reconstruct(string_ty *pattern, const match_ty *field,
 *		const expr_position_ty *pp);
 *
 * DESCRIPTION
 *	The reconstruct function is used to rebuild a string from a replacement
 *	pattern and the match field values.
 *
 * RETURNS
 *	string_ty *; pointer to the reconstructed string
 *		or NULL on error (the error will already have been printed)
 */

string_ty *
reconstruct(pattern, field, pp)
	string_ty	*pattern;
	const match_ty	*field;
	const expr_position_ty *pp;
{
	/* scratch buffer, kept between calls and only ever grown */
	static char	*buffer;
	static size_t	buffer_size;
	size_t		total;
	char		*cp;
	char		*out;
	string_ty	*value;
	string_ty	*built;
	int		slot;

	trace(("reconstruct(pattern = %08lX, field = %08X)\n{\n"/*}*/, pattern, field));
	trace_string(pattern->str_text);

	/*
	 * First pass: work out how long the result will be, and
	 * complain if the pattern uses a field which has no value.
	 */
	total = 0;
	cp = pattern->str_text;
	while (*cp)
	{
		if (*cp != MATCH_CHAR)
		{
			/* ordinary character, copied as is */
			++total;
			++cp;
			continue;
		}
		if (cp[1] == MATCH_CHAR)
		{
			/* quoted match character yields one character */
			++total;
			cp += 2;
			continue;
		}

		/* %N uses slot N, a plain % uses slot 10 */
		++cp;
		slot = 10;
		if (*cp >= '0' && *cp <= '9')
			slot = *cp++ - '0';
		value = field->fill[slot];
		if (!value)
		{
			illegal_pattern(pp, pattern->str_text, slot);
			trace((/*{*/"}\n"));
			return 0; /* NULL */
		}
		total += value->str_length;
	}

	/*
	 * Make sure the scratch buffer is big enough.
	 */
	if (!buffer)
	{
		buffer_size = (total < 16 ? 16 : total);
		buffer = mem_alloc(buffer_size);
	}
	else if (buffer_size < total)
	{
		buffer_size = total;
		buffer = mem_change_size(buffer, buffer_size);
	}

	/*
	 * Second pass: build the text.  Every field used was seen to
	 * have a value in the first pass.
	 */
	out = buffer;
	cp = pattern->str_text;
	while (*cp)
	{
		if (*cp != MATCH_CHAR)
		{
			*out++ = *cp++;
			continue;
		}
		if (cp[1] == MATCH_CHAR)
		{
			*out++ = MATCH_CHAR;
			cp += 2;
			continue;
		}
		++cp;
		slot = 10;
		if (*cp >= '0' && *cp <= '9')
			slot = *cp++ - '0';
		value = field->fill[slot];
		memcpy(out, value->str_text, value->str_length);
		out += value->str_length;
	}

	built = str_n_from_c(buffer, total);
	trace_string(built->str_text);
	trace(("return %08lX;\n", built));
	trace((/*{*/"}\n"));
	return built;
}


/*
 * NAME
 *	match_stack_push - push match fields
 *
 * SYNOPSIS
 *	void match_stack_push(match_stack_ty *msp, const match_ty *field);
 *
 * DESCRIPTION
 *	The match_stack_push function is used to push a pattern onto the
 *	stack of match fields.  A NULL pointer may be pushed.  This
 *	mechanism is used by the chef (cook.c) to indicate implicit and
 *	explicit recipe replacements.
 *
 * RETURNS
 *	void
 */

void
match_stack_push(msp, field)
	match_stack_ty	*msp;
	const match_ty	*field;
{
	trace(("match_stack_push(field = %08X)\n{\n"/*}*/, field));

	/* grow the array when it is full: 0, 4, 12, 28, ... entries */
	if (msp->stack_depth >= msp->stack_depth_max)
	{
		size_t	wanted;

		msp->stack_depth_max = 2 * msp->stack_depth_max + 4;
		wanted = msp->stack_depth_max * sizeof(msp->stack[0]);
		msp->stack = mem_change_size(msp->stack, wanted);
	}

	msp->stack[msp->stack_depth] = field;
	msp->stack_depth++;
	trace((/*{*/"}\n"));
}


/*
 * NAME
 *	match_stack_top - top of match stack
 *
 * SYNOPSIS
 *	const match_ty *match_stack_top(const match_stack_ty *msp);
 *
 * DESCRIPTION
 *	The match_stack_top function is used to indicate the top of the
 *	match stack.
 *
 * RETURNS
 *	match_ty * - a pointer to a match structure, or NULL if the stack is
 *	empty, or a NULL was pushed to mark an explicit recipe.
 */

const match_ty *
match_stack_top(msp)
	const match_stack_ty *msp;
{
	/* an empty stack has no top; otherwise it is the last entry pushed */
	return (msp->stack_depth > 0 ? msp->stack[msp->stack_depth - 1] : 0);
}


/*
 * NAME
 *	match_stack_pop - shorten stack
 *
 * SYNOPSIS
 *	const match_ty *match_stack_pop(match_stack_ty *msp);
 *
 * DESCRIPTION
 *	The match_stack_pop function is used to pop a match structure
 *	from the match stack.
 *
 * RETURNS
 *	match_ty * - a pointer to a match structure, or NULL if the stack is
 *	empty, or a NULL was pushed to mark an explicit recipe.
 *
 * CAVEAT
 *	It is an error for the stack to be empty.
 */

const match_ty *
match_stack_pop(msp)
	match_stack_ty	*msp;
{
	const match_ty	*popped;

	trace(("match_stack_pop()\n{\n"/*}*/));
	assert(msp->stack_depth);

	/* when the assert is not active, an empty stack yields NULL */
	popped = 0;
	if (msp->stack_depth > 0)
		popped = msp->stack[--msp->stack_depth];
	trace(("return %08X;\n", popped));
	trace((/*{*/"}\n"));
	return popped;
}


/*
 * NAME
 *	wl_match - find a pattern in a word list
 *
 * SYNOPSIS
 *	match_ty *wl_match(string_list_ty *pattern, string_ty *target,
 *		const expr_position_ty *pp);
 *
 * DESCRIPTION
 *	Wl_match is used to determine whether any one of the words in
 *	the wordlist (wlp) match the pattern given.
 *
 * RETURNS
 *	A zero is returned if not one of the words matches the pattern;
 *	otherwise a pointer to a "match structure" is returned,
 *	in a similar fashion to match().
 *
 * CAVEAT
 *	The information returned resides in dynamic memory.
 *	It is the responsibility of the
 *	caller to ensure that it is freed when it is finished with,
 *	by a call to match_free();
 */

match_ty *
wl_match(pattern, target, pp)
	string_list_ty	*pattern;
	string_ty	*target;
	const expr_position_ty *pp;
{
	size_t		k;
	match_ty	*hit;

	/*
	 * Try each pattern in turn, stopping at the first one for which
	 * match() returns something other than NULL.
	 */
	hit = 0;
	for (k = 0; !hit && k < pattern->nstrings; ++k)
		hit = match(pattern->string[k], target, pp);
	return hit;
}


/*
 * NAME
 *	wl_reconstruct - reconstruct a word list
 *
 * SYNOPSIS
 *	int wl_reconstruct(string_list_ty *to, string_list_ty *from,
 *		const match_ty *field, const expr_position_ty *pp);
 *
 * DESCRIPTION
 *	Wl_reconstruct is used to reconstruct an entire word list,
 *	sort of the converse of wl_match().
 *
 * RETURNS
 *	'To' is a word list of reconstructed strings.
 *	int; 0 on success, -1 if one of the strings could not be reconstructed.
 *
 * CAVEAT
 *	It is the responsibility of the caller to ensure that the
 *	reconstructed word list in 'to' is freed when finished with,
 *	by a call to string_list_destructor().
 */

int
wl_reconstruct(to, from, field, pp)
	string_list_ty	*to;
	string_list_ty	*from;
	const match_ty	*field;
	const expr_position_ty *pp;
{
	size_t		k;
	string_ty	*word;

	string_list_constructor(to);
	k = 0;
	while (k < from->nstrings)
	{
		word = reconstruct(from->string[k], field, pp);
		if (!word)
		{
			/* give up; `to' keeps the words appended so far */
			return -1;
		}
		string_list_append(to, word);
		/* drop our own reference now that it has been appended */
		str_free(word);
		++k;
	}
	return 0;
}


/*
 * Allocate a new match stack with no entries and no storage.
 */
match_stack_ty *
match_stack_new()
{
	match_stack_ty	*fresh;

	fresh = mem_alloc(sizeof(*fresh));
	fresh->stack_depth_max = 0;
	fresh->stack_depth = 0;
	fresh->stack = 0;
	return fresh;
}


/*
 * Release a match stack: its array of entries (if one was ever
 * allocated) and the stack structure itself.  The match structures the
 * entries point to are not released here.
 */
void
match_stack_delete(msp)
	match_stack_ty	*msp;
{
	assert(msp);
	if (msp->stack != 0)
	{
		mem_free(msp->stack);
	}
	mem_free(msp);
}


/*
 * Work out which match fields the text `s' refers to.
 *
 * Returns a bit mask: bit N is set if %N appears (N is 0 to 9), and
 * bit 10 is set if a plain % appears.  A quoted match character (two
 * in a row) sets no bit.
 */
int
match_usage_mask(s)
	string_ty	*s;
{
	char		*cp;
	int		mask;

	mask = 0;
	cp = s->str_text;
	while (*cp)
	{
		char	next;

		if (*cp++ != MATCH_CHAR)
			continue;

		/* cp has moved on to the character after the match character */
		next = *cp;
		if (next == MATCH_CHAR)
		{
			/* quoted: skip the second one as well */
			++cp;
		}
		else if (next >= '0' && next <= '9')
		{
			mask |= 1 << (next - '0');
			++cp;
		}
		else
		{
			/* plain %; the following character is ordinary text */
			mask |= (1 << 10);
		}
	}
	return mask;
}
