/*  Screem:  screem-page-model.c
 *
 *  The ScreemPageModel object
 *  
 *  This object is responsible for keeping a tree view of a ScreemPage
 *  up to date with changes made to the GtkTextBuffer of the page
 *  or at least that is the intention.
 *
 *  It will also be responsible for keeping track of the DTD in use
 *  for the page rather than the horrible method of the ScreemWindow
 *  handling this in combination with ScreemPage
 *
 *  The problem here though is coming up with an algorithm for
 *  dynamically updating the tree on text changes
 * 
 *  Copyright (C) 2003 David A Knight
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 *
 *  For contact information with the author of this source code please see
 *  the AUTHORS file.  If there is no AUTHORS file present then check the
 *  about box under the help menu for a contact address
 */

#include <config.h>

#include <string.h>

#include <glib/gi18n.h>

#include <libcroco/libcroco.h>

#include "screem-application.h"

#include "screem-page-model.h"

#include "screem-dtd.h"
#include "screem-markup.h"
#include "screem-page.h"
#include "screem-search.h"

/* Forward declarations of file-local helpers defined further down. */

/* Clears the tree store, adds the "[Document]" row and connects the
 * text change handlers to the page */
static void screem_page_model_page_set( ScreemPageModel *model );
/* Text change handlers connected (after the default handler) to the
 * page; both queue check_doctype_and_charset() in an idle handler
 * over the accumulated range */
static void insert_text( GtkTextBuffer *buffer, GtkTextIter *it,
			 const gchar *text, gint length, ScreemPageModel *model );
static void delete_range( GtkTextBuffer *buffer, GtkTextIter *it,
			  GtkTextIter *eit, ScreemPageModel *model );

/* Re-detect the doctype / charset if none is known yet, or if the range
 * pos .. pos + length touches the one currently known */
static void screem_page_doctype_check( ScreemPageModel *model, gint pos, 
				       gint length );
static void screem_page_charset_check( ScreemPageModel *model, gint pos, 
				       gint length );

/* Idle handler which works out the DTD to use and sets the "dtd"
 * property when it differs from the current one */
static gboolean screem_page_model_dtd_set( ScreemPageModel *model );

/* Identifiers for the object's properties.  This file sets properties
 * named "page", "application", "dtd", "charset" and "doctype" via
 * g_object_new() / g_object_set(); presumably these ids are what the
 * set/get property handlers (not in this part of the file) switch on. */
typedef enum {
	PROP_0,			/* unused placeholder, first real id is 1 */
	PROP_APPLICATION,
	PROP_PAGE,
	PROP_DTD,
	PROP_CHARSET,
	PROP_DOCTYPE
} ScreemPageModelProps;

/* State of the incremental markup parser that fills the tree.
 *
 * One instance is embedded in ScreemPageModelPrivate.  build_tree()
 * initialises it and installs screem_page_model_build_step() as a low
 * priority idle handler which advances the parse. */
typedef struct {
	GtkTreeModel *model;	/* the tree store being populated */
	GtkTreeIter document;	/* top level "[Document]" row */
	GtkTreeIter external;	/* top level "[Links]" row */
	GtkTreeIter editable;	/* top level "[Regions]" row */
	GtkTreeIter styles;	/* top level "[Styles]" row */

	/* rows of the currently open elements; build_close() pops from
	 * the head and takes the new head as the parent row */
	GSList *stack;
	
	GtkTreeIter *parent;	/* row new nodes are appended under */
	gchar *pname;		/* name of the parent row, NULL if none */
	ScreemDTDTagExistance pclose;	/* close tag state of the parent element */
	const ScreemDTDElement *pelem;	/* DTD element of the parent, may be NULL */
	
	/* character offsets; a text node spans ppos .. pos */
	guint pos;
	guint ppos;
	gchar *text;		/* copy of the page text being parsed */
	const gchar *offset_text;	/* starts out pointing at text */
	const gchar *poffset_text;	/* points into the text at ppos */
	gchar *name;		/* name of the tag currently being handled */
	gchar *tag;		/* NOTE(review): presumably the full tag text, set outside this view */
	guint start;		/* NOTE(review): presumably start / end of the */
	guint end;		/* current tag, set outside this view */
	guint last;		/* character count of the buffer when the build began */

	ScreemDTD *dtd;		/* DTD of the page when the build began */
	gboolean gotroot;	/* TRUE once a valid root element has been seen */

	guint sourceid;		/* id of the build idle handler, 0 when not running */

	GCompareFunc compare;	/* name comparison function taken from the DTD */

	xmlDocPtr doc;		/* libxml2 document built alongside the tree */
	xmlNodePtr cur;		/* node in doc for the current parent, NULL at top level */
	ScreemPage *page;	/* the page being parsed */
	gboolean xml;		/* result of screem_page_is_xml() for the page */
} ScreemPageModelParser;

/* Data handed to the libcroco SAC callbacks (via CRDocHandler app_data)
 * while building tree rows for a block of CSS. */
typedef struct {
	GtkTreeModel *model;	/* tree store rows are added to */
	GtkTreeIter *current;	/* row of the selector being parsed, NULL before the first */
	GtkTreeIter *parent;	/* row selectors are appended under, NULL for top level */
	CRInput *input;		/* libcroco input created for the buffer */
	ScreemPage *page;	/* page the CSS belongs to */
	const gchar *buf;	/* text that libcroco byte offsets are converted against */
	guint offset;		/* 0 for a stylesheet, the text node start for embedded CSS */
} ScreemPageModelCSSParser;

/* Private instance data of a ScreemPageModel */
struct ScreemPageModelPrivate {
	ScreemApplication *application;	/* used to reach the DTD database */
	
	ScreemPage *page;	/* the page (a text buffer) this model describes */

	ScreemDTD *dtd;		/* DTD currently in use, compared against in screem_page_model_dtd_set() */

	gint doctype_pos;	/* offset of the doctype tag within the page text */
	gchar *doctype;		/* current doctype tag text, NULL if unknown */
	gchar *docroot;		/* root element named by the doctype, NULL if none */

	guint charset_pos;	/* position / length of the charset declaration, */
	guint charset_len;	/* filled in by screem_markup_get_charset() */
	gchar *charset;		/* current charset name */
	gboolean is_default_charset;	/* TRUE when charset is the fallback default */
	guint charset_notify;	/* NOTE(review): presumably a GConf notify id, set outside this view */

	ScreemPageModelParser parser;	/* incremental markup parser state */

	GtkTreeIter *context;	/* row found by select_context(), freed by the callers */
	GConfClient *client;	/* used to read the default charset setting */
	gboolean highlight;	/* whether invalid markup gets highlighted in the page */
	guint highlight_notify;	/* NOTE(review): presumably a GConf notify id, set outside this view */

	guint dtd_set;		/* id of the pending screem_page_model_dtd_set() idle, 0 if none */

	guint check_idle;	/* id of the pending doctype / charset check idle, 0 if none */
	guint pos;		/* start of the range the pending check covers */
	guint length;		/* length of the range the pending check covers */

	gchar *curroot;		/* passed when looking up the default DTD; reset by build_tree() */
};

/* Create a new ScreemPageModel for page, owned by the application app.
 *
 * Both are handed over as construction-time properties ("page" and
 * "application"); the new object is returned. */
ScreemPageModel *screem_page_model_new( ScreemPage *page, GObject *app )
{
	GObject *object;

	object = g_object_new( screem_page_model_get_type(),
			       "page", page,
			       "application", app,
			       NULL );

	return SCREEM_PAGE_MODEL( object );
}

/* Run the doctype and charset checks right now rather than waiting
 * for the idle handler queued by the text change handlers.
 *
 * model:       the page model
 * in_progress: if TRUE the checks are only run when one is already
 *              queued; if FALSE they are always run
 *
 * The checks always cover the whole buffer ( 0 .. G_MAXINT ), so the
 * pending range is simply discarded. */
void screem_page_model_force_check( ScreemPageModel *model,
		gboolean in_progress )
{
	ScreemPageModelPrivate *priv;

	g_return_if_fail( SCREEM_IS_PAGE_MODEL( model ) );

	priv = model->priv;

	if( in_progress && ! priv->check_idle ) {
		/* nothing queued, so nothing to bring forward */
		return;
	}
	
	if( priv->check_idle ) {
		/* we are doing the work now, drop the queued check */
		g_source_remove( priv->check_idle );
		priv->check_idle = 0;
	}
	
	if( screem_page_is_markup( priv->page ) ) {
		screem_page_doctype_check( model, 0, G_MAXINT );
		screem_page_charset_check( model, 0, G_MAXINT );
	}
	priv->pos = 0;
	priv->length = 0;
}

/* Returns TRUE if the charset held by the model is the fallback default
 * rather than one found in the page.  Returns FALSE if model is not a
 * ScreemPageModel. */
gboolean screem_page_model_is_default_charset( ScreemPageModel *model )
{
	ScreemPageModelPrivate *priv;

	g_return_val_if_fail( SCREEM_IS_PAGE_MODEL( model ), FALSE );

	priv = model->priv;

	return priv->is_default_charset;
}

/* static stuff */
/* Reset the tree to a single "[Document]" row and start listening for
 * text changes on the page.
 *
 * The handlers are connected "after" so they see the buffer once the
 * default handler has applied the change. */
static void screem_page_model_page_set( ScreemPageModel *model )
{
	GtkTreeStore *store;
	GtkTreeIter root;
	GObject *page;

	store = GTK_TREE_STORE( model );

	gtk_tree_store_clear( store );

	gtk_tree_store_append( store, &root, NULL );
	gtk_tree_store_set( store, &root,
			    SCREEM_PAGE_MODEL_NAME, "[Document]",
			    SCREEM_PAGE_MODEL_VALID, TRUE,
			    -1 );

	page = G_OBJECT( model->priv->page );

	g_signal_connect_after( page, "insert_text",
				G_CALLBACK( insert_text ), model );
	g_signal_connect_after( page, "delete_range",
				G_CALLBACK( delete_range ), model );
}

/* Idle handler queued by insert_text() / delete_range().
 *
 * Runs the doctype and charset checks over the accumulated range if
 * the page is markup, then resets the range.  Always returns FALSE so
 * the source is removed after one run. */
static gboolean check_doctype_and_charset( ScreemPageModel *model )
{
	ScreemPageModelPrivate *priv = model->priv;

	/* the source goes away when we return, forget its id now */
	priv->check_idle = 0;

	if( screem_page_is_markup( priv->page ) ) {
		/* snapshot the range before running the checks */
		guint from = priv->pos;
		guint span = priv->length;

		screem_page_doctype_check( model, from, span );
		screem_page_charset_check( model, from, span );
	}

	priv->length = 0;
	priv->pos = 0;

	return FALSE;
}

/* "insert_text" handler, connected after the default handler.
 *
 * buffer: the text buffer (unused)
 * it:     iter, already advanced past the inserted text
 * text:   the inserted text
 * length: length of text in BYTES, as given by the signal
 * model:  the page model
 *
 * Merges the inserted range into the range of any pending doctype /
 * charset check and (re)queues the check in an idle handler. */
static void insert_text( GtkTextBuffer *buffer, GtkTextIter *it,
			 const gchar *text, gint length, ScreemPageModel *model )
{
	ScreemPageModelPrivate *priv;
	gint start;
	guint pos;
	guint chars;
	guint pending_end;
	guint new_end;
	
	priv = model->priv;

	/* iter offsets are in characters but length is in bytes, so
	 * subtracting length directly goes negative as soon as the
	 * text contains multibyte characters; work in characters */
	chars = (guint)g_utf8_strlen( text, length );
	
	/* - chars as we are connected after, so the iter will be
	 * advanced by that many chars.
	 * NOTE(review): the + 1 is kept from the original code, it
	 * looks like it may be an off by one - confirm */
	start = gtk_text_iter_get_offset( it ) - (gint)chars + 1;

	/* done with a signed value so the clamp can actually trigger */
	if( start < 0 ) {
		start = 0;
	}
	pos = (guint)start;

	if( priv->check_idle ) {
		g_source_remove( priv->check_idle );
		/* calc range the check will be over */
		pending_end = priv->pos + priv->length;
		new_end = pos + chars;

		priv->pos = MIN( priv->pos, pos );
		priv->length = MAX( pending_end, new_end ) - priv->pos;
	} else {
		priv->pos = pos;
		priv->length = chars;
	}
	
	priv->check_idle = g_idle_add( (GSourceFunc)check_doctype_and_charset, model );
}

/* "delete_range" handler, connected after the default handler.
 *
 * The range it .. eit is merged into the range of any pending
 * doctype / charset check, and the check is (re)queued in an idle
 * handler. */
static void delete_range( GtkTextBuffer *buffer, GtkTextIter *it,
			  GtkTextIter *eit, ScreemPageModel *model )
{
	ScreemPageModelPrivate *priv = model->priv;
	guint pos;
	guint length;

	pos = gtk_text_iter_get_offset( it );
	length = gtk_text_iter_get_offset( eit ) - pos;

	if( ! priv->check_idle ) {
		/* nothing pending, the check covers just this range */
		priv->pos = pos;
		priv->length = length;
	} else {
		guint pending_end;
		guint range_end;

		g_source_remove( priv->check_idle );

		/* grow the pending range so it covers both */
		pending_end = priv->length + priv->pos;
		range_end = pos + length;

		if( pos < priv->pos ) {
			priv->pos = pos;
		}
		if( range_end > pending_end ) {
			pending_end = range_end;
		}
		priv->length = pending_end - priv->pos;
	}

	priv->check_idle = g_idle_add( (GSourceFunc)check_doctype_and_charset, model );
}

/* Re-detect the doctype of the page if needed.
 *
 * model:  the page model
 * pos:    start of the changed range
 * length: length of the changed range
 *
 * Nothing is done when a doctype is already known and the changed
 * range does not touch it.  Otherwise the page text is searched for a
 * <!DOCTYPE tag; if found it is copied up to its matching '>' and set
 * as the "doctype" property, if not a DTD lookup is queued in an idle
 * handler (unless one is queued already). */
static void screem_page_doctype_check( ScreemPageModel *model, 
		gint pos, gint length )
{
	ScreemPageModelPrivate *priv;
	gchar *text;
	gchar *found;
	const gchar *c;
	GString *tag;
	gint depth;

	priv = model->priv;

	if( priv->doctype ) {
		gint len;

		len = strlen( priv->doctype );

		/* change is entirely before or after the known doctype */
		if( ( pos > ( priv->doctype_pos + len ) ) ||
		    ( pos + length ) < priv->doctype_pos ) {
			return;
		}
	}

	text = screem_page_get_data( priv->page );

	found = find_text( text,
			   "<![Dd][Oo][Cc][Tt][Yy][Pp][Ee] ",
			   NULL, NULL );
	if( ! found ) {
		/* no doctype tag, but the root element might have
		 * changed, which can affect the default dtd we want
		 * to be using, so have it looked up again.  The
		 * overhead should be minimal if it turns out the
		 * default dtd isn't changing */
		if( ! priv->dtd_set ) {
			priv->dtd_set = g_idle_add( (GSourceFunc)screem_page_model_dtd_set, model );
		}
		g_free( text );
		return;
	}

	/* <!DOCTYPE> is at least 10 chars, and normally much longer
	 * with the public id and url, so start with a good sized
	 * chunk to avoid reallocing */
	tag = g_string_sized_new( 4096 );

	priv->doctype_pos = found - text;

	/* copy chars up to and including the '>' which balances the
	 * opening '<', or the terminating nul if we run out of text */
	depth = 0;
	for( c = found; ; c ++ ) {
		g_string_append_c( tag, *c );
		if( *c == '>' ) {
			depth --;
		} else if( *c == '<' ) {
			depth ++;
		}
		if( *c == '\0' || depth <= 0 ) {
			break;
		}
	}

	g_object_set( G_OBJECT( model ), "doctype", tag->str, NULL );

	g_string_free( tag, TRUE );
	g_free( text );
}

/* Re-detect the charset of the page if needed.
 *
 * model:  the page model
 * pos:    start of the changed range
 * length: length of the changed range
 *
 * Nothing is done when a charset is already known and the changed
 * range does not touch its declaration.  Otherwise the charset is
 * looked up in the page text and set through the "charset" property.
 * If that leaves no charset we fall back to the GConf default, then
 * to the locale charset, and flag the charset as being the default. */
static void screem_page_charset_check( ScreemPageModel *model, 
		gint pos, gint length )
{
	ScreemPageModelPrivate *priv;
	ScreemDTD *dtd;
	GCompareFunc compare;
	gchar *text;
	gchar *charset;
	const gchar *locale_charset;

	priv = model->priv;

	if( priv->charset ) {
		gint len;

		len = priv->charset_len;

		/* change is entirely before or after the declaration */
		if( ( pos > ( priv->charset_pos + len ) ) ||
		    ( pos + length ) < priv->charset_pos ) {
			return;
		}
	}

	/* names are compared the way the dtd says, plain strcmp
	 * if there is no dtd */
	compare = (GCompareFunc)strcmp;
	dtd = screem_page_get_dtd( priv->page );
	if( dtd ) {
		g_object_get( G_OBJECT( dtd ), "compare", &compare, NULL );
	}

	text = screem_page_get_data( priv->page );

	g_free( priv->charset );
	priv->charset = NULL;

	/* NOTE(review): charset is not freed here; if
	 * screem_markup_get_charset() returns an allocated string
	 * this leaks it - confirm ownership */
	charset = screem_markup_get_charset( text, compare,
			&priv->charset_pos,
			&priv->charset_len );
	g_object_set( G_OBJECT( model ), "charset", charset, NULL );

	g_free( text );

	if( priv->charset ) {
		return;
	}

	/* nothing in the page, use the configured default ... */
	priv->charset = gconf_client_get_string( priv->client,
			"/apps/screem/editor/default_charset",
			NULL );
	if( ! priv->charset ) {
		/* ... or failing that the locale charset */
		g_get_charset( &locale_charset );
		priv->charset = g_strdup( locale_charset );
	}
	priv->is_default_charset = TRUE;
}

/* Work out which DTD the page should be using and set it as the "dtd"
 * property if it differs from the current one.
 *
 * The DTD named by the doctype is tried first; if there is no doctype,
 * or no DTD is found for it, the default DTD for the page mime type
 * and current root element is used.
 *
 * Always returns FALSE so that it runs only once as an idle handler;
 * also returns FALSE if model is not a ScreemPageModel.
 *
 * NOTE: this should only be called with the gdk lock unclaimed,
 * use it as an idle handler, or from an idle handler */
static gboolean screem_page_model_dtd_set( ScreemPageModel *model )
{
	ScreemPageModelPrivate *priv;
	ScreemDTDDB *db;
	ScreemDTD *dtd;

	/* start as NULL, they are freed below regardless of whether
	 * screem_dtd_db_parse_doctype() filled them in */
	gchar *pub = NULL;
	gchar *sys = NULL;
	gboolean doc_specific;

	const gchar *mime;

	g_return_val_if_fail( SCREEM_IS_PAGE_MODEL( model ), FALSE );
	priv = model->priv;

	/* we are the pending idle handler, allow another to be queued */
	priv->dtd_set = 0;
	
	g_free( priv->docroot );
	priv->docroot = NULL;
	
	db = screem_application_get_dtd_db( priv->application );
	dtd = NULL;
	doc_specific = FALSE;
	if( priv->doctype ) {
		gdk_threads_enter();
		dtd = screem_dtd_db_get_dtd_from_doctype( db,
			       priv->doctype );
		gdk_threads_leave();
		doc_specific = ( screem_dtd_db_parse_doctype( db,
					priv->doctype, &pub, &sys,
					&priv->docroot ) != NULL );
		g_free( pub );
		g_free( sys );
	}
	if( ! dtd ) {
		/* fall back to the default for this type of page */
		g_free( priv->docroot );
		priv->docroot = NULL;
		mime = screem_page_get_mime_type( priv->page );
		
		gdk_threads_enter();
		dtd = screem_dtd_db_get_default_dtd( db, mime, priv->curroot );
		gdk_threads_leave();
		doc_specific = FALSE;
	}
	if( dtd != priv->dtd ) {
		g_object_set( G_OBJECT( model ), "dtd", dtd, NULL );

		/* if dtd is document specific we have an extra ref,
		 * release it */
		if( dtd && doc_specific ) {
			g_object_unref( dtd );
		}
	}

	return FALSE;
}

/*********************************************/
/* Forward declarations for the tree building code below */

/* (re)start building the tree for the page */
static gboolean build_tree( ScreemPageModel *model );
/* gtk_tree_model_foreach() callback, currently does nothing */
static gboolean screem_page_model_clear_tree( GtkTreeModel *model, 
					     GtkTreePath *path,
					     GtkTreeIter *iter, 
					     gpointer data );
/* helpers of the incremental markup parser */
static void screem_page_model_build_text_node( ScreemPageModelParser *parser );
static void screem_page_model_build_close( ScreemPageModelParser *parser );
static void screem_page_model_build_open( ScreemPageModelParser *parser );
static void screem_page_model_parser_cleanup( ScreemPageModelParser *parser );
/* one step of the parse, normally run as an idle handler */
static gboolean screem_page_model_build_step( ScreemPageModelParser *parser );
static void screem_page_model_set_invalid( ScreemPage *page,
				guint start, guint end,
				const gchar *name,
				const gchar *pname,
				const gchar *attr,
				const gchar *value );
/* find the tree row for a position in the page */
static gboolean select_context( ScreemPageModel *model,
				GtkTreeIter **it,
				guint pos,
				guint *start, guint *end,
				gboolean select_text,
				gboolean build );

/* build the tree for a page which is a CSS stylesheet */
static void screem_page_model_css_build( ScreemPage *page );


/* Public entry point to (re)build the tree for the model's page.
 * The result of build_tree() is of no interest here. */
void screem_page_model_build_model( ScreemPageModel *model )
{
	(void)build_tree( model );
}

/* Finish any tree build that is in progress before returning.
 *
 * The idle handler doing the build is removed and the remaining steps
 * are run here synchronously instead. */
void screem_page_model_ensure_built( ScreemPageModel *model )
{
	ScreemPageModelParser *parser;

	parser = &model->priv->parser;

	/* stop the idle handler, but leave sourceid alone: it is what
	 * tells the loop below there is still work to do, and it is
	 * reset to 0 by the build itself once finished */
	if( parser->sourceid != 0 ) {
		g_source_remove( parser->sourceid );
	}

	/* screem_page_model_build_step() is normally called via an
	 * idle handler, so it must be called with the thread lock
	 * released */
	gdk_threads_leave();
	while( parser->sourceid != 0 ) {
		screem_page_model_build_step( parser );
	}
	gdk_threads_enter();
}

/* Find the context (tree row) at pos in the page.
 *
 * start / end receive the range of the context and may be NULL if the
 * caller is not interested.  Returns whatever select_context()
 * returns; the tree is built first if needed. */
gboolean screem_page_model_select_context( ScreemPageModel *model,
					  guint pos,
					  guint *start, guint *end,
					  gboolean select_text )
{
	guint unused_start;
	guint unused_end;

	/* select_context() always writes the range, give it
	 * somewhere to write to when the caller did not */
	return select_context( model, NULL, pos,
			       start ? start : &unused_start,
			       end ? end : &unused_end,
			       select_text, TRUE );
}

/* Find the parent of the context at pos in the page.
 *
 * start / end may be NULL.  On success they hold the range of the
 * parent row and TRUE is returned.  FALSE is returned if no context
 * was found or it has no parent; start / end may then hold the range
 * of the context itself. */
gboolean screem_page_model_select_parent_context( ScreemPageModel *model,
		guint pos,
		guint *start, guint *end,
		gboolean select_text )
{
	ScreemPageModelPrivate *priv;
	GtkTreeModel *tree;
	GtkTreeIter parent;
	guint unused_start;
	guint unused_end;
	gboolean found;

	start = start ? start : &unused_start;
	end = end ? end : &unused_end;

	priv = model->priv;

	if( ! select_context( model, NULL, pos, start, end, select_text,
				TRUE ) || ! priv->context ) {
		return FALSE;
	}

	tree = GTK_TREE_MODEL( model );
	found = FALSE;

	if( gtk_tree_model_iter_parent( tree, &parent, priv->context ) ) {
		gtk_tree_model_get( tree, &parent,
				SCREEM_PAGE_MODEL_START, start,
				SCREEM_PAGE_MODEL_END, end,
				-1 );
		found = TRUE;
	}

	/* the context iter is ours to release */
	gtk_tree_iter_free( priv->context );
	priv->context = NULL;

	return found;
}

/* Find the range covered by the children of the context at pos.
 *
 * start / end may be NULL.  On success start is the start of the
 * first child, end is the end of the last child and TRUE is returned.
 * FALSE is returned if no context was found or it has no children. */
gboolean screem_page_model_select_content( ScreemPageModel *model,
					  guint pos,
					  guint *start, guint *end,
					  gboolean select_text )
{
	ScreemPageModelPrivate *priv;
	GtkTreeModel *tree;
	GtkTreeIter child;
	guint unused_start;
	guint unused_end;
	gboolean found;

	start = start ? start : &unused_start;
	end = end ? end : &unused_end;

	priv = model->priv;

	if( ! select_context( model, NULL, pos, start, end, select_text,
				TRUE ) || ! priv->context ) {
		return FALSE;
	}

	tree = GTK_TREE_MODEL( model );
	found = FALSE;

	if( gtk_tree_model_iter_children( tree, &child, priv->context ) ) {
		found = TRUE;

		/* start comes from the first child ... */
		gtk_tree_model_get( tree, &child,
				SCREEM_PAGE_MODEL_START, start,
				-1 );
		/* ... and end from whichever child is last */
		do {
			gtk_tree_model_get( tree, &child,
					SCREEM_PAGE_MODEL_END, end,
					-1 );
		} while( gtk_tree_model_iter_next( tree, &child ) );
	}

	/* the context iter is ours to release */
	gtk_tree_iter_free( priv->context );
	priv->context = NULL;

	return found;
}

/* Look up the context (tree row) at pos in the page.
 *
 * model:      the page model
 * pos:        position in the page
 * query_text: passed through to select_context()
 * build:      passed through to select_context()
 * depth:      out, depth of the row below the top level row, or NULL
 * start, end: out, range of the context, or NULL
 * node:       out, value of the row's node column, or NULL
 *
 * Returns the name of the row, to be freed by the caller, or NULL if
 * there is no context; *depth is then 0 and *node is NULL. */
gchar *screem_page_model_query_context( ScreemPageModel *model,
		guint pos, gboolean query_text,
		gboolean build, 
		guint *depth, guint *start, guint *end,
		xmlNodePtr *node )
{
	ScreemPageModelPrivate *priv;
	GtkTreeModel *tree;
	GtkTreePath *path;
	gchar *name;
	guint levels;
	guint unused_start;
	guint unused_end;
	guint unused_depth;
	xmlNodePtr unused_node;

	/* point any output the caller left out at a scratch variable */
	start = start ? start : &unused_start;
	end = end ? end : &unused_end;
	depth = depth ? depth : &unused_depth;
	node = node ? node : &unused_node;

	*depth = 0;
	*node = NULL;

	priv = model->priv;

	/* FIXME: priv->context should always be non NULL
	   if select_context() returns TRUE, 
	   however we don't handle <!DOCTYPE ... [ .. ]>
	   very well, and can cause it to be NULL, at least I think
	   that is what is causing a crash */
	if( ! select_context( model, NULL, pos, start, end, query_text,
				build ) || ! priv->context ) {
		return NULL;
	}

	tree = GTK_TREE_MODEL( model );
	name = NULL;

	gtk_tree_model_get( tree, priv->context,
			SCREEM_PAGE_MODEL_NAME, &name,
			SCREEM_PAGE_MODEL_NODE, node,
			-1 );

	/* -1 as we are always under the [document] node */
	path = gtk_tree_model_get_path( tree, priv->context );
	levels = gtk_tree_path_get_depth( path );
	*depth = ( levels > 0 ) ? levels - 1 : levels;
	gtk_tree_path_free( path );

	gtk_tree_iter_free( priv->context );
	priv->context = NULL;

	return name;
}

/* (Re)start building the tree for the model's page.
 *
 * Any build in progress is stopped, the invalid markup highlighting
 * is removed from the page and the tree is cleared.  Then:
 *  - markup pages get the four top level rows and an idle handler
 *    which runs the incremental parser,
 *  - CSS pages are parsed in one go,
 *  - anything else is reported as built straight away.
 *
 * Takes the gdk lock itself.  Always returns FALSE. */
static gboolean build_tree( ScreemPageModel *model )
{
	ScreemPageModelPrivate *priv;
	ScreemPage *page;
	gboolean feature_markup;
	ScreemPageModelParser *parser;
	GtkTextTagTable *table;
	GtkTextTag *tag;
	GtkTextIter it;
	GtkTextIter eit;
	const gchar *mime_type;
	
	priv = model->priv;
	page = priv->page;
	
	gdk_threads_enter();
	
	screem_page_model_emit_building( page );

	/* remove the highlighting from the previous build */
	table = gtk_text_buffer_get_tag_table( GTK_TEXT_BUFFER( page ) );
	tag = gtk_text_tag_table_lookup( table,
			SCREEM_INVALID_MARKUP_TAG );
	if( tag ) {
		gtk_text_buffer_get_start_iter( GTK_TEXT_BUFFER( page ),
				&it );
		gtk_text_buffer_get_end_iter( GTK_TEXT_BUFFER( page ),
				&eit );
		gtk_text_buffer_remove_tag( GTK_TEXT_BUFFER( page ),
				tag, &it, &eit );
	}
	/* this will stop any current build + cleanup after it */
	screem_page_model_parser_cleanup( &priv->parser );

	/* no per row data needs freeing with the current code, so
	 * there is no need to walk the tree before clearing it */
	gtk_tree_store_clear( GTK_TREE_STORE( model ) );
	
	mime_type = screem_page_get_mime_type( page );
	feature_markup = screem_page_is_markup( page );

	if( feature_markup ) {
		/* init parser */
		parser = &priv->parser;
		parser->page = page;
		parser->xml = screem_page_is_xml( page );
		parser->dtd = screem_page_get_dtd( page );
		if( parser->dtd ) {
			g_object_get( G_OBJECT( parser->dtd ),
					"compare", &parser->compare, 
					NULL );
		} else {
			/* can't parse without a dtd.
			 * NOTE(review): the tree is left empty and
			 * "built" is never emitted in this case */
			gdk_threads_leave();
			return FALSE;
		}
		parser->model = GTK_TREE_MODEL( model );
		gtk_tree_store_append( GTK_TREE_STORE( parser->model ),
					&parser->document, NULL );
		gtk_tree_store_set( GTK_TREE_STORE( parser->model ), 
				    &parser->document,
				    SCREEM_PAGE_MODEL_NAME,
				    _( "[Document]" ),
				    SCREEM_PAGE_MODEL_VALID, TRUE,
				    -1 );
		gtk_tree_store_append( GTK_TREE_STORE( parser->model ),
					&parser->editable,
					NULL ); 
		gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
					&parser->editable,
					SCREEM_PAGE_MODEL_NAME,
					_( "[Regions]" ),
					SCREEM_PAGE_MODEL_VALID, TRUE,
					-1 );
			
		gtk_tree_store_append( GTK_TREE_STORE( parser->model ),
					&parser->external,
					NULL );
		gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
					&parser->external,
					SCREEM_PAGE_MODEL_NAME,
					_( "[Links]" ),
					SCREEM_PAGE_MODEL_VALID, TRUE,
					-1 );
		
		gtk_tree_store_append( GTK_TREE_STORE( parser->model ),
					&parser->styles,
					NULL );
		gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
					&parser->styles,
					SCREEM_PAGE_MODEL_NAME,
					_( "[Styles]" ),
					SCREEM_PAGE_MODEL_VALID, TRUE,
					-1 );
		
		/* parsing starts directly under the document row */
		parser->parent = &parser->document;
		parser->pclose = SCREEM_DTD_MUST;
		parser->text = screem_page_get_data( page );
		parser->offset_text = parser->text;
		parser->last = gtk_text_buffer_get_char_count( GTK_TEXT_BUFFER( page ) ); 

		parser->doc = xmlNewDoc( (const xmlChar*)XML_DEFAULT_VERSION );
		parser->cur = NULL;
		
		/* add parser idle handler */
		g_free( priv->curroot );
		priv->curroot = NULL;
		parser->sourceid = g_idle_add_full( G_PRIORITY_LOW,
				(GSourceFunc)screem_page_model_build_step,
				parser, NULL );
	} else if( mime_type && ! strcmp( "text/css", mime_type ) ) {
		/* mime_type is checked first, strcmp() on NULL is
		 * undefined; a page without a type is not CSS */
		screem_page_model_css_build( page );
	} else {
		screem_page_model_emit_built( page );
	}
		
	gdk_threads_leave();
	
	return FALSE;
}

/* libcroco SAC callback, called at the start of a ruleset.
 *
 * Appends a row for the selector under data->parent and makes it the
 * current row, so that the properties which follow are added below it.
 *
 * The row covers just the selector text: START is the character
 * offset of the selector in the page, END is START plus the length of
 * the selector string in characters. */
static void screem_page_model_css_selector( CRDocHandler *handler,
					CRSelector *selector )
{
	guchar *sel;
	ScreemPageModelCSSParser *data;
	guint start;
	guint len;
	
	data = handler->app_data;

	/* the iter is only a handle onto the most recent selector row,
	 * so one allocation is reused for every selector.  It is
	 * allocated with g_new0() and so must be released with
	 * g_free(), not gtk_tree_iter_free() */
	if( ! data->current ) {
		data->current = g_new0( GtkTreeIter, 1 );
	}

	gtk_tree_store_append( GTK_TREE_STORE( data->model ),
				data->current, data->parent );
	sel = cr_selector_to_string( selector );

	/* libcroco reports byte offsets relative to the start of the
	 * css, which is where data->buf points.  Convert that to a
	 * char offset first and only then add data->offset, which is
	 * already in chars; adding it to the byte pointer would give
	 * the wrong answer for multibyte text and can point past the
	 * end of the buffer */
	start = g_utf8_pointer_to_offset( data->buf,
			data->buf + selector->location.byte_offset );
	start += data->offset;

	len = sel ? g_utf8_strlen( (const gchar*)sel, -1 ) : 0;

	gtk_tree_store_set( GTK_TREE_STORE( data->model ),
			data->current,
			SCREEM_PAGE_MODEL_NAME, sel,
			SCREEM_PAGE_MODEL_START, start,
			SCREEM_PAGE_MODEL_END, start + len,
			-1 );

	g_free( sel );
}

/* libcroco SAC callback, called at the end of a ruleset.
 *
 * Currently does nothing: the code which would extend the current
 * selector row up to the end of the ruleset is switched off by the
 * constant 0 in the condition. */
static void screem_page_model_css_end_selector( CRDocHandler *handler,
		CRSelector *selector )
{
	ScreemPageModelCSSParser *data = handler->app_data;
	CRParsingLocation *location = &selector->location;
	guint end;

	if( 0 && data->current ) { 
		/* byte offset, converted to a char offset */
		end = location->byte_offset + data->offset + 1;
		end = g_utf8_pointer_to_offset( data->buf,
				data->buf + end );

		gtk_tree_store_set( GTK_TREE_STORE( data->model ),
				data->current,
				SCREEM_PAGE_MODEL_END, end,
				-1 );
	}
}

/* libcroco SAC callback, called for each declaration in a ruleset.
 *
 * handler:   the SAC handler, app_data is our ScreemPageModelCSSParser
 * name:      the property name
 * value:     the list of terms making up the value
 * important: unused
 *
 * Adds a row for the property under the current selector row and a
 * row for the value under that.  The value row is named after the
 * first non empty term; both rows end at the end of the last term.
 * Nothing is done if there is no current selector row. */
static void screem_page_model_css_property( CRDocHandler *handler,
					CRString *name,
					CRTerm *value,
					gboolean important )
{
	GtkTreeIter it;
	GtkTreeIter vit;
	const gchar *pname;
	guchar *val;
	ScreemPageModelCSSParser *data;

	guint start;
	guint end;
	GString *tmp;

	data = handler->app_data;
	
	if( ! data->current ) {
		return;
	}

	pname = cr_string_peek_raw_str( name );

	/* libcroco reports byte offsets relative to the start of the
	 * css, which is where data->buf points.  They are converted to
	 * char offsets first, and data->offset (already in chars) is
	 * added afterwards; adding it to the byte pointer gives the
	 * wrong answer for multibyte text and can point past the end
	 * of the buffer */
	start = g_utf8_pointer_to_offset( data->buf,
			data->buf + name->location.byte_offset );
	start += data->offset;
		
	gtk_tree_store_append( GTK_TREE_STORE( data->model ),
			&it, data->current );
	gtk_tree_store_set( GTK_TREE_STORE( data->model ), &it,
			SCREEM_PAGE_MODEL_NAME, pname,
			SCREEM_PAGE_MODEL_START, start,
			-1 );

	/* end is kept as a byte offset relative to data->buf until
	 * after the loop */
	end = value->location.byte_offset;
	start = g_utf8_pointer_to_offset( data->buf,
			data->buf + end );
	start += data->offset;
	
	tmp = g_string_new( NULL );
	for( ; value; value = value->next ) {
		val = cr_term_to_string( value );
		if( val && ! tmp->len ) {
			g_string_append( tmp, (gchar*)val );
		}
		/* same condition as before: skip the update when the
		 * absolute end so far is 0 */
		if( ( end + data->offset ) != 0 ) {
			end = value->location.byte_offset;
			if( val ) {
				end += strlen( (gchar*)val );
			}
		}
		g_free( val );
	}
	end = g_utf8_pointer_to_offset( data->buf,
			data->buf + end );
	end += data->offset;
	
	gtk_tree_store_append( GTK_TREE_STORE( data->model ),
			&vit, &it );
	gtk_tree_store_set( GTK_TREE_STORE( data->model ), &vit,
			SCREEM_PAGE_MODEL_NAME, tmp->str,
			SCREEM_PAGE_MODEL_START, start,
			SCREEM_PAGE_MODEL_END, end, 
			-1 );

	gtk_tree_store_set( GTK_TREE_STORE( data->model ), &it,
			SCREEM_PAGE_MODEL_END, end,
			-1 );

	/* the store has taken its own copy of the name */
	g_string_free( tmp, TRUE );
}

/* libcroco SAC callback, called for each @import rule.
 *
 * handler:    the SAC handler, app_data is our ScreemPageModelCSSParser
 * media_list: list of CRString media names, may be empty
 * uri:        the uri being imported
 * ns:         unused
 * location:   where the rule starts
 *
 * Adds a top level "@import" row with a child row for the uri.  Both
 * rows end at the end of the uri, or at the end of a media name that
 * starts after it.
 *
 * NOTE(review): the row is always added at the top level rather than
 * under data->parent, even for css embedded in markup - confirm this
 * is intended. */
static void screem_page_model_css_import( CRDocHandler *handler,
		GList *media_list, CRString *uri, CRString *ns,
		CRParsingLocation *location )
{
	ScreemPageModelCSSParser *data;
	GtkTreeIter it;
	GtkTreeIter vit;
	guint start;
	guint end;
	guint tmp;
	CRString *tmpcr;

	data = handler->app_data;

	/* libcroco reports byte offsets relative to the start of the
	 * css, which is where data->buf points.  They are converted to
	 * char offsets first, and data->offset (already in chars) is
	 * added afterwards */
	start = g_utf8_pointer_to_offset( data->buf,
			data->buf + location->byte_offset );
	start += data->offset;
	
	gtk_tree_store_append( GTK_TREE_STORE( data->model ),
			&it, NULL );
	/* the cast matters, strlen() gives a size_t which is not what
	 * the varargs of gtk_tree_store_set() expect for this column */
	gtk_tree_store_set( GTK_TREE_STORE( data->model ),
			&it,
			SCREEM_PAGE_MODEL_NAME, "@import",
			SCREEM_PAGE_MODEL_START, start,
			SCREEM_PAGE_MODEL_END, 
			(guint)( start + strlen( "@import" ) ),
			-1 );

	/* end is kept as a byte offset relative to data->buf until
	 * after the loop, so it can be compared directly with the
	 * byte offsets of the media names */
	end = uri->location.byte_offset;
	start = g_utf8_pointer_to_offset( data->buf,
			data->buf + end );
	start += data->offset;

	end += cr_string_peek_raw_str_len( uri );

	for( ; media_list; media_list = media_list->next ) {
		tmpcr = (CRString*)media_list->data;
	
		tmp = tmpcr->location.byte_offset;
		if( tmp > end ) {
			end = tmp;
			end += cr_string_peek_raw_str_len( tmpcr );
		}
	}
	end = g_utf8_pointer_to_offset( data->buf,
			data->buf + end );
	end += data->offset;
	
	gtk_tree_store_append( GTK_TREE_STORE( data->model ),
			&vit, &it );
	gtk_tree_store_set( GTK_TREE_STORE( data->model ),
			&vit,
			SCREEM_PAGE_MODEL_NAME, 
			cr_string_peek_raw_str( uri ),
			SCREEM_PAGE_MODEL_START, start,
			SCREEM_PAGE_MODEL_END, end, 
			-1 );
	gtk_tree_store_set( GTK_TREE_STORE( data->model ),
			&it,
			SCREEM_PAGE_MODEL_END, end, 
			-1 );
}

/* Parse a block of CSS with libcroco, adding rows to the tree through
 * the SAC callbacks above.
 *
 * page: the page the css belongs to
 * buf:  the css text; handed over to the libcroco input, which is
 *       created with its free_buf flag set
 * data: callback state; model, current, parent, offset and buf must
 *       have been filled in by the caller
 *
 * "built" is emitted for the page only when data->offset is 0, i.e.
 * when the css is the page itself rather than part of a markup page.
 * The current selector iter is released before returning. */
static void screem_page_model_css_build_real( ScreemPage *page,
		gchar *buf, ScreemPageModelCSSParser *data )
{
	CRParser *parser;
	CRDocHandler *sac;
	guint len;

	/* we want byte length, not char length */
	len = strlen( buf );

	data->input = cr_input_new_from_buf( (guchar *)buf, 
			len, CR_UTF_8, TRUE );
	data->page = page;

	parser = cr_parser_new_from_input( data->input );
	if( parser ) {
		sac = cr_doc_handler_new();
		if( sac ) {
			sac->app_data = data;
			sac->start_document = NULL;
			sac->end_document = NULL;
			sac->import_style = screem_page_model_css_import;
			sac->start_selector = screem_page_model_css_selector;
			sac->property = screem_page_model_css_property;
			sac->end_selector = screem_page_model_css_end_selector;

			cr_parser_set_sac_handler( parser, sac );

			cr_parser_parse( parser );
		}
		/* this destroys input and sac as well,
		 * do not destroy them here */
		cr_parser_destroy( parser );
	}

	if( data->offset == 0 ) {
		screem_page_model_emit_built( page );
	}

	/* fine if there were no selectors, g_free() accepts NULL */
	g_free( data->current );
}

/* Build the tree for a page which is a CSS stylesheet.
 *
 * The whole page text is parsed, with rows added at the top level of
 * the page's model and offsets relative to the start of the page. */
static void screem_page_model_css_build( ScreemPage *page )
{
	ScreemPageModelCSSParser css;
	gchar *text;

	text = screem_page_get_data( page );

	css.model = screem_page_get_model( page );
	css.buf = text;
	css.offset = 0;
	css.parent = NULL;
	css.current = NULL;	

	/* text now belongs to the parser */
	screem_page_model_css_build_real( page, text, &css );
}

/* gtk_tree_model_foreach() callback intended for releasing per row
 * data.  There is nothing to release with the current code, so it
 * does nothing and returns FALSE for every row. */
static gboolean screem_page_model_clear_tree( GtkTreeModel *model, 
					     GtkTreePath *path,
					     GtkTreeIter *iter, 
					     gpointer data )
{
	(void)model;
	(void)path;
	(void)iter;
	(void)data;

	return FALSE;
}

/* Add the text between parser->ppos and parser->pos to the tree.
 *
 * The text is also added to the xml document below the current node,
 * if there is one.  Text inside a "style" element is parsed as css,
 * adding selector rows under the parent, instead of being added as a
 * text row.  Text which is nothing but white space is not added to
 * the tree at all. */
static void screem_page_model_build_text_node( ScreemPageModelParser *parser )
{
	GtkTreeStore *store;
	GtkTreeIter row;
	const gchar *from;
	const gchar *to;
	gchar *txt;
	gboolean valid;
	xmlNodePtr node;

	/* take a copy of the chars ppos .. pos */
	from = parser->poffset_text;
	to = g_utf8_offset_to_pointer( from, parser->pos - parser->ppos );
	txt = g_strndup( from, to - from );

	/* FIXME: add xmlNode, this doesn't do what we want really,
	 * \r gets converted to an entity for some reason,
	 * also xmlStringGetNodeList() doesn't know about
	 * the entities that the doctype in use may be using */
	if( parser->cur ) {
		node = xmlStringGetNodeList( parser->doc, 
				(xmlChar*)txt );
		xmlAddChild( parser->cur, node );
	}

	/* if this text is in a <style> parse as css instead
	 * of text 
	 * 
	 * TODO?
	 *
	 * If we are in a <script> we could possibly
	 * split the text node based on blocks, 
	 * e.g. { } 
	 */
	if( parser->pname && 
		parser->compare( "style", parser->pname ) == 0 ) {

		ScreemPageModelCSSParser css;

		css.model = parser->model;
		css.parent = parser->parent;
		css.current = NULL;	
		css.buf = parser->poffset_text;
		css.offset = parser->ppos; 

		/* the css parser takes ownership of the copy */
		screem_page_model_css_build_real( parser->page,
					g_strdup( txt ), &css );

		/* prevent addition as a text node */
		*txt = '\0';
	}

	/* chomp / chop any white space, if all white space then we
	   won't even bother putting it in the tree.
	   FIXME: in XML white space does matter, so don't strip it
	   in XML documents, this leads to text nodes being inserted
	   that are marked as invalid though */
	txt = g_strstrip( txt );

	if( *txt == '\0' ) {
		g_free( txt );
		return;
	}

	/* text is fine if the parent allows PCDATA or CDATA */
	valid = screem_dtd_valid_child_element( parser->dtd, 
			parser->pelem, "PCDATA" );
	if( ! valid ) {
		valid = screem_dtd_valid_child_element( parser->dtd,
				parser->pelem, "CDATA");
	}

	store = GTK_TREE_STORE( parser->model );
	gtk_tree_store_append( store, &row, parser->parent );
	gtk_tree_store_set( store, &row,
			    SCREEM_PAGE_MODEL_NAME, _( "[text]" ),
			    SCREEM_PAGE_MODEL_TEXT, txt,
			    SCREEM_PAGE_MODEL_START, parser->ppos,
			    SCREEM_PAGE_MODEL_END, parser->pos,
			    SCREEM_PAGE_MODEL_VALID, valid,
			    -1 );

	g_free( txt );
}

/* Handle a close tag, the name of which is in parser->name.
 *
 * Open elements are popped off the stack, each having its end set to
 * the current position, until one is popped whose name matches the
 * close tag or the stack is empty.  The parent state of the parser
 * ( parent, pname, pelem, pclose, cur ) is then set up for whatever
 * is left at the head of the stack, or reset to the document row if
 * nothing is.
 *
 * Elements popped without matching while the close state was
 * SCREEM_DTD_MUST are marked invalid; invalid elements are highlighted
 * in the page when highlighting is enabled. */
static void screem_page_model_build_close( ScreemPageModelParser *parser )
{
	ScreemPageModel *pmodel;
	gboolean match;
	gboolean valid;
	guint start;
	guint end;
	gchar *name;
	gchar *pname;

	GtkTreeIter it;
	
	pmodel = SCREEM_PAGE_MODEL( parser->model );

	/* pop stack, change parent until
	   we hit the correct close tag or
	   we run out of stack */
	match = FALSE;
	g_free( parser->pname );
	parser->pname = NULL;
	end = parser->pos;
	while( parser->stack && ! match ) {		
		/* the element being popped ends here */
		gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
				    parser->parent,
				   SCREEM_PAGE_MODEL_END, 
				   end, -1 );
		gtk_tree_model_get( GTK_TREE_MODEL( parser->model ),
				parser->parent,
				SCREEM_PAGE_MODEL_NAME, &name,
				SCREEM_PAGE_MODEL_VALID, &valid, 
				SCREEM_PAGE_MODEL_START, &start,
				-1 );
		
		/* name of the row above it, only needed for
		 * reporting invalid markup */
		pname = NULL;
		if( gtk_tree_model_iter_parent( GTK_TREE_MODEL( parser->model ),
					&it, parser->parent ) ) {
			gtk_tree_model_get( GTK_TREE_MODEL( parser->model ),
					&it, SCREEM_PAGE_MODEL_NAME, &pname,
					-1 );
		}

		/* drop the head of the stack */
		parser->stack = g_slist_remove( parser->stack, 
					parser->stack->data );
		/* + 1 skips the first char of the close tag name,
		 * presumably the '/' */
		match = ! parser->compare( parser->name + 1, name );

		/* an element which must be closed was closed
		 * implicitly by the close tag of something else */
		if( parser->pclose == SCREEM_DTD_MUST && ! match ) {
			valid = FALSE;
			gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
					parser->parent,
					SCREEM_PAGE_MODEL_VALID,
					valid, -1 );
		}

		if( ( ! valid ) && pmodel->priv->highlight ) {
			screem_page_model_set_invalid( pmodel->priv->page,
					start, end, name, pname,
					NULL, NULL );
		}
		g_free( pname );
		
		g_free( name );
		/* NOTE(review): assumes the parent of an open element
		 * is always a heap allocated iter from the stack, never
		 * &parser->document - confirm against build_open */
		gtk_tree_iter_free( parser->parent );
		if( parser->stack ) {
			/* move up to the next open element */
			parser->parent = parser->stack->data;
			/* NOTE(review): assumes parser->cur is non NULL
			 * whenever the stack is not empty - confirm */
			parser->cur = parser->cur->parent;
	
			if( parser->parent != &parser->document ) {
				g_free( parser->pname );
				gtk_tree_model_get( parser->model,
					  parser->parent,
					  SCREEM_PAGE_MODEL_NAME,
					  &parser->pname, -1 );
				parser->pelem = screem_dtd_valid_element( parser->dtd,
							parser->pname );
				/* pclose is left alone for xml pages */
				if( ! parser->xml ) {
					parser->pclose = screem_dtd_element_close_element_state( parser->dtd,
							 parser->pelem );
				}
			}
		}
	}
	/* set up the parent state afresh for wherever we ended up */
	g_free( parser->pname );
	parser->pname = NULL;
	parser->pelem = NULL;
	if( ! parser->stack ) {
		/* nothing left open, back to the top level */
		parser->parent = &parser->document;
		parser->pclose = SCREEM_DTD_MUST;
		parser->cur = NULL;
	} else if( parser->parent != &parser->document ) {
		gtk_tree_model_get( parser->model,
				  parser->parent,
				  SCREEM_PAGE_MODEL_NAME,
				  &parser->pname, -1 );
		parser->pelem = screem_dtd_valid_element( parser->dtd,
						parser->pname );
		if( ! parser->xml ) {
			parser->pclose = screem_dtd_element_close_element_state( parser->dtd,
							 parser->pelem );
		}
	}
}

/*
 * Handle the non-closing tag currently described by parser->tag,
 * parser->name, parser->start and parser->end.
 *
 * In order, this:
 *  - renames "!--" tags to the translated "[Comment]" label;
 *  - works out whether the tag is valid at this point according to
 *    parser->dtd ('!', '?' and '%' tags are always treated as valid);
 *  - pops parents whose closing tag is optional (SCREEM_DTD_SHOULD) for
 *    as long as the new tag is not valid inside them;
 *  - appends a row for the tag under parser->parent, with an xmlNode for
 *    anything that is not a comment;
 *  - validates the attributes, and adds extra rows under parser->external
 *    (tags with a URI attribute), parser->styles (text/css style tags)
 *    and parser->editable (#BeginEditable / #EndEditable comments);
 *  - pushes the new row onto parser->stack as the current parent unless
 *    the tag cannot contain children.
 */
static void screem_page_model_build_open( ScreemPageModelParser *parser )
{
	ScreemPageModel *pmodel;
	gchar firstc;
	gboolean valid;
	gboolean iscomment;
	ScreemDTDTagExistance close_state;
	GtkTreeIter it;
	GtkTreeIter sit;
	GSList *attrs;
	GSList *tmp;
	gchar *name;
	gchar *value;
	xmlChar *type;
	const ScreemDTDElement *elem;

	const gchar *xmlclosecheck;
	gboolean hasuri;

	xmlNodePtr node;
	gboolean xml;

	xml = parser->xml;
	pmodel = SCREEM_PAGE_MODEL( parser->model );

	/* taken before the comment rename below, so comments keep '!' */
	firstc = *parser->name;

	if( ( iscomment = ! strcmp( "!--", parser->name ) ) ) {
		g_free( parser->name );
		parser->name = g_strdup( _( "[Comment]" ) );
	}

	elem = screem_dtd_valid_element( parser->dtd, parser->name );

	if( xml ) {
		close_state = SCREEM_DTD_MUST;
	} else {
		close_state = screem_dtd_element_close_element_state( parser->dtd,
				elem );
	}

	valid = FALSE;
	node = NULL; /* shut gcc up, it can not be used uninitialised */
	switch( firstc ) {
		case '!':
		case '?':
		case '%':
			valid = TRUE;
			break;
		case '[':
			if( ! strcmp( "CDATA]", parser->name + 1 ) ) {
				/* treat like a '!' tag from here on, so it
				 * is never pushed as a parent */
				firstc = '!';
				valid = screem_dtd_valid_child_element( parser->dtd,
						parser->pelem, "CDATA" );
				if( ! valid ) {
					valid = screem_dtd_valid_child_element( parser->dtd,
						parser->pelem, "PCDATA" );
				}
				break;
			}
			/* deliberate fall through */
		default:
			if( parser->parent == &parser->document &&
				! parser->gotroot && elem &&
				( ( pmodel->priv->docroot &&
				  ! parser->compare( pmodel->priv->docroot,
					     parser->name ) ) ||
				screem_dtd_is_root_element( parser->dtd,
					parser->name ) ) ) {
				/* first root element seen at document level */
				valid = TRUE;
				parser->gotroot = TRUE;
			} else if( elem && parser->pelem ) {
				valid = screem_dtd_valid_child_element( parser->dtd,
						parser->pelem,
						parser->name );
			}
			break;
	}

	/* if closing the parent tag is optional, and
	   valid is FALSE, pop the parent */
	while( parser->parent != &parser->document &&
		parser->pclose == SCREEM_DTD_SHOULD && ! valid ) {
		/* assume this closes the parent */
		gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
				parser->parent, SCREEM_PAGE_MODEL_END,
				parser->start, -1 );
		parser->stack = g_slist_remove( parser->stack,
						parser->stack->data );
		gtk_tree_iter_free( parser->parent );
		if( parser->stack ) {
			parser->parent = parser->stack->data;

			parser->cur = parser->cur->parent;

			/* re-evaluate against the new parent */
			g_free( parser->pname );
			gtk_tree_model_get( parser->model,
					parser->parent,
					SCREEM_PAGE_MODEL_NAME,
					&parser->pname, -1 );
			parser->pelem = screem_dtd_valid_element( parser->dtd,
						parser->pname );
			parser->pclose = screem_dtd_element_close_element_state( parser->dtd, parser->pelem );
			valid = screem_dtd_valid_child_element( parser->dtd,
					parser->pelem,
					parser->name );
		} else {
			parser->parent = &parser->document;
			parser->cur = NULL;
			g_free( parser->pname );
			parser->pname = NULL;
			parser->pelem = NULL;
		}
	}

	/* we are dealing with an opening tag */
	gtk_tree_store_append( GTK_TREE_STORE( parser->model ),
			       &it, parser->parent );

	/* set values for the iter */
	if( iscomment ) {
		GtkTreeIter rit;
		gint n;
		gchar *ed_name;
		guint len;

		gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
				&it,
				SCREEM_PAGE_MODEL_NAME, parser->name,
				SCREEM_PAGE_MODEL_START,parser->start,
				SCREEM_PAGE_MODEL_END, parser->end,
				SCREEM_PAGE_MODEL_TEXT, parser->tag,
				-1 );

		/* is it an editable region comment ? */
		if( g_str_has_prefix( parser->tag,
					"<!-- #BeginEditable " ) ) {
			/* it is */
			gtk_tree_store_append( GTK_TREE_STORE( parser->model ),
					&rit, &parser->editable );
			/* initialised so a failed search never hands an
			 * indeterminate length to g_strndup() */
			len = 0;
			ed_name = find_text( parser->tag, "\"[^\"]*\"",
						NULL, &len );
			ed_name = g_strndup( ed_name, len );

			gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
					&rit,
					SCREEM_PAGE_MODEL_NAME, ed_name,
					SCREEM_PAGE_MODEL_START,
					parser->start,
					SCREEM_PAGE_MODEL_END,
					parser->end,
					SCREEM_PAGE_MODEL_VALID, TRUE,
					-1 );
			g_free( ed_name );
		} else if( g_str_has_prefix( parser->tag,
					"<!-- #EndEditable " ) ) {
			/* it ends the last one */
			n = gtk_tree_model_iter_n_children( parser->model,
					&parser->editable );
			if( ( n > 0 ) &&
			    gtk_tree_model_iter_nth_child( parser->model,
				    &rit, &parser->editable, n - 1 ) ) {
				gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
							&rit,
							SCREEM_PAGE_MODEL_END,
							parser->end,
							-1 );
			}
		}
	} else {
		/* add xmlNode */
		node = xmlNewNode( NULL, (xmlChar*)parser->name );
		if( parser->cur ) {
			xmlAddChild( parser->cur, node );
		} else {
			xmlDocSetRootElement( parser->doc, node );
		}
		gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
				&it,
				SCREEM_PAGE_MODEL_NAME, parser->name,
				SCREEM_PAGE_MODEL_START,parser->start,
				SCREEM_PAGE_MODEL_END, parser->end,
				SCREEM_PAGE_MODEL_NODE, node,
				-1 );
	}

	/* check attributes + validate
	 *
	 * TODO: emit the invalid signal only across the positions
	 * for invalid attributes, instead of invalidating the whole
	 * tag
	 *
	 * */
	if( elem ) {
		hasuri = FALSE;
		attrs = screem_markup_build_attributes_list( parser->tag, NULL );
		/* the list is consumed two entries at a time:
		 * the value first, then the attribute name */
		for( tmp = attrs; tmp; tmp = tmp->next ) {
			value = tmp->data;
			tmp = tmp->next;
			if( ! tmp ) {
				/* unpaired trailing entry, nothing to
				 * validate it against */
				g_free( value );
				break;
			}
			name = tmp->data;

			/* set xmlNode prop, an attribute without a value
			 * uses its own name as the value */
			if( ! value ) {
				value = g_strdup( name );
			}
			/* FIXME:
			 * hmm seems to be causing massive leaking
			 * here */
			xmlSetProp( node, (xmlChar*)name,
					(xmlChar*)value );

			valid &= ( screem_dtd_valid_element_attr( parser->dtd,
							elem,
							name ) != NULL );
			hasuri |= screem_dtd_attr_is_uri( parser->dtd,
					parser->name, name );
			g_free( name );
			g_free( value );
		}
		g_slist_free( attrs );
		if( hasuri ) {
			GtkTreeIter lit;
			gtk_tree_store_append( GTK_TREE_STORE( parser->model ),
					&lit, &parser->external );
			gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
					&lit,
					SCREEM_PAGE_MODEL_VALID, valid,
					SCREEM_PAGE_MODEL_NAME,
					parser->name,
					SCREEM_PAGE_MODEL_START,
					parser->start,
					SCREEM_PAGE_MODEL_END, parser->end,
					SCREEM_PAGE_MODEL_NODE, node,
					-1 );
		}
	}
	gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
				&it,
				SCREEM_PAGE_MODEL_VALID, valid,
				-1 );
	if( ( ! valid ) && pmodel->priv->highlight ) {
		screem_page_model_set_invalid( pmodel->priv->page,
					parser->start,
					parser->end,
					parser->name,
					parser->pname,
					NULL, NULL );
	}
	if( ! parser->compare( "style", parser->name ) ) {
		type = xmlGetProp( node, (const xmlChar*)"type" );
		if( ! type ) {
			type = xmlGetProp( node,
					(const xmlChar*)"TYPE" );
		}
		if( type && ! parser->compare( "text/css",
					(const gchar*)type ) ) {
			gtk_tree_store_append( GTK_TREE_STORE( parser->model ),
						&sit, &parser->styles );
			gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
					&sit,
					SCREEM_PAGE_MODEL_VALID, valid,
					SCREEM_PAGE_MODEL_NAME,
					parser->name,
					SCREEM_PAGE_MODEL_START,
					parser->start,
					SCREEM_PAGE_MODEL_END,
					parser->end,
					SCREEM_PAGE_MODEL_NODE, node,
					-1 );
		}
		/* xmlGetProp() results belong to libxml2 and must be
		 * released with xmlFree(), not g_free() */
		if( type ) {
			xmlFree( type );
		}
	}

	/* put it on the stack as a parent if needed,
	   we don't need to for:
	   1) comments / doctypes
	   2) tags closing themselves, eg <meta />
	   3) HTML tags which forbid closing, same as
	      2 but we need to lookup in the doctype */

	/* 2 g_utf8_prev_char() is safe, as offset_text is end + 1,
	 * and end must be > start therefore -2 >= start */
	xmlclosecheck = parser->offset_text;
	xmlclosecheck = g_utf8_prev_char( xmlclosecheck );
	xmlclosecheck = g_utf8_prev_char( xmlclosecheck );
	if(  ( firstc != '!' ) && ( firstc != '?' ) &&
	     ( firstc != '%' ) &&
	    ( *xmlclosecheck != '/' ) &&
		( close_state != SCREEM_DTD_MUST_NOT ) ) {
		parser->parent = gtk_tree_iter_copy( &it );
		parser->stack = g_slist_prepend( parser->stack,
						parser->parent );
		g_free( parser->pname );
		gtk_tree_model_get( parser->model,
				  parser->parent,
				  SCREEM_PAGE_MODEL_NAME,
				  &parser->pname, -1 );
		parser->pelem = screem_dtd_valid_element( parser->dtd,
						parser->pname );
		if( ! xml ) {
			parser->pclose = screem_dtd_element_close_element_state( parser->dtd, parser->pelem );
		}

		/* set xml parent */
		parser->cur = node;
	}
}

/*
 * Reset the parser to a pristine state once a build has finished or has
 * been abandoned.
 *
 * Every row still on the parent stack gets its END column set to the
 * current parse position, the stack and its iters are released, and the
 * text buffer copy, xml document and pending source are dropped.  The
 * whole structure is then zeroed, keeping only the model pointer.
 */
static void screem_page_model_parser_cleanup( ScreemPageModelParser *parser )
{
	GtkTreeModel *owner;
	GSList *pending;

	/* saved so it can be restored after the memset() below */
	owner = parser->model;

	g_free( parser->pname );
	parser->pelem = NULL;

	/* unwind the stack, parser->parent tracks the entry being closed */
	for( pending = parser->stack; pending != NULL; ) {
		gtk_tree_store_set( GTK_TREE_STORE( parser->model ),
				parser->parent,
				SCREEM_PAGE_MODEL_END, parser->pos,
				-1 );

		pending = g_slist_remove( pending, pending->data );

		/* the document iter lives inside the parser itself,
		 * only heap copies are freed */
		if( parser->parent != NULL &&
		    parser->parent != &parser->document ) {
			gtk_tree_iter_free( parser->parent );
		}

		if( pending != NULL ) {
			parser->parent = pending->data;
		}
	}

	g_free( parser->text );
	parser->offset_text = NULL;

	if( parser->doc ) {
		xmlFreeDoc( parser->doc );
	}

	if( parser->sourceid ) {
		g_source_remove( parser->sourceid );
		parser->sourceid = 0;
	}

	memset( parser, 0, sizeof( ScreemPageModelParser ) );
	parser->model = owner;
}

/*
 * Perform one step of the incremental tree build: fetch the next tag from
 * parser->offset_text, add a text node for anything between the previous
 * position and the tag, then hand the tag to the open / close handler.
 *
 * Returns TRUE while there is more to parse and FALSE once parser->pos
 * has reached parser->last, at which point the built signal has been
 * emitted and the parser cleaned up.
 */
static gboolean screem_page_model_build_step( ScreemPageModelParser *parser )
{
	ScreemPageModel *pmodel;
	ScreemPageModelPrivate *priv;
	gboolean more;

	pmodel = SCREEM_PAGE_MODEL( parser->model );
	priv = pmodel->priv;

	more = TRUE;

	parser->ppos = parser->pos;
	parser->name = NULL;
	parser->start = 0;
	parser->end = 0;

	parser->tag = screem_markup_next_tag( parser->offset_text,
				0, &parser->start, &parser->end,
				&parser->name );

	if( parser->tag && ! priv->curroot ) {
		switch( *parser->name ) {
			case '!':
			case '?':
			case '%':
				/* not an element, can't be the root */
				break;
			default:
				priv->curroot = g_strdup( parser->name );

				/* the root element was not known until
				 * now.  If the page has no doctype we may
				 * have been using an incorrect default
				 * dtd, so recalculate it.  We return
				 * without advancing offset_text, so this
				 * tag is read again on the next step with
				 * the recalculated dtd in use.
				 * This isn't ideal as
				 * screem_markup_next_tag() ends up being
				 * called twice for this tag. */
				if( ! priv->doctype ) {
					screem_page_model_dtd_set( pmodel );
					parser->dtd = priv->dtd;
				}
				g_free( parser->tag );
				g_free( parser->name );

				return TRUE;
		}
	}

	/* move past the tag, start / end were relative to offset_text */
	parser->poffset_text = parser->offset_text;
	parser->offset_text =
		g_utf8_offset_to_pointer( parser->offset_text,
					  parser->end );

	parser->start += parser->pos;
	parser->end += parser->pos;

	if( parser->tag ) {
		parser->pos = parser->start;
	} else {
		/* no more tags, everything left is text */
		parser->pos = parser->last;
	}

	gdk_threads_enter();

	if( parser->pos > parser->ppos ) {
		screem_page_model_build_text_node( parser );
	}

	if( parser->tag ) {
		parser->pos = parser->end;
		if( *parser->name != '/' ) {
			screem_page_model_build_open( parser );
		} else {
			screem_page_model_build_close( parser );
		}
		g_free( parser->tag );
		g_free( parser->name );
	}

	if( parser->pos >= parser->last ) {
		more = FALSE;
		parser->sourceid = 0;

		/* the xmlDoc is only available on the page for the
		 * duration of the built emission */
		g_object_set_data( G_OBJECT( priv->page ),
				"xmlDoc", parser->doc );
		screem_page_model_emit_built( priv->page );
		g_object_set_data( G_OBJECT( priv->page ),
				"xmlDoc", NULL );
		screem_page_model_parser_cleanup( parser );
	}

	gdk_threads_leave();

	return more;
}

/*
 * Mark the character range [start, end) of the page's text buffer with
 * the SCREEM_INVALID_MARKUP_TAG text tag.  Nothing happens when the tag
 * is not present in the buffer's tag table.
 *
 * name, pname, attr and value are currently unused.
 */
static void screem_page_model_set_invalid( ScreemPage *page,
				guint start, guint end,
				const gchar *name,
				const gchar *pname,
				const gchar *attr,
				const gchar *value )
{
	GtkTextBuffer *buffer;
	GtkTextTag *invalid;
	GtkTextIter from;
	GtkTextIter to;

	buffer = GTK_TEXT_BUFFER( page );

	invalid = gtk_text_tag_table_lookup(
			gtk_text_buffer_get_tag_table( buffer ),
			SCREEM_INVALID_MARKUP_TAG );
	if( ! invalid ) {
		return;
	}

	gtk_text_buffer_get_iter_at_offset( buffer, &from, start );
	gtk_text_buffer_get_iter_at_offset( buffer, &to, end );
	gtk_text_buffer_apply_tag( buffer, invalid, &from, &to );
}

/*
 * Locate the deepest tree row whose START..END range contains pos and
 * remember a copy of it in priv->context.
 *
 * model:       the page model to search
 * it:          address of the iter to start from, or NULL to start at the
 *              first child of the first top level row (in which case any
 *              previous context is discarded first).  The iter is
 *              advanced in place while searching its siblings.
 * pos:         character offset to look for
 * start, end:  set to the row's START and END + 1 each time a row is
 *              selected; left untouched otherwise
 * select_text: when FALSE, rows that have TEXT not beginning with "<!"
 *              are descended into but never selected themselves
 * build:       when TRUE, build_tree() is run first with the gdk lock
 *              released
 *
 * Returns TRUE if a row containing pos was found among the siblings
 * searched at this level, FALSE otherwise.
 */
static gboolean select_context( ScreemPageModel *model,
				GtkTreeIter **it,
				guint pos,
				guint *start, guint *end,
				gboolean select_text,
				gboolean build )
{
	ScreemPageModelPrivate *priv;
	GtkTreeModel *tmodel;
	gboolean loop;
	GtkTreeIter *tmp;
	GtkTreeIter root;

	priv = model->priv;
	tmodel = GTK_TREE_MODEL( model );

	if( build ) {
		/* leave gdk lock, build_tree() is generally
		 * called via timeout so claims the lock itself */
		gdk_threads_leave();
		build_tree( model );
		gdk_threads_enter();
	}
	screem_page_model_ensure_built( model );

	loop = TRUE;
	if( ! it ) {
		if( priv->context ) {
			gtk_tree_iter_free( priv->context );
			priv->context = NULL;
		}

		loop = gtk_tree_model_get_iter_first( tmodel, &root );
		if( loop && gtk_tree_model_iter_has_child( tmodel, &root ) ) {
			GtkTreeIter *parent;

			/* root is reused for the child, so the parent
			 * needs its own copy */
			parent = gtk_tree_iter_copy( &root );

			loop = gtk_tree_model_iter_children( tmodel, &root,
							     parent );
			if( loop ) {
				tmp = &root;
				it = &tmp;
			}
			gtk_tree_iter_free( parent );
		} else {
			loop = FALSE;
		}
	}

	while( loop ) {
		/* START / END are G_TYPE_UINT columns, read them as such so
		 * the comparisons with pos and the END + 1 below are done
		 * on matching unsigned types */
		guint estart;
		guint eend;

		gtk_tree_model_get( tmodel, *it,
				SCREEM_PAGE_MODEL_START, &estart,
				SCREEM_PAGE_MODEL_END, &eend, -1 );

		if( pos <= eend && pos >= estart ) {
			/* in here somewhere */
			gboolean istext;
			gchar *txt;

			gtk_tree_model_get( tmodel, *it,
					SCREEM_PAGE_MODEL_TEXT,
					&txt, -1 );
			/* rows with text count as text nodes unless the
			 * text starts with "<!" */
			istext = ( txt != NULL );
			if( istext ) {
				istext = ( strncmp( "<!", txt, 2 ) != 0 );
			}
			g_free( txt );

			if( ! istext || select_text ) {
				*start = estart;
				*end = eend + 1;
				if( priv->context ) {
					gtk_tree_iter_free( priv->context );
				}
				priv->context =
					gtk_tree_iter_copy( *it );
			}

			/* a child may contain pos more tightly, if so it
			 * replaces the context just recorded */
			if( gtk_tree_model_iter_has_child( tmodel, *it ) ) {
				GtkTreeIter child;

				if( gtk_tree_model_iter_children( tmodel,
								  &child,
								  *it ) ) {
					GtkTreeIter *cptr;

					cptr = &child;
					select_context( model,
							&cptr, pos,
							start, end,
							select_text,
							FALSE );
				}
			}
			break;
		} else {
			loop = gtk_tree_model_iter_next( tmodel, *it );
		}
	}

	return loop;
}

/*
 * GConf notification for /apps/screem/editor/default_charset.
 *
 * When the model is still using the default charset, the stored charset
 * is replaced with the new default.  If the key has been unset
 * (entry->value is NULL) or holds no string, the locale charset from
 * g_get_charset() is used, matching the fallback in
 * screem_page_model_init().  Values of any other type are ignored.
 *
 * data is the ScreemPageModel the notification was registered for.
 */
static void charset_toggle_notify( GConfClient *client, guint cnxn_id,
		GConfEntry *entry, gpointer data )
{
	ScreemPageModel *model;
	ScreemPageModelPrivate *priv;
	const gchar *value;

	if( entry->value && entry->value->type != GCONF_VALUE_STRING ) {
		return;
	}

	model = SCREEM_PAGE_MODEL( data );
	priv = model->priv;

	if( ! priv->is_default_charset ) {
		/* an explicit charset was set on the model, keep it */
		return;
	}

	value = NULL;
	if( entry->value ) {
		value = gconf_value_get_string( entry->value );
	}
	if( ! value ) {
		/* key unset, don't leave the model without a charset */
		g_get_charset( &value );
	}

	g_free( priv->charset );
	priv->charset = g_strdup( value );
}

/*
 * GConf notification for /apps/screem/editor/error_highlight.
 *
 * Updates priv->highlight from the new boolean value.  If the key has
 * been unset (entry->value is NULL) highlighting is switched off rather
 * than passing NULL to gconf_value_get_bool().  Values of any other type
 * are ignored.
 *
 * data is the ScreemPageModel the notification was registered for.
 */
static void highlight_toggle_notify( GConfClient *client, 
		guint cnxn_id,
		GConfEntry *entry, gpointer data )
{
	ScreemPageModel *model;
	ScreemPageModelPrivate *priv;

	if( entry->value && entry->value->type != GCONF_VALUE_BOOL ) {
		return;
	}

	model = SCREEM_PAGE_MODEL( data );
	priv = model->priv;

	if( entry->value ) {
		priv->highlight = gconf_value_get_bool( entry->value );
	} else {
		priv->highlight = FALSE;
	}
}



/* G Object stuff */
/* Registers ScreemPageModel as a GObject type derived from GtkTreeStore.
 * The macro relies on screem_page_model_class_init() and
 * screem_page_model_init() defined below, and provides the
 * screem_page_model_parent_class pointer used when chaining up in
 * screem_page_model_finalize(). */
G_DEFINE_TYPE( ScreemPageModel, screem_page_model, GTK_TYPE_TREE_STORE )

/* GObject virtual methods, installed in screem_page_model_class_init() */
static void screem_page_model_finalize( GObject *object );
static void screem_page_model_set_prop( GObject *object, guint prop_id,
				  	const GValue *value, 
					GParamSpec *spec );
static void screem_page_model_get_prop( GObject *object, guint prop_id,
				  	GValue *value, GParamSpec *spec );

/*
 * Class initialiser: hooks up the finalize / property handlers and
 * installs the "application", "page", "dtd", "charset" and "doctype"
 * properties.  Only "application" is a construct property.
 */
static void screem_page_model_class_init( ScreemPageModelClass *klass )
{
	GObjectClass *gobject_class = G_OBJECT_CLASS( klass );

	gobject_class->finalize = screem_page_model_finalize;
	gobject_class->get_property = screem_page_model_get_prop;
	gobject_class->set_property = screem_page_model_set_prop;

	/* object valued properties */
	g_object_class_install_property( gobject_class, PROP_APPLICATION,
			g_param_spec_object( "application",
				"Application",
				"The Application",
				G_TYPE_OBJECT,
				G_PARAM_READWRITE | G_PARAM_CONSTRUCT ) );

	g_object_class_install_property( gobject_class, PROP_PAGE,
			g_param_spec_object( "page",
				"Page",
				"The Page",
				G_TYPE_OBJECT,
				G_PARAM_READWRITE ) );

	g_object_class_install_property( gobject_class, PROP_DTD,
			g_param_spec_object( "dtd",
				"DTD",
				"The page DTD",
				G_TYPE_OBJECT,
				G_PARAM_READWRITE ) );

	/* string valued properties, both default to the empty string */
	g_object_class_install_property( gobject_class, PROP_CHARSET,
			g_param_spec_string( "charset",
				"Page character set",
				"The character set of the page",
				"",
				G_PARAM_READWRITE ) );

	g_object_class_install_property( gobject_class, PROP_DOCTYPE,
			g_param_spec_string( "doctype",
				"Page doctype",
				"The doctype of the page",
				"",
				G_PARAM_READWRITE ) );
}

/*
 * Instance initialiser: allocates the private data, reads the default
 * charset and error highlight settings from GConf (and watches both keys
 * for changes), sets the tree store column types and adds the top level
 * "[Document]" row.
 */
static void screem_page_model_init( ScreemPageModel *model )
{
	GType column_types[] = {
		G_TYPE_STRING,
		G_TYPE_STRING,
		G_TYPE_UINT,
		G_TYPE_UINT,
		G_TYPE_BOOLEAN,
		GDK_TYPE_PIXBUF,
		G_TYPE_POINTER
	};
	ScreemPageModelPrivate *priv;
	GConfClient *gconf;
	GtkTreeStore *store;
	GtkTreeIter document_row;

	priv = g_new0( ScreemPageModelPrivate, 1 );
	model->priv = priv;

	gconf = gconf_client_get_default();
	priv->client = gconf;

	/* default charset, tracked until one is set explicitly */
	priv->charset = gconf_client_get_string( gconf,
			"/apps/screem/editor/default_charset", NULL );
	priv->is_default_charset = TRUE;

	priv->charset_notify = gconf_client_notify_add( gconf,
			"/apps/screem/editor/default_charset",
			charset_toggle_notify, model, NULL, NULL );

	if( priv->charset == NULL ) {
		/* nothing configured, fall back to the locale charset */
		const gchar *locale_charset;

		g_get_charset( &locale_charset );
		priv->charset = g_strdup( locale_charset );
	}

	/* invalid markup highlighting */
	priv->highlight = gconf_client_get_bool( gconf,
			"/apps/screem/editor/error_highlight",
			NULL );
	priv->highlight_notify = gconf_client_notify_add( gconf,
			"/apps/screem/editor/error_highlight",
			highlight_toggle_notify, model, NULL, NULL );

	store = GTK_TREE_STORE( model );
	gtk_tree_store_set_column_types( store,
					 SCREEM_PAGE_MODEL_MAX,
					 column_types );

	gtk_tree_store_append( store, &document_row, NULL );
	gtk_tree_store_set( store, &document_row,
			    SCREEM_PAGE_MODEL_NAME, "[Document]",
			    SCREEM_PAGE_MODEL_VALID, TRUE,
			    -1 );
}

/*
 * GObject finalize handler: releases everything owned by the model's
 * private data, then chains up to the parent class.
 *
 * The parser's parent stack is unwound here by hand, without the
 * gtk_tree_store_set() calls that screem_page_model_parser_cleanup()
 * makes for each entry, before the generic cleanup releases the rest of
 * the parser state.
 */
static void screem_page_model_finalize( GObject *object )
{
	ScreemPageModel *model;
	ScreemPageModelPrivate *priv;
	ScreemPageModelParser *parser;
	GSList *cleanup;
	GConfClient *client;

	model = SCREEM_PAGE_MODEL( object );
	priv = model->priv;

	g_free( priv->curroot );

	/* cleanup the parser stack, needs special handling here */
	parser = &priv->parser;
	cleanup = parser->stack;
	while( cleanup ) {
		cleanup = g_slist_remove( cleanup, cleanup->data );
		/* the document iter is embedded in the parser, only heap
		 * copies are freed */
		if( parser->parent != NULL &&
		    parser->parent != &parser->document ) {
			gtk_tree_iter_free( parser->parent );
		}
		if( cleanup ) {
			parser->parent = cleanup->data;
		}
	}
	/* the list and its iters are gone now; clear the pointers so that
	 * screem_page_model_parser_cleanup() does not walk and free them
	 * a second time */
	parser->stack = NULL;
	parser->parent = NULL;
	screem_page_model_parser_cleanup( parser );

	if( priv->check_idle ) {
		g_source_remove( priv->check_idle );
	}
	if( priv->dtd_set ) {
		g_source_remove( priv->dtd_set );
	}

	client = priv->client;
	gconf_client_notify_remove( client, priv->charset_notify );
	gconf_client_notify_remove( client, priv->highlight_notify );
	g_object_unref( client );

	if( priv->dtd ) {
		g_object_unref( priv->dtd );
	}

	g_free( priv->docroot );
	g_free( priv->charset );
	g_free( priv->doctype );
	g_free( priv );

	G_OBJECT_CLASS( screem_page_model_parent_class )->finalize( object );
}

/*
 * GObject set_property handler.
 *
 * PROP_APPLICATION: stores the application pointer (no reference taken).
 * PROP_PAGE:        stores the page pointer (no reference taken) and
 *                   calls screem_page_model_page_set().
 * PROP_DTD:         swaps the referenced dtd and mirrors it onto the
 *                   page's "dtd" property.
 * PROP_CHARSET:     copies the string; a NULL value clears the charset
 *                   and marks the model as using the default charset.
 * PROP_DOCTYPE:     copies the string and (re)schedules
 *                   screem_page_model_dtd_set() from an idle handler.
 */
static void screem_page_model_set_prop( GObject *object, guint prop_id,
					const GValue *value, GParamSpec *spec )
{
	ScreemPageModel *model;
	ScreemPageModelPrivate *priv;
	const gchar *str;
	ScreemDTDDB *db;
	ScreemDTD *dtd;

	model = SCREEM_PAGE_MODEL( object );
	priv = model->priv;

	switch( prop_id ) {
		case PROP_APPLICATION:
			priv->application = SCREEM_APPLICATION( g_value_get_object( value ) );
			db = screem_application_get_dtd_db( priv->application );
			/* hmm, this is done so we have a dtd to work
			 * with all the time, however the application
			 * is only set at creation time, as soon
			 * as the page is loaded / edited a doctype
			 * will be looked for.  does mean the following
			 * isn't needed?
			priv->dtd = screem_dtd_db_get_default_dtd( db,
					NULL, NULL );
			if( priv->dtd ) {
				g_object_ref( priv->dtd );
			} else {
				g_warning( "Unable to obtain a default dtd\n" );
			}*/
			break;
		case PROP_PAGE:
			priv->page = SCREEM_PAGE( g_value_get_object( value ) );
			screem_page_model_page_set( model );
			break;
		case PROP_DTD:
			dtd = g_value_get_object( value );
			if( dtd != priv->dtd ) {
				/* take the new reference before dropping
				 * the old one */
				if( dtd ) {
					g_object_ref( dtd );
				}
				if( priv->dtd ) {
					g_object_unref( priv->dtd );
				}
				priv->dtd = dtd;

				/* set on page, so we can listen
				 * on notify::dtd on ScreemPage
				 * objects */
				g_object_set( G_OBJECT( priv->page ),
						"dtd",
						priv->dtd, NULL );
			}
			break;
		case PROP_CHARSET:
			g_free( priv->charset );
			priv->charset = NULL;
			str = g_value_get_string( value );
			priv->is_default_charset = TRUE;
			if( str ) {
				priv->charset = g_strdup( str );
				priv->is_default_charset = FALSE;
			}
			break;
		case PROP_DOCTYPE:
			g_free( priv->doctype );
			priv->doctype = NULL;
			str = g_value_get_string( value );
			if( str ) {
				priv->doctype = g_strdup( str );
			}
			/* only ever keep one pending dtd update */
			if( priv->dtd_set ) {
				g_source_remove( priv->dtd_set );
			}
			priv->dtd_set = g_idle_add( (GSourceFunc)screem_page_model_dtd_set, model );
			break;
		default:
			/* standard GObject diagnostic, names the offending
			 * property id and the object type */
			G_OBJECT_WARN_INVALID_PROPERTY_ID( object, prop_id, spec );
			break;
	}
}

/*
 * GObject get_property handler.
 *
 * Object valued properties ("application", "page", "dtd") are returned
 * as stored.  "charset" and "doctype" are only written to value when
 * they are set, otherwise value is left as it was passed in.
 */
static void screem_page_model_get_prop( GObject *object, guint prop_id,
				  	GValue *value, GParamSpec *spec )
{
	ScreemPageModel *model;
	ScreemPageModelPrivate *priv;

	model = SCREEM_PAGE_MODEL( object );
	priv = model->priv;

	switch( prop_id ) {
		case PROP_APPLICATION:
			g_value_set_object( value, priv->application );
			break;
		case PROP_PAGE:
			g_value_set_object( value, priv->page );
			break;
		case PROP_DTD:
			g_value_set_object( value, priv->dtd );
			break;
		case PROP_CHARSET:
			if( priv->charset ) {
				g_value_set_string( value, priv->charset );
			}
			break;
		case PROP_DOCTYPE:
			if( priv->doctype ) {
				g_value_set_string( value, priv->doctype );
			}
			break;
		default:
			/* report unknown ids the same way set_prop does,
			 * rather than silently ignoring them */
			G_OBJECT_WARN_INVALID_PROPERTY_ID( object, prop_id, spec );
			break;
	}
}

