#include "os.h"
#include "storage.h"

/* forward declarations so the structures below can refer to one another */
typedef struct Btree Btree;
typedef struct Btdat Btdat;
typedef struct Btkey Btkey;
typedef struct Btpage Btpage;

/* NOTE(review): u16int is not referenced in the visible part of this file */
typedef ushort u16int;

/*
 * btree constants.
 */
enum
{
	/* NOTE(review): not referenced in the visible code; btcreate uses 512 */
	BtPagesize = 8192,

	/* Btpage.flags bits */
	BtLeaf = 1<<0,	/* page is a leaf; tested throughout to pick leaf vs. interior handling */
	BtRoot = 1<<1,	/* not referenced in the visible code */
	BtEmpty = 1<<2,	/* not referenced in the visible code */

	/* initial value of Btpage.nbuf for a fresh page */
	BtPagehdrsize = 0,	/* BUG */
	/*
	 * fixed space charged against a page for every key, on top of the key bytes.
	 * presumably a 4-byte pointer plus a 4-byte length -- TODO confirm.
	 */
	BtKeysize = 4+4,
};

/*
 * an opaque chunk of bytes; used for keys and for values.
 */
struct Btdat
{
	void *a;	/* start of the data */
	uint n;	/* number of bytes at a */
};

/*
 * one slot of a page: a key and the pointer stored alongside it.
 * for keys installed by btaddkey, dat.a points into the page's buf.
 */
struct Btkey
{
	Btdat dat;	/* the key bytes */
	ulong ptr;	/* page address or storage pointer associated with the key */
};

/*
 * All pages in a btree are of the same fixed size.  The size can be
 * configured when the btree is created and is currently limited to 64k. 
 *
 * A btree page contains some number of keys along with pointers
 * to other btree pages or storage locations.  Pointers are stored as 4-byte
 * file offsets.  Page pointers will be page-aligned; storage pointers may not be.
 * Keys are interlaced with the pointers and can have arbitrary sizes.
 * They are treated as opaque data chunks except that they are lexicographically
 * ordered (as memcmp would) for the purposes of the tree structure.
 * The page begins and ends with a pointer.
 * 
 * pointer [4 bytes]
 * key [?? bytes]
 * pointer [4 bytes]
 * key [?? bytes]
 * ...
 * pointer [4 bytes]
 *
 * The structure below is the in-memory form of a page.  The pointers
 * and keys are kept unpacked in key[]: key[i].ptr is the pointer that
 * precedes key i, and key[nkey].ptr is the final pointer, which has
 * no key of its own.
 */
struct Btpage
{
	Btpage *hnext;	/* next page on the same Btree.hash chain */
	Btree *bt;	/* tree this page belongs to */
	uchar *buf;	/* pagesz bytes of storage; key data is packed at its tail */
	ulong addr;	/* page address; currently the address of this structure (see btallocpage) */
	ulong flags;	/* BtLeaf etc. */
	ulong nkey;	/* number of keys held */
	ulong nbuf;	/* space accounted as used: BtPagehdrsize plus BtKeysize+n per key */
	ulong nbufkeydat;	/* bytes of key data stored at the end of buf */
	Btkey key[1];	/* btallocpage really allocates maxnkey+2 entries */
};

/*
 * a btree: its root, a cache of pages, and the size limits for pages.
 */
struct Btree
{
	Btpage *root;	/* current root page */
	Btpage *leaf;	/* first leaf created by btcreate; not updated elsewhere in the visible code */
	Btpage *hash[256];	/* pages hashed by address; see btpage and btallocpage */

	uint pagesz;	/* size in bytes of every page's buf */
	uint maxnkey;	/* most keys a page may hold */
};

/*
 * forward declarations of the file-local routines.
 * NOTE(review): btclose and btincrefpage are declared here but no
 * definition appears in the visible part of the file -- confirm that
 * they are defined elsewhere.
 */
static	Btpage*	btallocpage(Btree*);
static	int		btaddkey(Btpage*, Btdat*);
static	int		btcanaddkey(Btpage*, Btdat*);
static	void		btclose(Btree*);
static	Btree*	btcreate(void);
static	int		btdelete(Btree*, Btdat*);
static	void		btdelkey(Btpage*, int);
static	int		btfindkey(Btpage*, Btdat*);
static	void		btincrefpage(Btpage*);
static	int		btinsert(Btree*, Btdat*, Btdat*);
static	int		btkeycmp(Btdat*, Btdat*);
static	int		btlookup(Btree*, Btdat*, Btdat*);
static	Btpage*	btpage(Btree*, ulong);
static	Btpage*	btsplitpage(Btpage*, Btdat*);


#define PHIINV 0.61803398874989484820
/*
 * multiplicative hash of a page address:
 * scale the fractional part of addr*PHIINV to the range [0, nhash).
 */
static uint
ahash(ulong addr, uint nhash)
{
	double frac;

	frac = fmod(addr*PHIINV, 1.0);
	return (uint)floor(nhash*frac);
}

/*
 * find the cached page with the given address.
 * a page that is found is moved to the head of its hash chain.
 * returns nil if the page is not in the cache.
 */
static Btpage*
btpage(Btree *bt, ulong addr)
{
	Btpage **link, *hit;
	uint slot;

	slot = ahash(addr, nelem(bt->hash));
	link = &bt->hash[slot];
	while(*link != nil){
		hit = *link;
		if(hit->addr != addr){
			link = &hit->hnext;
			continue;
		}
		/* unlink the page and put it back at the front of the chain */
		*link = hit->hnext;
		hit->hnext = bt->hash[slot];
		bt->hash[slot] = hit;
		return hit;
	}

	/* DISK read from disk here */

	return nil;
}

/*
 * allocate a zeroed page for bt and enter it in the page hash.
 * the page header, the key array and the page buffer share one allocation.
 * returns nil if memory cannot be allocated.
 */
static Btpage*
btallocpage(Btree *bt)
{
	ulong hdrsz;
	uint slot;
	Btpage *pg;

	/* DISK choose disk address here */
	/* DISK perhaps reuse page from cache here */

	/* header plus key array; the page buffer follows immediately */
	hdrsz = sizeof(Btpage)+(bt->maxnkey+1)*sizeof(Btkey);
	pg = mallocz(hdrsz+bt->pagesz, 1);
	if(pg == nil)
		return nil;

	pg->bt = bt;
	pg->nbuf = BtPagehdrsize;
	pg->buf = (uchar*)pg+hdrsz;
	pg->addr = (ulong)pg;	/* DISK use disk address here */

	/* push onto the front of the hash chain for this address */
	slot = ahash(pg->addr, nelem(bt->hash));
	pg->hnext = bt->hash[slot];
	bt->hash[slot] = pg;

	return pg;
}

/*
 * report whether key would fit on page p, without adding it:
 * the page must have a free key slot and enough room left for
 * the key bytes plus the fixed per-key overhead.
 *
 * a key that is already on the page is not treated specially,
 * though its ``addition'' could be allowed even with no space.
 */
static int
btcanaddkey(Btpage *p, Btdat *key)
{
	int need;
	Btree *bt;

	bt = p->bt;
	need = BtKeysize+key->n;
	return p->nkey < bt->maxnkey && p->nbuf+need <= bt->pagesz;
}

/*
 * find the slot of the first key on p that is not less than key.
 * if every key is smaller the result is p->nkey, the virtual
 * slot that sorts after everything.
 */
static int
btfindkey(Btpage *p, Btdat *key)
{
	int slot;

	/* BUG fix this to use binary search */
	slot = 0;
	while(slot < p->nkey && btkeycmp(&p->key[slot].dat, key) < 0)
		slot++;
	return slot;
}

/*
 * add key to page p, keeping the keys sorted.
 * the key bytes are copied into the tail of the page buffer.
 * if the key is already present nothing changes.
 * either way the key's slot number is returned; the caller
 * fills in the slot's ptr.
 */
static int
btaddkey(Btpage *p, Btdat *key)
{
	int slot;
	ulong ntail;
	uchar *dst;
	Btkey *k;

	assert(btcanaddkey(p, key));

	slot = btfindkey(p, key);
	if(slot < p->nkey && btkeycmp(key, &p->key[slot].dat) == 0)
		return slot;

	/* open a gap at slot; the trailing keyless ptr moves up one */
	ntail = p->nkey-slot;
	p->key[p->nkey+1].ptr = p->key[p->nkey].ptr;
	memmove(&p->key[slot+1], &p->key[slot], ntail*sizeof(p->key[0]));
	p->nkey++;

	/* key data grows downward from the end of the buffer */
	p->nbufkeydat += key->n;
	dst = p->buf+p->bt->pagesz-p->nbufkeydat;
	memmove(dst, key->a, key->n);

	k = &p->key[slot];
	k->dat.a = dst;
	k->dat.n = key->n;
	p->nbuf += BtKeysize+key->n;
	return slot;
}

/*
 * remove the key in the given slot from this page.
 * the ptrs should have been fixed already.
 *
 * key data lives packed at the end of p->buf, so removing a key
 * means closing the hole its bytes leave behind and adjusting the
 * data pointers of the keys that were moved.
 */
static void
btdelkey(Btpage *p, int slot)
{
	int i, n;
	uchar *w, *b;

	assert(0 <= slot && slot < p->nkey);

	/* shift key data */
	w = p->key[slot].dat.a;
	n = p->key[slot].dat.n;
	/* every key stored below the doomed one will move up by n bytes */
	for(i=0; i<p->nkey; i++)
		if(p->key[i].dat.a < w)
			p->key[i].dat.a = (uchar*)p->key[i].dat.a + n;
	p->nbufkeydat -= n;
	/* b is the new start of the key data; b-n was the old start */
	b = p->buf + p->bt->pagesz - p->nbufkeydat;
	/* slide the bytes between the old start and w up over the hole */
	memmove(b, b-n, w-(b-n));

	/* shift keys */
	memmove(&p->key[slot], &p->key[slot+1], (p->nkey-(slot+1))*sizeof(p->key[0]));
	p->nkey--;
	p->key[p->nkey].ptr = p->key[p->nkey+1].ptr;	/* ptr without key */
	p->nbuf -= BtKeysize+n;
}

/*
 * split a node that has too many keys.
 * the returned page is a newly allocated one
 * that sits to the right of the original page.
 * try to move about 1/3 of the keys to the 
 * new page.  (we leave the left heavier to
 * help sequential insertions.)
 *
 * on success *upkey holds a malloc'ed copy of the key that
 * separates the two pages; the caller owns (and must free) upkey->a.
 * for an interior page that key is removed from p; for a leaf
 * it stays on p.
 *
 * returns nil on failure, leaving p unmodified and upkey->a nil.
 */
static Btpage*
btsplitpage(Btpage *p, Btdat *upkey)
{
	int i, k, s, targ;
	Btpage *np;
	ulong ptr0;

	/*
	 * with fewer than two keys the boundary adjustments below
	 * would produce k == 0 and index key[-1].
	 */
	if(p->nkey < 2){
		werrstr("btree page has too few keys to split");
		upkey->a = nil;
		upkey->n = 0;
		return nil;
	}

	/* find an approximate 2:1 split */
	if(p->nkey >= p->bt->maxnkey)
		k = (p->nkey*2)/3;
	else{
		s = BtPagehdrsize;
		targ = (p->bt->pagesz*2)/3;
		for(k=0; k<p->nkey; k++){
			s += BtKeysize+p->key[k].dat.n;
			if(s >= targ)
				break;
		}
	}

	/* boundaries; shouldn't happen */
	if(k == 0)
		k++;
	if(k == p->nkey)
		k--;
	if((p->flags&BtLeaf)==0 && k==1 && p->nkey>=3)
		k++;

	/* we need to send a key up. if it's a leaf, we send a copy.*/
	upkey->n = p->key[k-1].dat.n;
	upkey->a = malloc(upkey->n);
	if(upkey->a == nil)
		return nil;
	memmove(upkey->a, p->key[k-1].dat.a, upkey->n);

	/*
	 * allocate the new page before touching p, and check the
	 * result before using it, so a failure leaves p intact.
	 */
	np = btallocpage(p->bt);
	if(np == nil){
		free(upkey->a);
		upkey->a = nil;
		return nil;
	}
	np->flags = p->flags;

	/* an interior page gives up the lifted key; remember its pointer */
	ptr0 = 0;
	if((p->flags&BtLeaf)==0){
		ptr0 = p->key[k-1].ptr;
		btdelkey(p, k-1);
		k--;
	}

	/* perform the split */
	/* (there are more efficient ways to do this) */
	while(k < p->nkey){
		i = btaddkey(np, &p->key[k].dat);
		np->key[i].ptr = p->key[k].ptr;
		btdelkey(p, k);
	}

	/*
	 * Fix up right-hand-side pointers.
	 * 
	 * No matter what, the new page gets the old page's rhs pointer.
	 * If these are leaf nodes, the old page links to the new page in the leaf chain.
	 * If these are interior nodes, the rhs of the old page is the pointer
	 * that always followed the currently last key.  (We picked it up as ptr0 above.)
	 */
	np->key[np->nkey].ptr = p->key[p->nkey].ptr;
	if(p->flags&BtLeaf)
		p->key[p->nkey].ptr = np->addr;
	else{
		assert(ptr0 != 0);
		p->key[p->nkey].ptr = ptr0;
	}

	return np;
}

/*
 * make a new, empty btree whose root is a single leaf page.
 * returns nil if memory cannot be allocated.
 */
static Btree*
btcreate(void)
{
	Btree *tree;
	Btpage *first;

	tree = mallocz(sizeof(*tree), 1);
	if(tree == nil)
		return nil;

	tree->pagesz = 512;
	tree->maxnkey = 8;

	first = btallocpage(tree);
	if(first == nil){
		free(tree);
		return nil;
	}

	/* the lone page is both the root and the first leaf */
	first->flags |= BtLeaf;
	tree->root = first;
	tree->leaf = first;
	btincrefpage(first);
	return tree;
}

/*
 * return the child page of p whose subtree should contain key,
 * or nil if that page cannot be found.
 */
static Btpage*
btnextpage(Btpage *p, Btdat *key)
{
	int slot;

	slot = btfindkey(p, key);
	/* BUG print warning when the lookup fails */
	return btpage(p->bt, p->key[slot].ptr);
}

/*
 * walk from p to the leaf node responsible for key.
 * if there is no node containing key, return nil.
 * nil is also returned if a page on the way down cannot be found.
 */
static Btpage*
btwalk(Btpage *p, Btdat *key)
{
	int i;
	Btpage *np;

	btincrefpage(p);
	while(!(p->flags&BtLeaf)){
		np = btnextpage(p, key);
		if(np == nil)	/* missing child: stop rather than dereference nil */
			return nil;
		p = np;
	}
	if((i=btfindkey(p, key)) < p->nkey && btkeycmp(key, &p->key[i].dat)==0)
		return p;
	else
		return nil;
}

/*
 * look key up in the tree and return the pointer stored with it.
 * if the key is absent, set the error string and return 0.
 */
static ulong
_btlookup(Btree *bt, Btdat *key)
{
	int slot;
	Btpage *leaf;

	leaf = btwalk(bt->root, key);
	if(leaf != nil){
		/* btwalk only returns a leaf that holds the key */
		slot = btfindkey(leaf, key);
		assert(0 <= slot && slot < leaf->nkey && btkeycmp(key, &leaf->key[slot].dat)==0);
		return leaf->key[slot].ptr;
	}

	werrstr("key not found");
	return 0;
}

/* 
 * Write to val the data associated with key, return 0.
 * If the data does not fit or does not exist, return -1.
 *
 * For now the ``data'' is just the 4-byte pointer stored with the key:
 * val->a must have room for it and val->n is set to 4 on success.
 */
static int
btlookup(Btree *bt, Btdat *key, Btdat *val)
{
	ulong ptr;

	ptr = _btlookup(bt, key);
	if(ptr == 0)
		return -1;

	// BUG fetch data
	if(val->n < 4){
		werrstr("val buffer too small");
		return -1;	/* nothing was written; must not look like success */
	}
	val->n = 4;
	*(ulong*)val->a = ptr;
	return 0;
}

/* 
 * insert key (with pointer ptr) into the btree rooted at p.
 * return -1 on error, 0 if success, 1 if success but split. 
 *
 * when 1 is returned, p has been split: *pnaddr is the address of the
 * new right-hand page and *klift is the separator key produced by
 * btsplitpage.  klift->a is malloc'ed and the caller must free it
 * once the key has been copied into a page.
 */
static int
_btinsertkey(Btpage *p, Btdat *key, ulong ptr, Btdat *klift, ulong *pnaddr)
{
	int i, j;
	ulong nptr;
	Btdat k;
	Btpage *np, *kidp;

	if(p->flags&BtLeaf){
		i = btfindkey(p, key);
		if(i < p->nkey && btkeycmp(&p->key[i].dat, key) == 0){
			// BUG what about the old pointer?
			p->key[i].ptr = ptr;
			return 0;
		}
		if(btcanaddkey(p, key)){
			p->key[btaddkey(p, key)].ptr = ptr;
			return 0;
		}
		np = btsplitpage(p, klift);
		if(np == nil)
			return -1;
		*pnaddr = np->addr;
		/* the key goes on whichever half now covers it */
		if(btkeycmp(key, klift) <= 0)
			p->key[btaddkey(p, key)].ptr = ptr;
		else
			np->key[btaddkey(np, key)].ptr = ptr;
		return 1;
	}else{
		i = btfindkey(p, key);
		kidp = btpage(p->bt, p->key[i].ptr);
		if(kidp == nil)
			return -1;
		j = _btinsertkey(kidp, key, ptr, &k, &nptr);
		switch(j){
		default:
		case -1:
			return -1;
		case 0:
			return 0;
		case 1:
			/*
			 * the child split.  slot i now leads to the new right
			 * half; the separator k is added in front of it and
			 * takes over the pointer to the old child.
			 */
			ptr = p->key[i].ptr;
			p->key[i].ptr = nptr;
			if(btcanaddkey(p, &k)){
				p->key[j=btaddkey(p, &k)].ptr = ptr;
				free(k.a);	/* btaddkey copied the bytes into the page */
				assert(j==i);
				return 0;
			}
			np = btsplitpage(p, klift);
			if(np == nil){
				free(k.a);
				return -1;
			}
			*pnaddr = np->addr;
			if(btkeycmp(key, klift) <= 0)
				p->key[btaddkey(p, &k)].ptr = ptr;
			else
				np->key[btaddkey(np, &k)].ptr = ptr;
			free(k.a);	/* copied into p or np above */
			return 1;
		}
	}
}

/*
 * insert key into the tree with val as its value.
 * for now val must be exactly 4 bytes, which are stored as the key's pointer.
 * if the root splits, a new root is created above the two halves.
 * returns 0 on success, -1 on error.
 */
static int
btinsert(Btree *bt, Btdat *key, Btdat *val)
{
	int i;
	ulong ptr, nptr;
	Btdat k;
	Btpage *np;

	// BUG store val somewhere
	assert(val->n == 4);
	ptr = *(ulong*)(val->a);

	switch(_btinsertkey(bt->root, key, ptr, &k, &nptr)){
	case 0:
		return 0;
	case 1:
		/* the root split; k is the malloc'ed separator between the halves */
		np = btallocpage(bt);
		if(np == nil){
			/* NOTE(review): the right half is now unreachable from the root */
			free(k.a);
			return -1;
		}
		i = btaddkey(np, &k);
		free(k.a);	/* btaddkey copied the bytes into np */
		assert(i == 0);
		np->key[0].ptr = bt->root->addr;
		np->key[1].ptr = nptr;
		bt->root = np;
		return 0;
	default:	/* -1, or anything unexpected */
		return -1;
	}
}

/*
 * describes the page next to some page p at the same level of the tree,
 * as tracked by _btdeletekey on its way down.
 */
typedef struct Btneighbor Btneighbor;
struct Btneighbor
{
	int dir;	/* 1 = left, 2 = right */
	Btpage *p;	/* the neighboring page, or nil if there is none */
	Btpage *anchor;	/* ancestor page holding the key that separates p from the neighbor */
	int aslot;	/* slot of that separating key in anchor */
};

/*
 * is a page too small to keep around?
 * it is only when it is under half full both by
 * bytes used and by number of keys.
 */
static int
bttoosmall(Btpage *p)
{
	Btree *bt;

	bt = p->bt;
	if(p->nbuf >= bt->pagesz/2)
		return 0;
	if(p->nkey >= bt->maxnkey/2)
		return 0;
	return 1;
}

/*
 * merge from into to, if possible.
 * we know for a fact that from and to have the same parent in the tree.
 *
 * to is side.p.  if the combined contents (plus the separating key
 * from the parent) would not fit on one page, nothing is changed and
 * 0 is returned.  otherwise all of from's keys are moved to to, the
 * pointers are patched, the separating key is removed from parent,
 * and 1 is returned.
 *
 * NOTE(review): the emptied page from is neither freed nor removed
 * from the page hash here, and bt->leaf is not updated when the
 * leftmost leaf is merged into its right neighbor -- confirm whether
 * that is handled elsewhere.
 */
static int
btcanmerge(Btpage *from, Btpage *parent, Btneighbor side, Btneighbor otherside)
{
	Btpage *to;
	Btdat *midkey;

	assert(parent == side.anchor);

	/* midkey is the parent's key that separates from and to */
	midkey = &side.anchor->key[side.aslot].dat;
	to = side.p;

	/* space check charges for midkey even though a leaf merge does not add it */
	if(from->nbuf+to->nbuf-BtPagehdrsize+BtKeysize+midkey->n > from->bt->pagesz)
		return 0;
	if(from->nkey+to->nkey+1 > from->bt->maxnkey)
		return 0;

//print("merge %p %p\n", from, to);
	/* move every key; afterwards from->key[0].ptr is from's trailing ptr */
	while(from->nkey > 0){
		to->key[btaddkey(to, &from->key[0].dat)].ptr = from->key[0].ptr;
		btdelkey(from, 0);
	}

	if(from->flags&BtLeaf){
//print("leaf patch %p %p\n", to, otherside.p);
		/* patch chain pointer */
		if(side.dir == 1)	/* to = left of from */
			to->key[to->nkey].ptr = from->key[0].ptr;
		else{			/* to = right of from */
			if(otherside.p)
				otherside.p->key[otherside.p->nkey].ptr = to->addr;
		}
	}else{
		/* interior pages: pull midkey down between the two groups of keys */
		if(side.dir == 1){	/* to = left of from */
			to->key[btaddkey(to, midkey)].ptr = to->key[to->nkey].ptr;
			to->key[to->nkey].ptr = from->key[0].ptr;
		}else
			to->key[btaddkey(to, midkey)].ptr = from->key[0].ptr;
	}
	/* drop the separating key and the pointer to from out of the parent */
	if(side.dir == 1){
//print("parent fixup left %p\n", parent);
		assert(parent->key[side.aslot+1].ptr == from->addr);
		parent->key[side.aslot+1].ptr = to->addr;
		btdelkey(parent, side.aslot);
	}else{
//print("parent fixup right %p\n", parent);
		assert(parent->key[side.aslot].ptr == from->addr);
		btdelkey(parent, side.aslot);
	}
	return 1;
}

/*
 * swap the key in the given slot of p for a different key,
 * keeping the slot's pointer.  the new key must sort into
 * the same slot.
 */
static void
btreplacekey(Btpage *p, int slot, Btdat *key)
{
	int at;
	ulong saved;

	saved = p->key[slot].ptr;
	btdelkey(p, slot);
	at = btaddkey(p, key);
	assert(at == slot);
	p->key[slot].ptr = saved;
}

/*
 * move keys one at a time from the neighbor side.p into p
 * until p is no longer emptier (by nbuf) than the neighbor.
 * the separating key in side.anchor is rewritten after each move
 * so that it still divides the two pages.
 */
static void
btshift(Btpage *p, Btneighbor side)
{
	Btpage *a, *q;

//print("shift %p %p [%p]\n", p, side.p, p->bt->root->key[0].ptr);
	/* BUG check that side can handle it */
	q = side.p;
	a = side.anchor;
	while(p->nbuf < q->nbuf){
		if(side.dir == 1){	/* q is left of p */
//print("shift from left [%p]\n", p->bt->root->key[0].ptr);
			if(p->flags&BtLeaf){
				/* q's last key moves to p; q's new last key becomes the separator */
				p->key[btaddkey(p, &q->key[q->nkey-1].dat)].ptr = q->key[q->nkey-1].ptr;
				btdelkey(q, q->nkey-1);
				btreplacekey(a, side.aslot, &q->key[q->nkey-1].dat);
			}else{
				/* rotate: separator comes down into p, q's last key goes up */
				p->key[btaddkey(p, &a->key[side.aslot].dat)].ptr = q->key[q->nkey].ptr;
				btreplacekey(a, side.aslot, &q->key[q->nkey-1].dat);
				q->key[q->nkey].ptr = q->key[q->nkey-1].ptr;
				btdelkey(q, q->nkey-1);
			}
//print("done shift from left [%p]\n", p->bt->root->key[0].ptr);
		}else{			/* p is left of q */
//print("shift from right\n");
			if(p->flags&BtLeaf){
				/* q's first key moves to p and becomes the separator */
				p->key[btaddkey(p, &q->key[0].dat)].ptr = q->key[0].ptr;
				btdelkey(q, 0);
				btreplacekey(a, side.aslot, &p->key[p->nkey-1].dat);
			}else{
				/* rotate: separator comes down into p, q's first key goes up */
				p->key[btaddkey(p, &a->key[side.aslot].dat)].ptr = p->key[p->nkey].ptr;
				p->key[p->nkey].ptr = q->key[0].ptr;
				btreplacekey(a, side.aslot, &q->key[0].dat);
				btdelkey(q, 0);
			}
		}
	}	
}

/*
 * p has become too small.
 * p->addr == parent->key[i].ptr
 * first try to merge p into a sibling under the same parent;
 * failing that, pull keys in from the fuller neighbor.
 *
 * return:
 *	-1 - error
 *	0 - no merge
 *	1 - merged with left
 *	2 - merged with right
 */
static int
btrebalance(Btpage *p, Btpage *parent, Btneighbor left, Btneighbor right)
{
	Btneighbor sib, other, donor;

	/* the tree root has no neighbors to balance against */
	if(parent == nil)
		return 0;

	/* a merge partner must hang off our own parent */
	if(left.anchor == parent){
		sib = left;
		other = right;
	}else if(right.anchor == parent){
		sib = right;
		other = left;
	}else
		abort();	/* cannot happen */
	if(btcanmerge(p, parent, sib, other))
		return sib.dir;

	/* no merge possible: borrow from whichever neighbor is fuller */
	donor = right;
	if(left.p!=nil && (right.p==nil || left.p->nbuf > right.p->nbuf))
		donor = left;
	btshift(p, donor);
	return 0;
}

/*
 * remove key from the btree rooted at p.
 * left is the page immediately to the left of p at this level of the hierarchy
 * right is the page immediately to the right of p at this level of the hierarchy
 * left.anchor is the lowest common ancestor of p and left
 * right.anchor is the lowest common ancestor of p and right
 *
 * return:
 *	-1 on error (including key not found, or a child page that cannot be found)
 * 	0 if the delete succeeded and p was not merged
 * 	1 if the delete succeeded and p was eaten by its left neighbor
 * 	2 if the delete succeeded and p was eaten by its right neighbor
 */
static int
_btdeletekey(Btpage *p, Btpage *parent, Btdat *key, Btneighbor left, Btneighbor right)
{
	int i, j;
	Btpage *np;
	Btree *bt;
	Btneighbor nleft, nright;

	bt = p->bt;
	if(p->flags&BtLeaf){
		i = btfindkey(p, key);
		if(i==p->nkey || btkeycmp(&p->key[i].dat, key) != 0)
			return -1; /* key not found */
		// BUG what about ptr?
		btdelkey(p, i);
		if(bttoosmall(p))
			return btrebalance(p, parent, left, right);
		return 0;
	}else{
		/* walk p toward key */
		i = btfindkey(p, key);
		np = btpage(bt, p->key[i].ptr);
		if(np == nil)	/* child missing: fail before recursing on nil */
			return -1;

		/* walk left toward key */
		nleft.dir = 1;
		if(i>0){
			/* the left neighbor is a sibling under p */
			nleft.p = btpage(bt, p->key[i-1].ptr);
			nleft.anchor = p;
			nleft.aslot = i-1;
		}else if(left.p){
			/* the left neighbor is the last child of p's left neighbor */
			nleft.p = btpage(bt, left.p->key[left.p->nkey].ptr);
			nleft.anchor = left.anchor;
			nleft.aslot = left.aslot;
		}else{
			nleft.p = nil;
			nleft.anchor = nil;
			nleft.aslot = 0;
		}

		/* walk right toward key */
		nright.dir = 2;
		if(i<p->nkey){
			/* the right neighbor is a sibling under p */
			nright.p = btpage(bt, p->key[i+1].ptr);
			nright.anchor = p;
			nright.aslot = i;
		}else if(right.p){
			/* the right neighbor is the first child of p's right neighbor */
			nright.p = btpage(bt, right.p->key[0].ptr);
			nright.anchor = right.anchor;
			nright.aslot = right.aslot;
		}else{
			nright.p = nil;
			nright.anchor = nil;
			nright.aslot = 0;
		}

		j = _btdeletekey(np, p, key, nleft, nright);
		switch(j){
		default:
		case -1:
			return -1;
		case 0:
			return 0;
		case 1:	/* left neighbor has taken over np's keys, key i-1 is gone */
		case 2:	/* right neighbor has taken over np's keys, key i is gone */
			/* p lost a key in the merge and may itself need rebalancing */
			if(bttoosmall(p))
				return btrebalance(p, parent, left, right);
			return 0;
		}
	}
}

/*
 * delete key from the tree.
 * if the root is left with no keys but still has a child,
 * that child becomes the new root.
 * returns 0 on success, -1 on error (including key not found).
 */
static int
btdelete(Btree *bt, Btdat *key)
{
	Btpage *p, *kid;
	Btneighbor left, right;

	/* the root has no neighbors */
	memset(&left, 0, sizeof left);
	memset(&right, 0, sizeof right);
	switch(_btdeletekey(bt->root, nil, key, left, right)){
	case 0:
		p = bt->root;
		if(p->nkey == 0 && p->key[0].ptr != 0){
//print("condense root\n");
			/* keep the old root if the child cannot be found, never a nil root */
			kid = btpage(bt, p->key[0].ptr);
			if(kid != nil)
				bt->root = kid;
		}
		return 0;
	case 1:
	case 2:
		/* the root has no parent, so it can never be merged away */
		abort();
		return -1;
	default:	/* -1, or anything unexpected */
		return -1;
	}
}

