/* ************************************* */
/*   MDawg.cpp                           */
/*   Copyright 2001 Martin L"auter       */
/*   laeuter@mathematik.uni-leipzig.de   */
/* ************************************* */

#include "MDawg.h"
#include <stdlib.h>
#include <string.h>

#define DAWGAD(n) ((NODE *)(((unsigned char *)dawg)+(n)))
#define DAWG(n) (*DAWGAD(n))
#define MDAWGAD(i,n) ((NODE *)(((unsigned char *)(dawgp[i]))+(n)))
#define MDAWG(i,n) (*MDAWGAD(i,n))

void MDawg::LoadFrom(FILE *fin)
{ long info[4];
  if(fin!=NULL) {
    fread(info,sizeof(long),4,fin);
    if(((info[0]&0xff000000)==0xff000000)||(info[0]==DAWG_MAGIC)) {
      if((dawg=(NODE *)malloc(info[1]))!=NULL) {
        fread(dawg,1,info[1],fin);
      } else {
        fprintf(stderr,"Nicht genuegend Speicher fuer DAWG\n");
        exit(1);
      }
    }
    firstnode=DAWGAD(info[2]);
    NrOfWords=info[3];
	NrOfBytes=info[1];
  }
}

void MDawg::SaveTo(FILE *fin)
{ long info[4];
  if(fin!=NULL) {
	info[0]=0xff000001;
    info[1]=NrOfBytes;
	info[2]=(char *)firstnode-(char *)dawg;
	info[3]=NrOfWords;
    fwrite(info,sizeof(long),4,fin);
    fwrite(dawg,1,NrOfBytes,fin);
  }
}

MDawg::MDawg(const char *file)
{ FILE *fin;
  dawg=NULL;
  if((fin=fopen(file,"rb"))!=NULL) {
    LoadFrom(fin);
    fclose(fin);
  } else {
    fprintf(stderr,"Kann DAWG-File (%s) nicht oeffnen!\n",file);
    exit(1);
  }
}

MDawg::MDawg()
{
  dawg=NULL;
  firstnode=NULL;
  NrOfWords=0;
}

MDawg::~MDawg()
{
  if(dawg!=NULL) free(dawg);
}

void MDawg::StartWord()
{
  curnode=firstnode;
}

int MDawg::Letter(unsigned char c)
{ NODE *node;
  node=curnode;
  while(LTR(*node)<c) {
    if(*node&M_END_OF_NODE)  return(0);
    node++;
  }
  if(LTR(*node)==c) {
    curnode=DAWGAD(PTR(*node));
    return((*node&3)|8|((PTR(*node)!=0)<<2));
  }
  return(0);
}

int MDawg::GetState()
{
  return((int)curnode);
}

void MDawg::SetState(int state)
{
  curnode=(NODE *)state;
}

unsigned char MDawg::GetNthLetter(int n)
{ int i;
  i=0;
  while(i<n) {
    if(curnode[i]&M_END_OF_NODE) return('\0');
    i++;
  }
  return(LTR(curnode[i]));
}

int MDawg::Find(unsigned char *str)
{ NODE *node;
  node=firstnode;
  while(*str) {
    do {
      if(LTR(*node)==*str) {
        if(str[1]) {
          if(PTR(*node)==0) return(0);
          goto m1;
        } else {
          return((*node&M_END_OF_WORD)==M_END_OF_WORD);
        }
      }
    } while((*(node++)&M_END_OF_NODE)==0);
    return(0);
m1: node=DAWGAD(PTR(*node));
    str++;
  }
  return(1);
}

int MDawg::Longest(unsigned char *str)
{ NODE *node; int l,lm;
  l=lm=0;
  node=firstnode;
  while(*str) {
    do {
      if(LTR(*node)==*str) {
	    l++;
	    if((*node&M_END_OF_WORD)==M_END_OF_WORD) {
          if(l>lm) lm=l;
	    }
	    if(str[1]=='\0') return(lm);
	    if(PTR(*node)!=0) {
          node=DAWGAD(PTR(*node));
	      goto m2;
	    } else break;
      }
    } while((*(node++)&M_END_OF_NODE)==0);
    return(lm);
m2: str++;
  }
  return(lm);
}

DWGMark *MDawg::NextMark(DWGMark *mark)
{ int i;
  i=mark->len;
  while(0==0) {
    if((mark->flag==0)&&((*(mark->nodes[i])&M_END_OF_WORD)!=0)) {
      mark->flag=1;
      mark->len=i;
      return(mark);
    }
    if((*(mark->nodes[i])&M_END_OF_WORD)==0) {
mnm1: do {
        mark->nodes[i+1]=DAWGAD(PTR(*(mark->nodes[i])));
        i++;
      } while((*(mark->nodes[i])&M_END_OF_WORD)==0);
      mark->flag=1;
      mark->len=i;
      return(mark);
    }
    mark->flag=0;
    if(PTR(*(mark->nodes[i]))!=0) { goto mnm1; }
    while((*(mark->nodes[i])&M_END_OF_NODE)!=0) { if(i--==0) return(NULL); }
    mark->nodes[i]++;
  }
}

DWGMark *MDawg::SetMark(unsigned char *word)
{ DWGMark *mark;
  mark=(DWGMark *)malloc(sizeof(DWGMark));
  if(word==NULL) {
    mark->len=0; mark->flag=0;
    mark->nodes[0]=firstnode;
    mark=MDawg::NextMark(mark);
  } else { NODE *node; int i;
    node=firstnode; i=0;
    while(*word) {
      do {
        if(LTR(*node)==*word) {
          if(word[1]) {
            if(PTR(*node)==0) { free(mark); return(NULL); }
          } else {
            if((*node&M_END_OF_WORD)==M_END_OF_WORD) {
              mark->len=i-1;
              mark->flag=1;
              return(mark);
            } else {
              free(mark);
              return(NULL);
            }
          }
          mark->nodes[i++]=node;
          node=DAWGAD(PTR(*node));
          goto ma1;
        }
      } while((*(node++)&M_END_OF_NODE)==0);
      free(mark);
      return(NULL);
ma1:  word++;
    }
  }
  return(mark);
}

void MDawg::GetMarkString(DWGMark *mark,unsigned char *buf)
{ int i;
  for(i=0;i<mark->len+mark->flag;i++) {
    *buf++=LTR(*(mark->nodes[i]));
  }
  *buf='\0';
}

MCDawg::MCDawg(const char *file) : MDawg(file)
{

}

MCDawg::MCDawg() : MDawg()
{

}

int MCDawg::GetNr(unsigned char *str)
{ int n; NODE w,*node;
  n=0; node=firstnode;
  while(*str) {
    while(LTR(w=*node)!=*str) {
      if(PTR(w)) n+=DAWG(PTR(w)-4);
      if((w&M_END_OF_WORD)!=0) n++;
      if((w&M_END_OF_NODE)!=0) return(0);
      node++;
    }
    if((w&M_END_OF_WORD)!=0) n++;
    if(*++str) {
      if(PTR(w)) node=DAWGAD(PTR(w)); else return(0);
    } else break;
  }
  if((w&M_END_OF_WORD)==0) n=0;
  return(n);
}

int MCDawg::GetNth(int n,unsigned char *str,int l)
{ NODE w,*node; int h,ll;
  if(n<=0) return(-1);
  node=firstnode; ll=1;
  while(0==0) {
    h=0; w=0;
    do {
      if((w&M_END_OF_NODE)==M_END_OF_NODE) return(-1);
      n-=h; h=0;
      if(((w=*node)&M_END_OF_WORD)!=0) {
        n--;
	    if(n==0) {
	      if(++ll==l) { *str='\0'; return(-2); }
	      *str++=LTR(w); *str='\0'; return(0);
	    }
      }
      if(PTR(w)!=0) h=DAWG(PTR(w)-4);
      node++;
    } while(n>h);
    *str++=LTR(w);
    if(++ll==l) { *str='\0'; return(-2); }
    node=DAWGAD(PTR(w));
  } 
}

int MCDawg::GetCINrs(unsigned char *str,int *res)
{ int n,nhp,rp; NODE w,*node,*nh[256];
  n=0; node=firstnode; nhp=0; rp=0;
  while(0==0) {
    while((LTR(w=*node)^(*str))&(~0x20)) {
      if(PTR(w)) n+=DAWG(PTR(w)-4);
      if((w&M_END_OF_WORD)!=0) n++;
mgcin1:
	  while((w&M_END_OF_NODE)!=0) {
		if(--nhp<0) {
          res[rp]=0;
          return(rp);
		}
	    str--;
        w=*(node=nh[nhp]);
	  }
      node++;
	}
    if((w&M_END_OF_WORD)!=0) n++;
    if(str[1]==0) break;
    if(PTR(w)==0) goto mgcin1;
	str++;
	nh[nhp++]=node;
	node=DAWGAD(PTR(w));
  }
  if(w&M_END_OF_WORD) {
	res[rp++]=n;
  }
  if(PTR(w)) n+=DAWG(PTR(w)-4);
  goto mgcin1;
}

int MCDawg::FindLEQ(unsigned char *word)
{ int nr;
  if(word==NULL) {
    return(0);
  } else { NODE *node; int i;
    node=firstnode; i=0; nr=0;
    while(*word) {
      do {
        if(LTR(*node)>*word) return(~nr);
        if(*node&M_END_OF_WORD) nr++;
        if(LTR(*node)==*word) {
          if(word[1]!='\0') {
            if(PTR(*node)==0) {
              return(~nr);
            } else {
              node=DAWGAD(PTR(*node));
              goto mca1;
            }
          } else {
            return(((*node&M_END_OF_WORD)==M_END_OF_WORD)?nr:~nr);
          }
        }
        if(PTR(*node)) nr+=DAWGAD(PTR(*node))[-1];
      } while((*(node++)&M_END_OF_NODE)==0);
      return(~nr);
mca1: word++;
    }
  }
  return(~nr);
}

MMDawg::MMDawg(int n)
{
  dawgp=(NODE **)malloc(n*sizeof(NODE *));
  firstnodep=(NODE **)malloc(n*sizeof(NODE *));
  nmax=n; nfilled=0; NrOfWords=0; NrOfBytes=0;
}

MMDawg::~MMDawg()
{
  for(nfilled--;nfilled>=0;nfilled--) {
    if(dawgp[nfilled]!=NULL) free(dawgp[nfilled]);
  }
}

void MMDawg::AddFile(const char *file)
{ FILE *fin; long info[4];
  if(nfilled==nmax) {
    fprintf(stderr,"Zu viele DAWG-Files\n");
    exit(1);
  }
  dawgp[nfilled]=NULL;
  if((fin=fopen(file,"rb"))!=NULL) {
    fread(info,sizeof(long),4,fin);
    if(((info[0]&0xff000000)==0xff000000)||(info[0]==DAWG_MAGIC)) {
      if((dawgp[nfilled]=(NODE *)malloc(info[1]))!=NULL) {
        fread(dawgp[nfilled],1,info[1],fin);
      } else {
        fprintf(stderr,"Nicht genuegend Speicher fuer DAWG\n");
        exit(1);
      }
    }
    fclose(fin);
    firstnodep[nfilled]=MDAWGAD(nfilled,info[2]);
    NrOfWords+=info[3];
    NrOfBytes+=info[1];
  } else {
    fprintf(stderr,"Kann DAWG-File (%s) nicht oeffnen!\n",file);
    exit(1);
  }
  nfilled++;
}

MMDawg::MMDawg(const char *file)
{ FILE *fin; long info[4]; NODE *(hdawgp[16]); int i,hfirstnode[16];
  nmax=0; nfilled=0; NrOfWords=0;
  if((fin=fopen(file,"rb"))!=NULL) {
    do {
      if(fread(info,sizeof(long),4,fin)==4) {
        if((info[0]&0xff000000)==0xff000000) {
          if((hdawgp[nfilled]=(NODE *)malloc(info[1]))!=NULL) {
            if(fread(hdawgp[nfilled],1,info[1],fin)!=info[1]) {
              for(i=nfilled;i>=0;i--) { free(hdawgp[i]); }
              fprintf(stderr,"Unerwartetes Dateiende beim Laden von %s/%d\n",file,nfilled+1);
              exit(1);
            }
          } else {
            for(i=nfilled-1;i>=0;i--) { free(hdawgp[i]); }
            fprintf(stderr,"Nicht genuegend Speicher fuer DAWG\n");
            exit(1);
          }
        } else {
          for(i=nfilled-1;i>=0;i--) { free(hdawgp[i]); }
          fprintf(stderr,"Korrumpiertes DAWG-File (%s/%d)\n",file,nfilled+1);
          exit(1);
        }
      } else break;
      hfirstnode[nfilled]=info[2];
      NrOfWords+=info[3];
      NrOfBytes+=info[1];
      nfilled++;
    } while(((info[0]>>8)&0xff)!=/*(info[0]&0xff)*/-1);
    fclose(fin);
    nmax=nfilled;
    dawgp=(NODE **)malloc(nfilled*sizeof(NODE *));
    firstnodep=(NODE **)malloc(nfilled*sizeof(NODE *));
    for(i=0;i<nfilled;i++) {
      dawgp[i]=hdawgp[i];
      firstnodep[i]=MDAWGAD(i,hfirstnode[i]);
    }
  } else {
    fprintf(stderr,"Kann DAWG-File (%s) nicht oeffnen!\n",file);
    exit(1);
  }
}

void MMDawg::StartWord()
{
  curnode=firstnodep[0];
  curdwgnr=0;
}

int MMDawg::Letter(unsigned char c)
{ int fl; NODE *node;
  fl=(node=curnode)==firstnodep[0];
mlet1:
  while(LTR(*node)<c) {
    if(*node&M_END_OF_NODE) {
      if(fl) {
	    curdwgnr=fl;
        node=firstnodep[fl++];
	    if(fl==nfilled) return(0);
	    goto mlet1;
      }
      return(0);
    }
    node++;
  }
  if(LTR(*node)==c) {
    curnode=MDAWGAD(curdwgnr,PTR(*node));
    return((*node&3)|8|((PTR(*node)!=0)<<2));
  }
  return(0);
}

int MMDawg::GetState()
{ int *p;
  p=(int *)malloc(sizeof(int)+sizeof(NODE *));
  p[0]=curdwgnr;
  p[1]=(int)curnode;
  return((int)p);
}

void MMDawg::SetState(int state)
{
  curdwgnr=((int *)state)[0];
  curnode=(NODE *)(((int *)state)[1]);
}

unsigned char MMDawg::GetNthLetter(int n)
{ int i;
  i=0;
  while(i<n) {
    if(curnode[i]&M_END_OF_NODE) return('\0');
    i++;
  }
  return(LTR(curnode[i]));
}


int MMDawg::Find(unsigned char *str0)
{ NODE *node,*dawg; int i; unsigned char *str;
  for(i=0;i<nfilled;i++) {
    node=firstnodep[i]; dawg=dawgp[i]; str=str0;
    while(*str) {
      do {
        if(LTR(*node)==*str) {
          if(str[1]) {
            if(PTR(*node)==0) return(0);
            goto m3;
          } else {
            return((*node&M_END_OF_WORD)==M_END_OF_WORD);
          }
        }
      } while((*(node++)&M_END_OF_NODE)==0);
      goto m4;
m3:   node=DAWGAD(PTR(*node));
      str++;
    }
    return(1);
m4: ;
  }
  return(0);
}

int MMDawg::Longest(unsigned char *str0)
{ NODE *node,*dawg; int l,lm,i; unsigned char *str;
  l=lm=0;
  for(i=0;i<nfilled;i++) {
    node=firstnodep[i]; dawg=dawgp[i]; str=str0;
    while(*str) {
      do {
        if(LTR(*node)==*str) {
	      l++;
	      if((*node&M_END_OF_WORD)==M_END_OF_WORD) {
            if(l>lm) lm=l;
	      }
	      if(str[1]=='\0') return(lm);
	      if(PTR(*node)!=0) {
            node=DAWGAD(PTR(*node));
  	        goto m5;
	      } else break;
        }
      } while((*(node++)&M_END_OF_NODE)==0);
      goto m6;
m5:   str++;
    }
    return(lm);
m6: ;
  }
  return(lm);
}

MMCDawg::MMCDawg(int n) : MMDawg(n)
{

}

MMCDawg::MMCDawg(const char *name) : MMDawg(name)
{

}

int MMCDawg::GetNr(unsigned char *str0)
{ int n,i; NODE w,*node,*dawg; unsigned char *str;
  n=0;
  for(i=0;i<nfilled;i++) {
    node=firstnodep[i]; dawg=dawgp[i]; str=str0;
    while(*str) {
      while(LTR(w=*node)!=*str) {
        if(PTR(w)) n+=DAWG(PTR(w)-4);
        if((w&M_END_OF_WORD)!=0) n++;
        if((w&M_END_OF_NODE)!=0) goto m7;
        node++;
      }
      if((w&M_END_OF_WORD)!=0) n++;
      if(*++str) {
        if(PTR(w)) node=DAWGAD(PTR(w)); else return(0);
      } else break;
    }
    if((w&M_END_OF_WORD)==0) n=0;
    return(n);
m7: ;
  }
  return(0);
}

int MMCDawg::GetNth(int n,unsigned char *str0,int l)
{ NODE w,*node,*dawg; int h,ll,i; unsigned char *str;
  if(n<=0) return(-1);
  for(i=0;i<nfilled;i++) {
    node=firstnodep[i]; dawg=dawgp[i]; str=str0; ll=1;
    while(0==0) {
      h=0; w=0;
      do {
        n-=h; h=0;
        if((w&M_END_OF_NODE)==M_END_OF_NODE) goto m8;
        if(((w=*node)&M_END_OF_WORD)!=0) {
          n--;
	      if(n==0) {
	        if(++ll==l) { *str='\0'; return(-2); }
	        *str++=LTR(w); *str='\0'; return(0);
	      }
        }
        if(PTR(w)!=0) h=DAWG(PTR(w)-4);
        node++;
      } while(n>h);
      *str++=LTR(w);
      if(++ll==l) { *str='\0'; return(-2); }
      node=DAWGAD(PTR(w));
    }
m8: ;
  }
  return(-1);
}

MRDawg::MRDawg(const char *file) : MDawg(file) {}

MRDawg::~MRDawg()
{
  if(dawg) free(dawg);
}

int MRDawg::Find(unsigned char *str)
{ NODE *node,w;
  node=firstnode;
  while(*str) {
    if(LTR(w=node[*str])!=*str) return(0);
    str++;
    if(PTR(w)==0) {
      if(*str) return(0);
      break;
    }
    node=DAWGAD(PTR(w));
  }
  return(w&M_END_OF_WORD); 
}

