/* THIS IS UNDER DEVELOPMENT: the source file is most likely being edited
   even as you look at it.  For the problem which this solves, see post:
   http://groups.yahoo.com/group/wordgame-programmers/message/495

   This is a proof-of-concept prototype; there's still a bit of
   tidying up to be done before incorporating it into a program,
   but that's the easy part; the actual algorithm work is done.
 */
#include <stdio.h>
#include <errno.h>
#include "splib.h"

#define DEFAULT_MAXLEN 80
#define NUM_RECOGNISERS (DEFAULT_MAXLEN+1)

static int    debug = FALSE;
static int    use_stdin = FALSE;

/* for this quick hack its more convenient to use lots of globals than
   it is to pass everything around cleanly as parameters */

static FILE  *infile;
static NODE  *dawg;
static INDEX  nedges;
static int    maxlen;

/* replace these with allocations off the heap */
static NODE *edge[NUM_RECOGNISERS];
static int firsttime[NUM_RECOGNISERS]; /* all init to TRUE */
static char word[NUM_RECOGNISERS][DEFAULT_MAXLEN];

/* the old procedure to check a word in the dawg has been broken out
   so that it is fed one char at a time.  It keeps its global state
   in the three arrays above.  The parallel search consists of multiple
   invocations of this procedure, one starting at each position in the
   target text input stream.  The dawg contains the list of words to
   be recognised, pre-calculated.
 */

static int process_one_char(char c, int id)
{
  int lenp, this_char; /* Not preserved between calls */

  if (firsttime[id]) {
    edge[id] = dawg+ROOT_NODE;
    *word[id] = '\0';
  }
  firsttime[id] = FALSE;

  for (;;) {  /* locate this letter at current point in trie */
    if (debug) {
      fprintf(stderr, "Checking for %s+%c against %s+%c\n",
        word, c, word, this_char);
    }
    this_char = ((*edge[id] >> V_LETTER) & M_LETTER);
    if (c == this_char) {
      if ((*edge[id] & M_END_OF_WORD) != 0) {
        /* OUTPUT THIS WORD */
        fprintf(stderr, "Recognised: %s%c\n", word[id], c);
        /* need to check if dest == dawg (end of trie) */
      }
      /* prepare for next iteration. */
      /* "edge" is the preserved global state */
      edge[id] = dawg+(*edge[id] & M_NODE_POINTER);
      lenp = strlen(word[id]);
      word[id][lenp] = c; word[id][lenp+1] = '\0';
      if (edge[id] == dawg){ /* end of dawg? */
        /* return code is not whether a word was found,
           but whether we can progress */
        firsttime[id] = TRUE; /* let the next one reset the variables */
        return(FALSE);
      } else {
        return(TRUE);
      }
    }
    if (((*edge[id]++) & M_END_OF_NODE) != 0) break; /* checked all branches */
  }
  firsttime[id] = TRUE;
  return(FALSE); /* No more choices are possible using this recogniser */
}

static void filter(FILE *in)
{
  int i, c, chars_processed, this_slot;

  if (debug) {
  fprintf(stderr, "This test is set up to detect 'scat' and 'cat' and 'catch'\n");

  c = process_one_char('s',0); fprintf(stderr, "s0: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('c',0); fprintf(stderr, "c0: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('c',1); fprintf(stderr, "c1: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('a',0); fprintf(stderr, "a0: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('a',1); fprintf(stderr, "a1: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('t',0); fprintf(stderr, "t0: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('t',1); fprintf(stderr, "t1: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('c',0); fprintf(stderr, "c0: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('c',1); fprintf(stderr, "c1: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('h',0); fprintf(stderr, "h0: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('h',1); fprintf(stderr, "h1: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('!',0); fprintf(stderr, "!0: %s\n", (c ? "TRUE":"FALSE"));
  c = process_one_char('!',1); fprintf(stderr, "!1: %s\n", (c ? "TRUE":"FALSE"));

  exit(0);
  }

  chars_processed = 0;
  for (;;) {
    c = fgetc(in);
    if (c == EOF) break;
    putchar(c);
    chars_processed += 1;
    /* each slot starts off another recogniser */
    this_slot = chars_processed % NUM_RECOGNISERS;
    firsttime[this_slot] = TRUE;
    (void)process_one_char(c, this_slot); /* kick off a new recogniser for this position */
    /* and pass the character to any still active recognisers */
    for (i = 0; i < NUM_RECOGNISERS; i++) {
      if ((i != this_slot) && !(firsttime[i])) (void)process_one_char(c, i);
    }
  }
}

int main(int argc, char **argv)
{
  int i;

  /* useful little debugging tool in
        http://www.gtoal.com/devel/backtrace/auto_gdb.c */
  /* restart_under_gdb(argc, argv); */

  if (argc == 4 && strcmp(argv[1], "-d") == 0) {
    debug = TRUE; argc -= 1; argv += 1;
  }

  if (argc != 3) {
    fprintf(stderr, "syntax: multiscan dawgfile textfile\n");
    exit(1);
  }

  if (!dawg_init((argc == 1 ? "" : argv[1]), &dawg, &nedges)) {
    exit(EXIT_FAILURE);
  }

  /* ideally we should at this point walk the dawg and find the longest string */
  maxlen = DEFAULT_MAXLEN;
  for (i = 0; i < NUM_RECOGNISERS; i++) firsttime[i] = TRUE;

  if ((*argv[2] == '-') && (*(argv[2]+1) == '\0')) {
    use_stdin = TRUE;
  } else {
    infile = fopen(argv[2], "r");
    if (infile == NULL) {
      fprintf(stderr, "multiscan: %s\n", strerror(errno));
      exit(2);
    }
  }

  if (debug) {
    if (use_stdin) {
      fprintf(stderr, "Searching stdin for the following strings:\n", argv[2]);
    } else {
      fprintf(stderr, "Searching for the following strings in %s:\n", argv[2]);
    }
    /* not advisable if using a large word list!
    dawg_print(dawg, (INDEX) ROOT_NODE);
     */
  }

  if (use_stdin) filter(stdin); else filter(infile);

  exit(0);
  return(0);
}