/*

    File:    correct.c
    Author:  Graham Toal
    Purpose: Correct a word using Soudex and similcmp on DAWG.
    Functions exported:  dawg_correct
    Internal functions:  reccmp, walk_rest_dawg, walk_dawg

Description:
   Reduce word to rough phonetic representation.
   Reduce all words in dawg to same form.  Compare.  This
   results in a large list of plausible (HA!) candidates.

   Then apply 'similcmp' which returns a distance metric representing
   closeness of two words.  Sort by this metric, and return N best
   matches.  If one stands out above others, return it alone.

   I'm working on a *much better* algorithm, but this will do
   for prototyping the interface.

   Final version of this will have callbacks to handle corrected
   words as they are produced.

*/

#include "soundex.c"
#include "similcmp.c"

typedef struct {
  char *possible;
  int score;
} rec;


/* Numbers of parameters exploded during process of removing global state :( */

static void
#ifdef PROTOTYPES
walk_rest_dawg(NODE PCCRAP *dawg, INDEX node, int len,
  char *word, char *original, char *target,
  rec *ans, int *nextfreep)
#else
walk_rest_dawg(dawg, node, len, word, original, target, ans, nextfreep)
  NODE PCCRAP *dawg;
  INDEX node;
  int len;
  char *word;
  char *original;
  char *target;
  rec *ans;
  int *nextfreep;
#endif
#define nextfree (*nextfreep)
{
  NODE PCCRAP *edge;
  long c;

    for (edge = (NODE PCCRAP *)&dawg[node]; ;edge++) {
    c = *edge;            /* Avoid MS C compiler bug :-( */
    c = c >> V_LETTER;
    c = c & M_LETTER;
    word[len] = (char)c;
    if ((*edge & M_END_OF_WORD) != 0) {
      word[len+1] = 0;
      if (strcmp(soundex(word), target) == 0) {
        ans[nextfree].possible = (char *)Malloc(len+1, 1);
        if (ans[nextfree].possible == NULL) {
          fprintf(stderr,"I\'ve no idea - it could be anything...\n");
          exit(0);
        }
        strcpy(ans[nextfree].possible, word);
        ans[nextfree].score = simil(word, original);
        if (nextfree < 511) nextfree++;
      }
    }
    c = *edge & M_NODE_POINTER;
    if (c != 0) {
      walk_rest_dawg(dawg, c, len+1, word, original, target, ans, nextfreep);
    }
    if ((*edge & M_END_OF_NODE) != 0) break; /* End of node */
  }
#undef nextfree
}


static void
#ifdef PROTOTYPES
walk_dawg(
  NODE PCCRAP *dawg, INDEX node, int len,
  char *original, char *target,
  char targets, char *word,
  rec *ans, int *nextfreep)
#else
walk_dawg(dawg, node, len, original, target, targets, word, ans, nextfreep)
  NODE PCCRAP *dawg;
  INDEX node;
  int len;
  char *original;
  char *target;
  char targets;
  char *word;
  rec *ans;
  int *nextfreep;
#endif
#define nextfree (*nextfreep)
{
  NODE PCCRAP *edge;
  long c;

  edge = (NODE PCCRAP *)&dawg[node];
  /* Only search trie starting with initial letter */
  for (;;) {
    c = *edge;            /* Avoid MS C compiler bug :-( */
    c = c >> V_LETTER;
    c = c & M_LETTER;
    word[len] = (char)c;
    if (c == targets) {
      c = *edge & M_NODE_POINTER;
      walk_rest_dawg(dawg, c, len+1, word, original, target, ans, nextfreep);
      return;
    }
    edge++;
  }
#undef nextfree
}

static int
#ifdef PROTOTYPES
reccmp(const void *a, const void *b)
#else
reccmp(a, b)
  void *a;
  void *b;
#endif
{
  return(((rec *)b)->score - ((rec *)a)->score);
}


int
#ifdef PROTOYPES
dawg_correct(NODE PCCRAP *dawg, char *original)
#else
dawg_correct(dawg, original)
NODE PCCRAP *dawg;
char *original;
#endif
{
char word[81];
char target[256];
static rec ans[256];
  /* static because brain-dead MSC can't hack a big stack :-( */
int targets;
int nextfree = 0;
int i, prev=0;

  targets = original[0];
  strcpy(target, soundex(original));
  walk_dawg(
    dawg, (INDEX)ROOT_NODE, 0, original, target, targets, word, ans, &nextfree);
  if (nextfree == 0) return(FALSE);
  qsort(ans, (size_t)nextfree, sizeof(rec), /*&*/reccmp);
  for (i = 0; i < nextfree; i++) {
    if (((prev*100) / (ans[i].score)) > 120) break;
    fprintf(stdout, "%s (%d)\n", ans[i].possible, ans[i].score);
    if (ans[i].score >= 100) break;
    if (i == 5) break;
    prev = ans[i].score;
  }
  return(TRUE);
}

