#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>

#ifndef FALSE
#define TRUE (0==0)
#define FALSE (0!=0)
#endif

#define debug_input 1
#define debug_sizes 2
#define debug_strcmp 4
#define debug_lookup 8
#define debug_letterbanks 16
#define debug_letterswaps 32
static int debug = 0 /*|debug_letterswaps|debug_letterbanks|debug_lookup|debug_strcmp|debug_sizes|debug_input*/;

char **string = NULL;
int   *lb = NULL;     // points to a cycle of letterbank equivalents
int   *ls = NULL;     // points to a list of letterswap targets

#define LB_MAX 2000000
#define LS_MAX 1000000

static int lbpool[LB_MAX];
static int lspool[LS_MAX];

int next_lb = 0; /* index into lbpool */
int next_ls = 0; /* index into lspool */

int next_word = 0;
int MAX_WORDS = 0;

int lookup(char *word) {
  // Return index of the word.  Binary chop for now.
  int low = 1, high = MAX_WORDS; // inclusive at each end
  while (low <= high) {
    int middle = (low+high)/2;
    int comparison = strcmp(string[middle], word);
    if (debug&debug_strcmp) fprintf(stderr, "[%s:%d %s:%d %s:%d] %d\n",
                                    string[low], low,
				    string[middle], middle,
                                    string[high], high, comparison);
    if (comparison < 0) {
      low = middle+1;
    } else if (comparison > 0) {
      high = middle-1;
    } else return middle;
  }
  return 0; // not found
}

#ifndef MAX_LINE
#define MAX_LINE (8*1024)
#endif

void first_pass(FILE *in) {
  char line[MAX_LINE+1];
  int lp;
  for (;;) { // read all words
    lp = 0;
    for (;;) { // read letters within a word
      int c = fgetc(in);
      if (c == EOF) {
        if (lp == 0) {
          if (debug&debug_sizes) fprintf(stderr, "#D MAX_WORDS = %d\n", MAX_WORDS);
          return;
	}
        line[lp] = '\0';
        fprintf(stderr, "Unexpected end of file at \"%s\"\n", line);
        exit(EXIT_FAILURE);
      } else if (c == '\n') {
        break;
      } else if (c == '\r') { // in case of M$
        continue;
      } else {
        line[lp++] = c; if (lp == MAX_LINE) {
          fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2);
          exit(EXIT_FAILURE);
        }
      }
    }
    line[lp] = '\0';
    MAX_WORDS += 1;
    if (debug&debug_input) fprintf(stderr, "#D P0: %s\n", line);
  }
  /* NOT REACHED */
}

void second_pass(FILE *in) {
  char line[MAX_LINE+1];
  int lp;
  for (;;) { // read all words
    lp = 0;
    for (;;) { // read letters within a word
      int c = fgetc(in);
      if (c == EOF) {
        if (lp == 0) {
          if (debug&debug_sizes) fprintf(stderr, "#D MAX_WORDS = %d, next_word = %d\n", MAX_WORDS, next_word);
          if (next_word != MAX_WORDS+1) {
            fprintf(stderr, "Second pass inconsistency - MAX_WORDS = %d, next_word = %d\n", MAX_WORDS, next_word);
            exit(EXIT_FAILURE);
	  }
          return;
	}
        exit(EXIT_FAILURE);
      } else if (c == '\n') {
        break;
      } else if (c == '\r') { // in case of M$
        continue;
      } else {
        line[lp++] = c; if (lp == MAX_LINE) exit(EXIT_FAILURE);
      }
    }
    line[lp] = '\0';
    string[next_word] = strdup(line); // Matbe change to a stringpool?
    lb[next_word] = 0; // none generated yet
    ls[next_word] = 0; // ditto
    if (debug&debug_input) fprintf(stderr, "#D P1: %s\n", line);
    next_word += 1; // So next_word will equal 1 for the first word, "aa"
  }
  /* NOT REACHED */
}

void add_letterbank_link(char *from_word, char *to_word) {
  if (strcmp(from_word, to_word) == 0) return; // Don't link to self
  if (debug&debug_letterbanks) fprintf(stderr, "Add link \"%s\" (word[%d].lb (%d) ) : \"%s\" (%d)\n",
          from_word, lookup(from_word), next_lb,
	  to_word, lookup(to_word));
  // And do it...
  lbpool[next_lb++] = lookup(to_word); if (next_lb >= LB_MAX) {
    fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2);
    exit(EXIT_FAILURE);
  }

}

void add_letterbank(char *from_word, char *group) {
  /* Loop over each word in group, add a letterbank link from word -> each */
  char *word, *end;
  char copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP.

  lb[lookup(from_word)] = next_lb;
 
  if (debug&debug_letterbanks) fprintf(stderr, "Add \"%s\": \"%s\"\n", from_word, group);
  strcpy(copy, group);
  word = copy;
  for (;;) {
    end = strchr(word, ' ');
    if (end != NULL) *end++ = '\0';
    add_letterbank_link(from_word, word);
    if ((word = end) == NULL) { // last word
      // end of this group
      lbpool[next_lb++] = 0; if (next_lb >= LB_MAX) {
        fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2);
        exit(EXIT_FAILURE);
      }
      return;
    }
  }
}

void add_letterbank_group(char *group) {
  char *word, *end;
  char copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP.
  strcpy(copy, group);
  word = copy;
  for (;;) {
    end = strchr(word, ' ');
    if (end != NULL) *end++ = '\0';
    add_letterbank(word, group);
    word = end;
    // last word
    if (end == NULL) return;
  }
}

void create_letterbanks(FILE *in) {
  char line[MAX_LINE+1];
  int lp;
  for (;;) { // read all letterbank lines
    lp = 0;
    for (;;) { // read letters within a letterbank line
      int c = fgetc(in);
      if (c == EOF) {
        if (lp == 0) return; // EOF should be at the start of a line.
        line[lp] = '\0';
        fprintf(stderr, "Unexpected end of file at \"%s\"\n", line);
        exit(EXIT_FAILURE);
      } else if (c == '\n') {
        break;
      } else if (c == '\r') { // in case of M$
        continue;
      } else {
        line[lp++] = c; if (lp == MAX_LINE) {
          fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2);
          exit(EXIT_FAILURE);
        }
      }
    }
    line[lp] = '\0';
    if (debug&debug_letterbanks) fprintf(stderr, "#D LB: %s\n", line);
    add_letterbank_group(line);
  }
}

void insert_letterswap(int from_word, int to_word) {
  static int last_from_word = 0;

  if (last_from_word != from_word) {
    lspool[++next_ls] = 0; if (next_ls >= LS_MAX) {
      fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2);
      exit(EXIT_FAILURE);
    }
    ls[from_word] = next_ls;
    last_from_word = from_word;
  }
  lspool[next_ls] = to_word;
  lspool[++next_ls] = 0; if (next_ls >= LS_MAX) {
    fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2);
    exit(EXIT_FAILURE);
  }
}

void add_one_letterswap(char *from_word, char *to_word) {
  int old_word, new_word;
  new_word = lookup(to_word);
  if (new_word != 0) {
    old_word = lookup(from_word);
    if (debug&debug_letterswaps) fprintf(stderr, "SWAP: %s (%d) -> %s (%d)\n",
                                                 from_word, old_word,
                                                 to_word, new_word);
    insert_letterswap(old_word, new_word);
  }
}

void add_letterswaps(char *from_word)
{
  int i, letpos;
  char saved_let;
  char to_word[MAX_LINE+1];

  strcpy(to_word, from_word);

  for (letpos = 0; letpos < strlen(from_word); letpos++) {
    // try all letters at position letpos (except saved_let)
    saved_let = from_word[letpos];
    // try all letters of the same case at this position
    // case hack reduces the need to know the exact details
    // of the wordlist file.
    if (isalpha(saved_let)) {
      if (isupper(saved_let)) {
        for (i = 32; i < 256; i++) {
          if (isalpha(i) && isupper(i) && (i != saved_let)) {
            to_word[letpos] = (char)i;
            add_one_letterswap(from_word, to_word);
            to_word[letpos] = saved_let;    
	  }
        }
      } else if (islower(saved_let)) {
        for (i = 32; i < 256; i++) {
          if (isalpha(i) && islower(i) && (i != saved_let)) {
            to_word[letpos] = (char)i;
            add_one_letterswap(from_word, to_word);
            to_word[letpos] = saved_let;    
	  }
        }
      }
    }
  }
}

void create_letterswaps(void) {
  int wordno;
  for (wordno = 1; wordno <= MAX_WORDS; wordno++) {
    add_letterswaps(string[wordno]);
  }
}

int main(int argc, char **argv) {
  FILE *dict, *bank;
  char *wordlist, *banklist;

  if (argc == 1) {
    wordlist = "wordlist.txt";
    banklist = "letterbanks.txt";
  } else if (argc == 2) {
    wordlist = argv[1];
    banklist = "letterbanks.txt";
  } else if (argc == 3) {
    wordlist = argv[1];
    banklist = argv[2];
  } else {
    fprintf(stderr, "syntax: gentables [wordlist.txt [letterbanks.txt]]\n");
    exit(EXIT_FAILURE);
  }

  dict = fopen(wordlist, "r");
  if (dict == NULL) {
    fprintf(stderr, "Cannot convert word list \"%s\" - %s\n\n", wordlist, strerror(errno));
    exit(EXIT_FAILURE);
  }

  bank = fopen(banklist, "r");
  if (bank == NULL) {
    fprintf(stderr, "Cannot convert letterbank \"%s\" - %s\n\n", banklist, strerror(errno));
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "Converting word list \"%s\"\n", wordlist);
  fprintf(stderr, "     and letter bank \"%s\"\n\n", banklist);

  first_pass(dict); // determine MAX_WORDS
  if (MAX_WORDS == 0) {
    fprintf(stderr, "No valid words found in %s\n\n", wordlist);
    exit(EXIT_FAILURE);
  }

  fseek(dict, SEEK_SET, 0L); // Re-read word list

  next_word = 1;
  string = malloc((MAX_WORDS+1) * sizeof(char *)); string[0] = strdup("");
  /* lb and ls point to a flat array where the links are stored.  lb and ls are
     indexed by lookup("word"), ie another layer of indirection is needed
     to get to all the links for a particular word */
  lb = malloc((MAX_WORDS+1) * sizeof(int)); lb[0] = 0; lbpool[0] = 0; next_lb = 1;
  ls = malloc((MAX_WORDS+1) * sizeof(int)); ls[0] = 0; lspool[0] = 0; next_ls = 0;

  fprintf(stdout, "\n");
  fprintf(stdout, "// Array of all words.  word[1].string returns \"aa\" etc.\n");
  fprintf(stdout, "\n");
  fprintf(stdout, "\n");
  fprintf(stdout, "typedef struct transformation {\n");
  fprintf(stdout, "  char *string;\n");
  fprintf(stdout, "  int lb; // points to a list of letterbank equivalents\n");
  fprintf(stdout, "  int ls; // points to a list of letterswap targets\n");
  fprintf(stdout, "} transformation;\n");
  fprintf(stdout, "\n");

  second_pass(dict);

  create_letterbanks(bank);

  create_letterswaps();

  fprintf(stdout, "/* Generated tables: */\n");
  fprintf(stdout, "\n");
  fprintf(stdout, "const int lb[] = { /* Values are indexes of equivalent words, 0 terminates */\n");
  {
    int idx = 0;
    for (;;) {
      if ((idx != 0) && (lbpool[idx] == 0)) break;
      fprintf(stdout, "/* %6d: */   ", idx);
      for (;;) {
        int to_word = lbpool[idx++];
        fprintf(stdout, " %d,", to_word);
        if (to_word == 0) break;
      }
      fprintf(stdout, "\n");
    }
  }
  fprintf(stdout, "};\n");
  fprintf(stdout, "\n");
  fprintf(stdout, "const int ls[] = {\n");
  {
    int idx = 0;
    for (;;) {
      fprintf(stdout, "/* %6d: */   ", idx);
      for (;;) {
        int to_word = lspool[idx++];
        fprintf(stdout, " %d,", to_word);
        if (to_word == 0) break;
      }
      fprintf(stdout, "\n");
      if (idx >= next_ls) break;
    }
  }
  fprintf(stdout, "};\n");
  fprintf(stdout, "\n");

  fprintf(stdout, "#define MAX_WORDS %d\n", MAX_WORDS);
  fprintf(stdout, "\n");
  fprintf(stdout, "transformation word[MAX_WORDS+1] = {\n");
  for (next_word = 0; next_word <= MAX_WORDS; next_word++) {
    fprintf(stdout, "/* %6d: */   { \"%s\", %d, %d },\n",
                    next_word, string[next_word], lb[next_word], ls[next_word]);
  }
  fprintf(stdout, "};\n");

  if (debug&debug_lookup) {
    fprintf(stderr, "Lookup a -> %d\n", lookup("a"));
    fprintf(stderr, "Lookup aa -> %d\n", lookup("aa"));
    fprintf(stderr, "Lookup aargh -> %d\n", lookup("aargh"));
    fprintf(stderr, "Lookup pitstop -> %d\n", lookup("pitstop"));
    fprintf(stderr, "Lookup pitsaw -> %d\n", lookup("pitsaw"));
    fprintf(stderr, "Lookup zyzzyvas -> %d\n", lookup("zyzzyvas"));
    fprintf(stderr, "Lookup zzzzzz -> %d\n", lookup("zzzzzz"));
  }

  exit(EXIT_SUCCESS);
  return (0);
}