#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>

#ifndef FALSE
#define TRUE (0==0)
#define FALSE (0!=0)
#endif

char **string = NULL;
int   *lb = NULL;     // each entry points to a list of letterbank equivalents
int   *ls = NULL;     // each entry points to a list of letterswap targets
char  *stringpool = NULL; // consecutive \0-terminated strings

#define LB_MAX 2000000
#define LS_MAX 1000000

static int lbpool[LB_MAX];
static int lspool[LS_MAX];

int next_lb = 0; /* index into lbpool */
int next_ls = 0; /* index into lspool */

int next_word = 0;
int MAX_WORDS = 0;
int MAX_STRINGPOOL = 1;
int next_stringpool = 0;

int lookup(char *word) {
  // Return index of the word.  Binary chop for now.
  int low = 1, high = MAX_WORDS; // inclusive at each end
  while (low <= high) {
    int middle = (low+high)/2;
    int comparison = strcmp(string[middle], word);
    if (comparison < 0) {
      low = middle+1;
    } else if (comparison > 0) {
      high = middle-1;
    } else return middle;
  }
  return 0; // not found
}

#ifndef MAX_LINE
#define MAX_LINE (8*1024)
#endif

// determine number of words in dictionary and size of string pool
// updates MAX_WORDS and MAX_STRINGPOOL
void first_pass(FILE *in) {
  char line[MAX_LINE+1];
  int lp;
  int c;

  for (;;) { // read all lines
    lp = 0;
    for (;;) { // read letters within a word
      if ((c = fgetc(in)) == EOF) {
        if (lp == 0) return;
        line[lp] = '\0';
        fprintf(stderr, "Unexpected end of file at \"%s\"\n", line);
        exit(EXIT_FAILURE);
      }
      else if (c == '\n') break;
      else if (c == '\r') continue;
      else {
        line[lp++] = c;
        if (lp == MAX_LINE) {
          fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2);
          exit(EXIT_FAILURE);
        }
	MAX_STRINGPOOL += 1;
      }
    }
    line[lp] = '\0';
    MAX_STRINGPOOL += 1;
    MAX_WORDS += 1;
  }
}

void second_pass(FILE *in) {
  char line[MAX_LINE+1];
  int lp;
  int c;

  for (;;) { // read all words
    lp = 0;
    string[next_word] = &stringpool[next_stringpool];
    for (;;) { // read letters within a word
      if ((c = fgetc(in)) == EOF) {
        if (lp == 0) return;
        exit(EXIT_FAILURE);
      }
      else if (c == '\n') break;
      else if (c == '\r') continue;
      else {
        line[lp++] = c;
        if (lp == MAX_LINE) exit(EXIT_FAILURE);
        stringpool[next_stringpool++] = c;
      }
    }
    line[lp] = '\0';
    lb[next_word] = 0; // none generated yet
    ls[next_word] = 0; // ditto
    stringpool[next_stringpool++] = '\0';
    next_word += 1; // So next_word will equal 1 for the first word, "aa"
  }
  /* NOT REACHED */
}

void create_letterbanks(FILE *in) {
  char group[MAX_LINE+1];
  char from_group_copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP.
  char *from_word, *from_word_end;
  char copy[MAX_LINE+1];
  char *word, *end;
  int lp;

  for (;;) { // read all letterbank lines
    lp = 0;
    for (;;) { // read letters within a letterbank line
      int c = fgetc(in);
      if (c == EOF) {
        if (lp == 0) return; // EOF should be at the start of a line.
        group[lp] = '\0';
        fprintf(stderr, "Unexpected end of file at \"%s\"\n", group);
        exit(EXIT_FAILURE);
      } else if (c == '\n') {
        break;
      } else if (c == '\r') { // in case of M$
        continue;
      } else {
        group[lp++] = c; if (lp == MAX_LINE) {
          fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2);
          exit(EXIT_FAILURE);
        }
      }
    }
    group[lp] = '\0';
    strcpy(from_group_copy, group);
    from_word = from_group_copy;
    for (;;) {
      from_word_end = strchr(from_word, ' ');
      if (from_word_end != NULL) *from_word_end++ = '\0';
      lb[lookup(from_word)] = next_lb;
      strcpy(copy, group);
      word = copy;
      for (;;) {
        end = strchr(word, ' ');
        if (end != NULL) *end++ = '\0';

        if (strcmp(from_word, word) != 0) { // Don't link to self
          lbpool[next_lb++] = lookup(word); if (next_lb >= LB_MAX) {
            fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2);
            exit(EXIT_FAILURE);
          }
        }

        if ((word = end) == NULL) { // last word
          // end of this group
          lbpool[next_lb++] = 0; if (next_lb >= LB_MAX) {
            fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2);
            exit(EXIT_FAILURE);
          }
          break;
        }
      }

      from_word = from_word_end;
      // last word
      if (from_word_end == NULL) break;
    }

  }
}

void add_one_letterswap(char *from_word, char *to_word) {
  static int last_old_word = 0;
  int old_word, new_word;

  new_word = lookup(to_word);
  if (new_word != 0) {
    old_word = lookup(from_word);
    if (last_old_word != old_word) {
      lspool[++next_ls] = 0; if (next_ls >= LS_MAX) {
        fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2);
        exit(EXIT_FAILURE);
      }
      ls[old_word] = next_ls;
      last_old_word = old_word;
    }
    lspool[next_ls] = new_word;
    lspool[++next_ls] = 0; if (next_ls >= LS_MAX) {
      fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2);
      exit(EXIT_FAILURE);
    }
  }
}

void create_letterswaps(void) {
  char *from_word;
  int wordno;
  int i, letpos;
  char saved_let;
  char to_word[MAX_LINE+1];

  for (wordno = 1; wordno <= MAX_WORDS; wordno++) {
    from_word = string[wordno];
    strcpy(to_word, from_word);
    for (letpos = 0; letpos < strlen(from_word); letpos++) {
      // try all letters at position letpos (except saved_let)
      saved_let = from_word[letpos];
      // try all letters of the same case at this position
      // case hack reduces the need to know the exact details
      // of the wordlist file.
      if (isalpha(saved_let)) {
        if (isupper(saved_let)) {
          for (i = 32; i < 256; i++) {
            if (isalpha(i) && isupper(i) && (i != saved_let)) {
              to_word[letpos] = (char)i;
              add_one_letterswap(from_word, to_word);
              to_word[letpos] = saved_let;    
	    }
          }
        } else if (islower(saved_let)) {
          for (i = 32; i < 256; i++) {
            if (isalpha(i) && islower(i) && (i != saved_let)) {
              to_word[letpos] = (char)i;
              add_one_letterswap(from_word, to_word);
              to_word[letpos] = saved_let;    
	    }
          }
        }
      }
    }
  }
}

int main(int argc, char **argv) {
  FILE *dict, *bank;
  char *wordlist, *banklist;

  if (argc == 1) {
    wordlist = "wordlist.txt";
    banklist = "letterbanks.txt";
  } else if (argc == 2) {
    wordlist = argv[1];
    banklist = "letterbanks.txt";
  } else if (argc == 3) {
    wordlist = argv[1];
    banklist = argv[2];
  } else {
    fprintf(stderr, "syntax: gentables [wordlist.txt [letterbanks.txt]]\n");
    exit(EXIT_FAILURE);
  }

  dict = fopen(wordlist, "r");
  if (dict == NULL) {
    fprintf(stderr, "Cannot convert word list \"%s\" - %s\n\n", wordlist, strerror(errno));
    exit(EXIT_FAILURE);
  }

  bank = fopen(banklist, "r");
  if (bank == NULL) {
    fprintf(stderr, "Cannot convert letterbank \"%s\" - %s\n\n", banklist, strerror(errno));
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "Converting word list \"%s\"\n", wordlist);
  fprintf(stderr, "     and letter bank \"%s\"\n\n", banklist);

  first_pass(dict); // determine MAX_WORDS and MAX_STRINGPOOL

  if (MAX_WORDS == 0) {
    fprintf(stderr, "No valid words found in %s\n\n", wordlist);
    exit(EXIT_FAILURE);
  }

  fseek(dict, SEEK_SET, 0L); // Re-read word list

  fprintf(stdout, "\n");
  fprintf(stdout, "// Array of all words.  word[1].string returns \"aa\" etc.\n");
  fprintf(stdout, "\n");
  fprintf(stdout, "\n");
  fprintf(stdout, "typedef struct transformation {\n");
  fprintf(stdout, "  char *string;\n");
  fprintf(stdout, "  int lb; // points to a list of letterbank equivalents\n");
  fprintf(stdout, "  int ls; // points to a list of letterswap targets\n");
  fprintf(stdout, "} transformation;\n");
  fprintf(stdout, "\n");

  stringpool = malloc((MAX_STRINGPOOL+1) * sizeof(char)); stringpool[0] = '\0'; next_stringpool = 1;
  string = malloc((MAX_WORDS+1) * sizeof(char *)); string[0] = stringpool;  next_word = 1;
  lb = malloc((MAX_WORDS+1) * sizeof(int)); lb[0] = 0; lbpool[0] = 0; next_lb = 1;
  ls = malloc((MAX_WORDS+1) * sizeof(int)); ls[0] = 0; lspool[0] = 0; next_ls = 0;

  if ((!string) || (!lb) || (!ls) || (!stringpool)) {
    fprintf(stderr, "Malloc failed.\n\n");
    exit(EXIT_FAILURE);
  }

  second_pass(dict);
  create_letterbanks(bank);
  create_letterswaps();

  fprintf(stdout, "/* Generated tables: */\n");
  fprintf(stdout, "\n");
  fprintf(stdout, "const int lb[] = { /* Values are indexes of equivalent words, 0 terminates */\n");
  {
    int idx = 0;
    for (;;) {
      if ((idx != 0) && (lbpool[idx] == 0)) break;
      fprintf(stdout, "/* %6d: */   ", idx);
      for (;;) {
        int to_word = lbpool[idx++];
        fprintf(stdout, " %d,", to_word);
        if (to_word == 0) break;
      }
      fprintf(stdout, "\n");
    }
  }
  fprintf(stdout, "};\n");
  fprintf(stdout, "\n");
  fprintf(stdout, "const int ls[] = {\n");
  {
    int idx = 0;
    for (;;) {
      fprintf(stdout, "/* %6d: */   ", idx);
      for (;;) {
        int to_word = lspool[idx++];
        fprintf(stdout, " %d,", to_word);
        if (to_word == 0) break;
      }
      fprintf(stdout, "\n");
      if (idx >= next_ls) break;
    }
  }
  fprintf(stdout, "};\n");
  fprintf(stdout, "\n");

  // NOTE: If modifying this code to produce tables for some other language,
  // you may find it easier to output the strings, the lb links, and the ls links
  // as three separate scalar arrays rather than a single array of 3-element records.

  fprintf(stdout, "#define MAX_WORDS %d\n", MAX_WORDS);
  fprintf(stdout, "\n");
  fprintf(stdout, "transformation word[MAX_WORDS+1] = {\n");
  for (next_word = 0; next_word <= MAX_WORDS; next_word++) {
    fprintf(stdout, "/* %6d: */   { \"%s\", %d, %d },\n",
                    next_word, string[next_word], lb[next_word], ls[next_word]);
  }
  fprintf(stdout, "};\n");

  free(string); free(lb); free(ls); free(stringpool);
  exit(EXIT_SUCCESS);
  return (0);
}