#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#ifndef FALSE
#define TRUE (0==0)
#define FALSE (0!=0)
#endif
char **string = NULL;
int *lb = NULL; // each entry points to a list of letterbank equivalents
int *ls = NULL; // each entry points to a list of letterswap targets
char *stringpool = NULL; // consecutive \0-terminated strings
#define LB_MAX 2000000
#define LS_MAX 1000000
static int lbpool[LB_MAX];
static int lspool[LS_MAX];
int next_lb = 0; /* index into lbpool */
int next_ls = 0; /* index into lspool */
int next_word = 0;
int MAX_WORDS = 0;
int MAX_STRINGPOOL = 1;
int next_stringpool = 0;
int lookup(char *word) {
// Return index of the word. Binary chop for now.
int low = 1, high = MAX_WORDS; // inclusive at each end
while (low <= high) {
int middle = (low+high)/2;
int comparison = strcmp(string[middle], word);
if (comparison < 0) {
low = middle+1;
} else if (comparison > 0) {
high = middle-1;
} else return middle;
}
return 0; // not found
}
#ifndef MAX_LINE
#define MAX_LINE (8*1024)
#endif
// determine number of words in dictionary and size of string pool
// updates MAX_WORDS and MAX_STRINGPOOL
void first_pass(FILE *in) {
char line[MAX_LINE+1];
int lp;
int c;
for (;;) { // read all lines
lp = 0;
for (;;) { // read letters within a word
if ((c = fgetc(in)) == EOF) {
if (lp == 0) return;
line[lp] = '\0';
fprintf(stderr, "Unexpected end of file at \"%s\"\n", line);
exit(EXIT_FAILURE);
}
else if (c == '\n') break;
else if (c == '\r') continue;
else {
line[lp++] = c;
if (lp == MAX_LINE) {
fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2);
exit(EXIT_FAILURE);
}
MAX_STRINGPOOL += 1;
}
}
line[lp] = '\0';
MAX_STRINGPOOL += 1;
MAX_WORDS += 1;
}
}
void second_pass(FILE *in) {
char line[MAX_LINE+1];
int lp;
int c;
for (;;) { // read all words
lp = 0;
string[next_word] = &stringpool[next_stringpool];
for (;;) { // read letters within a word
if ((c = fgetc(in)) == EOF) {
if (lp == 0) return;
exit(EXIT_FAILURE);
}
else if (c == '\n') break;
else if (c == '\r') continue;
else {
line[lp++] = c;
if (lp == MAX_LINE) exit(EXIT_FAILURE);
stringpool[next_stringpool++] = c;
}
}
line[lp] = '\0';
lb[next_word] = 0; // none generated yet
ls[next_word] = 0; // ditto
stringpool[next_stringpool++] = '\0';
next_word += 1; // So next_word will equal 1 for the first word, "aa"
}
/* NOT REACHED */
}
void create_letterbanks(FILE *in) {
char group[MAX_LINE+1];
char from_group_copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP.
char *from_word, *from_word_end;
char copy[MAX_LINE+1];
char *word, *end;
int lp;
for (;;) { // read all letterbank lines
lp = 0;
for (;;) { // read letters within a letterbank line
int c = fgetc(in);
if (c == EOF) {
if (lp == 0) return; // EOF should be at the start of a line.
group[lp] = '\0';
fprintf(stderr, "Unexpected end of file at \"%s\"\n", group);
exit(EXIT_FAILURE);
} else if (c == '\n') {
break;
} else if (c == '\r') { // in case of M$
continue;
} else {
group[lp++] = c; if (lp == MAX_LINE) {
fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2);
exit(EXIT_FAILURE);
}
}
}
group[lp] = '\0';
strcpy(from_group_copy, group);
from_word = from_group_copy;
for (;;) {
from_word_end = strchr(from_word, ' ');
if (from_word_end != NULL) *from_word_end++ = '\0';
lb[lookup(from_word)] = next_lb;
strcpy(copy, group);
word = copy;
for (;;) {
end = strchr(word, ' ');
if (end != NULL) *end++ = '\0';
if (strcmp(from_word, word) != 0) { // Don't link to self
lbpool[next_lb++] = lookup(word); if (next_lb >= LB_MAX) {
fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2);
exit(EXIT_FAILURE);
}
}
if ((word = end) == NULL) { // last word
// end of this group
lbpool[next_lb++] = 0; if (next_lb >= LB_MAX) {
fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2);
exit(EXIT_FAILURE);
}
break;
}
}
from_word = from_word_end;
// last word
if (from_word_end == NULL) break;
}
}
}
void add_one_letterswap(char *from_word, char *to_word) {
static int last_old_word = 0;
int old_word, new_word;
new_word = lookup(to_word);
if (new_word != 0) {
old_word = lookup(from_word);
if (last_old_word != old_word) {
lspool[++next_ls] = 0; if (next_ls >= LS_MAX) {
fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2);
exit(EXIT_FAILURE);
}
ls[old_word] = next_ls;
last_old_word = old_word;
}
lspool[next_ls] = new_word;
lspool[++next_ls] = 0; if (next_ls >= LS_MAX) {
fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2);
exit(EXIT_FAILURE);
}
}
}
void create_letterswaps(void) {
char *from_word;
int wordno;
int i, letpos;
char saved_let;
char to_word[MAX_LINE+1];
for (wordno = 1; wordno <= MAX_WORDS; wordno++) {
from_word = string[wordno];
strcpy(to_word, from_word);
for (letpos = 0; letpos < strlen(from_word); letpos++) {
// try all letters at position letpos (except saved_let)
saved_let = from_word[letpos];
// try all letters of the same case at this position
// case hack reduces the need to know the exact details
// of the wordlist file.
if (isalpha(saved_let)) {
if (isupper(saved_let)) {
for (i = 32; i < 256; i++) {
if (isalpha(i) && isupper(i) && (i != saved_let)) {
to_word[letpos] = (char)i;
add_one_letterswap(from_word, to_word);
to_word[letpos] = saved_let;
}
}
} else if (islower(saved_let)) {
for (i = 32; i < 256; i++) {
if (isalpha(i) && islower(i) && (i != saved_let)) {
to_word[letpos] = (char)i;
add_one_letterswap(from_word, to_word);
to_word[letpos] = saved_let;
}
}
}
}
}
}
}
int main(int argc, char **argv) {
FILE *dict, *bank;
char *wordlist, *banklist;
if (argc == 1) {
wordlist = "wordlist.txt";
banklist = "letterbanks.txt";
} else if (argc == 2) {
wordlist = argv[1];
banklist = "letterbanks.txt";
} else if (argc == 3) {
wordlist = argv[1];
banklist = argv[2];
} else {
fprintf(stderr, "syntax: gentables [wordlist.txt [letterbanks.txt]]\n");
exit(EXIT_FAILURE);
}
dict = fopen(wordlist, "r");
if (dict == NULL) {
fprintf(stderr, "Cannot convert word list \"%s\" - %s\n\n", wordlist, strerror(errno));
exit(EXIT_FAILURE);
}
bank = fopen(banklist, "r");
if (bank == NULL) {
fprintf(stderr, "Cannot convert letterbank \"%s\" - %s\n\n", banklist, strerror(errno));
exit(EXIT_FAILURE);
}
fprintf(stderr, "Converting word list \"%s\"\n", wordlist);
fprintf(stderr, " and letter bank \"%s\"\n\n", banklist);
first_pass(dict); // determine MAX_WORDS and MAX_STRINGPOOL
if (MAX_WORDS == 0) {
fprintf(stderr, "No valid words found in %s\n\n", wordlist);
exit(EXIT_FAILURE);
}
fseek(dict, SEEK_SET, 0L); // Re-read word list
fprintf(stdout, "\n");
fprintf(stdout, "// Array of all words. word[1].string returns \"aa\" etc.\n");
fprintf(stdout, "\n");
fprintf(stdout, "\n");
fprintf(stdout, "typedef struct transformation {\n");
fprintf(stdout, " char *string;\n");
fprintf(stdout, " int lb; // points to a list of letterbank equivalents\n");
fprintf(stdout, " int ls; // points to a list of letterswap targets\n");
fprintf(stdout, "} transformation;\n");
fprintf(stdout, "\n");
stringpool = malloc((MAX_STRINGPOOL+1) * sizeof(char)); stringpool[0] = '\0'; next_stringpool = 1;
string = malloc((MAX_WORDS+1) * sizeof(char *)); string[0] = stringpool; next_word = 1;
lb = malloc((MAX_WORDS+1) * sizeof(int)); lb[0] = 0; lbpool[0] = 0; next_lb = 1;
ls = malloc((MAX_WORDS+1) * sizeof(int)); ls[0] = 0; lspool[0] = 0; next_ls = 0;
if ((!string) || (!lb) || (!ls) || (!stringpool)) {
fprintf(stderr, "Malloc failed.\n\n");
exit(EXIT_FAILURE);
}
second_pass(dict);
create_letterbanks(bank);
create_letterswaps();
fprintf(stdout, "/* Generated tables: */\n");
fprintf(stdout, "\n");
fprintf(stdout, "const int lb[] = { /* Values are indexes of equivalent words, 0 terminates */\n");
{
int idx = 0;
for (;;) {
if ((idx != 0) && (lbpool[idx] == 0)) break;
fprintf(stdout, "/* %6d: */ ", idx);
for (;;) {
int to_word = lbpool[idx++];
fprintf(stdout, " %d,", to_word);
if (to_word == 0) break;
}
fprintf(stdout, "\n");
}
}
fprintf(stdout, "};\n");
fprintf(stdout, "\n");
fprintf(stdout, "const int ls[] = {\n");
{
int idx = 0;
for (;;) {
fprintf(stdout, "/* %6d: */ ", idx);
for (;;) {
int to_word = lspool[idx++];
fprintf(stdout, " %d,", to_word);
if (to_word == 0) break;
}
fprintf(stdout, "\n");
if (idx >= next_ls) break;
}
}
fprintf(stdout, "};\n");
fprintf(stdout, "\n");
// NOTE: If modifying this code to produce tables for some other language,
// you may find it easier to output the strings, the lb links, and the ls links
// as three separate scalar arrays rather than a single array of 3-element records.
fprintf(stdout, "#define MAX_WORDS %d\n", MAX_WORDS);
fprintf(stdout, "\n");
fprintf(stdout, "transformation word[MAX_WORDS+1] = {\n");
for (next_word = 0; next_word <= MAX_WORDS; next_word++) {
fprintf(stdout, "/* %6d: */ { \"%s\", %d, %d },\n",
next_word, string[next_word], lb[next_word], ls[next_word]);
}
fprintf(stdout, "};\n");
free(string); free(lb); free(ls); free(stringpool);
exit(EXIT_SUCCESS);
return (0);
}