#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#ifndef FALSE
#define TRUE (0==0)
#define FALSE (0!=0)
#endif
#define debug_input 1
#define debug_sizes 2
#define debug_strcmp 4
#define debug_lookup 8
#define debug_letterbanks 16
#define debug_letterswaps 32
static int debug = 0 /*|debug_letterswaps|debug_letterbanks|debug_lookup|debug_strcmp|debug_sizes|debug_input*/;
char **string = NULL;
int *lb = NULL; // points to a cycle of letterbank equivalents
int *ls = NULL; // points to a list of letterswap targets
#define LB_MAX 2000000
#define LS_MAX 1000000
static int lbpool[LB_MAX];
static int lspool[LS_MAX];
int next_lb = 0; /* index into lbpool */
int next_ls = 0; /* index into lspool */
int next_word = 0;
int MAX_WORDS = 0;
int lookup(char *word) {
// Return index of the word. Binary chop for now.
int low = 1, high = MAX_WORDS; // inclusive at each end
while (low <= high) {
int middle = (low+high)/2;
int comparison = strcmp(string[middle], word);
if (debug&debug_strcmp) fprintf(stderr, "[%s:%d %s:%d %s:%d] %d\n",
string[low], low,
string[middle], middle,
string[high], high, comparison);
if (comparison < 0) {
low = middle+1;
} else if (comparison > 0) {
high = middle-1;
} else return middle;
}
return 0; // not found
}
#ifndef MAX_LINE
#define MAX_LINE (8*1024)
#endif
void first_pass(FILE *in) {
char line[MAX_LINE+1];
int lp;
for (;;) { // read all words
lp = 0;
for (;;) { // read letters within a word
int c = fgetc(in);
if (c == EOF) {
if (lp == 0) {
if (debug&debug_sizes) fprintf(stderr, "#D MAX_WORDS = %d\n", MAX_WORDS);
return;
}
line[lp] = '\0';
fprintf(stderr, "Unexpected end of file at \"%s\"\n", line);
exit(EXIT_FAILURE);
} else if (c == '\n') {
break;
} else if (c == '\r') { // in case of M$
continue;
} else {
line[lp++] = c; if (lp == MAX_LINE) {
fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2);
exit(EXIT_FAILURE);
}
}
}
line[lp] = '\0';
MAX_WORDS += 1;
if (debug&debug_input) fprintf(stderr, "#D P0: %s\n", line);
}
/* NOT REACHED */
}
void second_pass(FILE *in) {
char line[MAX_LINE+1];
int lp;
for (;;) { // read all words
lp = 0;
for (;;) { // read letters within a word
int c = fgetc(in);
if (c == EOF) {
if (lp == 0) {
if (debug&debug_sizes) fprintf(stderr, "#D MAX_WORDS = %d, next_word = %d\n", MAX_WORDS, next_word);
if (next_word != MAX_WORDS+1) {
fprintf(stderr, "Second pass inconsistency - MAX_WORDS = %d, next_word = %d\n", MAX_WORDS, next_word);
exit(EXIT_FAILURE);
}
return;
}
exit(EXIT_FAILURE);
} else if (c == '\n') {
break;
} else if (c == '\r') { // in case of M$
continue;
} else {
line[lp++] = c; if (lp == MAX_LINE) exit(EXIT_FAILURE);
}
}
line[lp] = '\0';
string[next_word] = strdup(line); // Matbe change to a stringpool?
lb[next_word] = 0; // none generated yet
ls[next_word] = 0; // ditto
if (debug&debug_input) fprintf(stderr, "#D P1: %s\n", line);
next_word += 1; // So next_word will equal 1 for the first word, "aa"
}
/* NOT REACHED */
}
void add_letterbank_link(char *from_word, char *to_word) {
if (strcmp(from_word, to_word) == 0) return; // Don't link to self
if (debug&debug_letterbanks) fprintf(stderr, "Add link \"%s\" (word[%d].lb (%d) ) : \"%s\" (%d)\n",
from_word, lookup(from_word), next_lb,
to_word, lookup(to_word));
// And do it...
lbpool[next_lb++] = lookup(to_word); if (next_lb >= LB_MAX) {
fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2);
exit(EXIT_FAILURE);
}
}
void add_letterbank(char *from_word, char *group) {
/* Loop over each word in group, add a letterbank link from word -> each */
char *word, *end;
char copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP.
lb[lookup(from_word)] = next_lb;
if (debug&debug_letterbanks) fprintf(stderr, "Add \"%s\": \"%s\"\n", from_word, group);
strcpy(copy, group);
word = copy;
for (;;) {
end = strchr(word, ' ');
if (end != NULL) *end++ = '\0';
add_letterbank_link(from_word, word);
if ((word = end) == NULL) { // last word
// end of this group
lbpool[next_lb++] = 0; if (next_lb >= LB_MAX) {
fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2);
exit(EXIT_FAILURE);
}
return;
}
}
}
void add_letterbank_group(char *group) {
char *word, *end;
char copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP.
strcpy(copy, group);
word = copy;
for (;;) {
end = strchr(word, ' ');
if (end != NULL) *end++ = '\0';
add_letterbank(word, group);
word = end;
// last word
if (end == NULL) return;
}
}
void create_letterbanks(FILE *in) {
char line[MAX_LINE+1];
int lp;
for (;;) { // read all letterbank lines
lp = 0;
for (;;) { // read letters within a letterbank line
int c = fgetc(in);
if (c == EOF) {
if (lp == 0) return; // EOF should be at the start of a line.
line[lp] = '\0';
fprintf(stderr, "Unexpected end of file at \"%s\"\n", line);
exit(EXIT_FAILURE);
} else if (c == '\n') {
break;
} else if (c == '\r') { // in case of M$
continue;
} else {
line[lp++] = c; if (lp == MAX_LINE) {
fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2);
exit(EXIT_FAILURE);
}
}
}
line[lp] = '\0';
if (debug&debug_letterbanks) fprintf(stderr, "#D LB: %s\n", line);
add_letterbank_group(line);
}
}
void insert_letterswap(int from_word, int to_word) {
static int last_from_word = 0;
if (last_from_word != from_word) {
lspool[++next_ls] = 0; if (next_ls >= LS_MAX) {
fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2);
exit(EXIT_FAILURE);
}
ls[from_word] = next_ls;
last_from_word = from_word;
}
lspool[next_ls] = to_word;
lspool[++next_ls] = 0; if (next_ls >= LS_MAX) {
fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2);
exit(EXIT_FAILURE);
}
}
void add_one_letterswap(char *from_word, char *to_word) {
int old_word, new_word;
new_word = lookup(to_word);
if (new_word != 0) {
old_word = lookup(from_word);
if (debug&debug_letterswaps) fprintf(stderr, "SWAP: %s (%d) -> %s (%d)\n",
from_word, old_word,
to_word, new_word);
insert_letterswap(old_word, new_word);
}
}
void add_letterswaps(char *from_word)
{
int i, letpos;
char saved_let;
char to_word[MAX_LINE+1];
strcpy(to_word, from_word);
for (letpos = 0; letpos < strlen(from_word); letpos++) {
// try all letters at position letpos (except saved_let)
saved_let = from_word[letpos];
// try all letters of the same case at this position
// case hack reduces the need to know the exact details
// of the wordlist file.
if (isalpha(saved_let)) {
if (isupper(saved_let)) {
for (i = 32; i < 256; i++) {
if (isalpha(i) && isupper(i) && (i != saved_let)) {
to_word[letpos] = (char)i;
add_one_letterswap(from_word, to_word);
to_word[letpos] = saved_let;
}
}
} else if (islower(saved_let)) {
for (i = 32; i < 256; i++) {
if (isalpha(i) && islower(i) && (i != saved_let)) {
to_word[letpos] = (char)i;
add_one_letterswap(from_word, to_word);
to_word[letpos] = saved_let;
}
}
}
}
}
}
void create_letterswaps(void) {
int wordno;
for (wordno = 1; wordno <= MAX_WORDS; wordno++) {
add_letterswaps(string[wordno]);
}
}
int main(int argc, char **argv) {
FILE *dict, *bank;
char *wordlist, *banklist;
if (argc == 1) {
wordlist = "wordlist.txt";
banklist = "letterbanks.txt";
} else if (argc == 2) {
wordlist = argv[1];
banklist = "letterbanks.txt";
} else if (argc == 3) {
wordlist = argv[1];
banklist = argv[2];
} else {
fprintf(stderr, "syntax: gentables [wordlist.txt [letterbanks.txt]]\n");
exit(EXIT_FAILURE);
}
dict = fopen(wordlist, "r");
if (dict == NULL) {
fprintf(stderr, "Cannot convert word list \"%s\" - %s\n\n", wordlist, strerror(errno));
exit(EXIT_FAILURE);
}
bank = fopen(banklist, "r");
if (bank == NULL) {
fprintf(stderr, "Cannot convert letterbank \"%s\" - %s\n\n", banklist, strerror(errno));
exit(EXIT_FAILURE);
}
fprintf(stderr, "Converting word list \"%s\"\n", wordlist);
fprintf(stderr, " and letter bank \"%s\"\n\n", banklist);
first_pass(dict); // determine MAX_WORDS
if (MAX_WORDS == 0) {
fprintf(stderr, "No valid words found in %s\n\n", wordlist);
exit(EXIT_FAILURE);
}
fseek(dict, SEEK_SET, 0L); // Re-read word list
next_word = 1;
string = malloc((MAX_WORDS+1) * sizeof(char *)); string[0] = strdup("");
/* lb and ls point to a flat array where the links are stored. lb and ls are
indexed by lookup("word"), ie another layer of indirection is needed
to get to all the links for a particular word */
lb = malloc((MAX_WORDS+1) * sizeof(int)); lb[0] = 0; lbpool[0] = 0; next_lb = 1;
ls = malloc((MAX_WORDS+1) * sizeof(int)); ls[0] = 0; lspool[0] = 0; next_ls = 0;
fprintf(stdout, "\n");
fprintf(stdout, "// Array of all words. word[1].string returns \"aa\" etc.\n");
fprintf(stdout, "\n");
fprintf(stdout, "\n");
fprintf(stdout, "typedef struct transformation {\n");
fprintf(stdout, " char *string;\n");
fprintf(stdout, " int lb; // points to a list of letterbank equivalents\n");
fprintf(stdout, " int ls; // points to a list of letterswap targets\n");
fprintf(stdout, "} transformation;\n");
fprintf(stdout, "\n");
second_pass(dict);
create_letterbanks(bank);
create_letterswaps();
fprintf(stdout, "/* Generated tables: */\n");
fprintf(stdout, "\n");
fprintf(stdout, "const int lb[] = { /* Values are indexes of equivalent words, 0 terminates */\n");
{
int idx = 0;
for (;;) {
if ((idx != 0) && (lbpool[idx] == 0)) break;
fprintf(stdout, "/* %6d: */ ", idx);
for (;;) {
int to_word = lbpool[idx++];
fprintf(stdout, " %d,", to_word);
if (to_word == 0) break;
}
fprintf(stdout, "\n");
}
}
fprintf(stdout, "};\n");
fprintf(stdout, "\n");
fprintf(stdout, "const int ls[] = {\n");
{
int idx = 0;
for (;;) {
fprintf(stdout, "/* %6d: */ ", idx);
for (;;) {
int to_word = lspool[idx++];
fprintf(stdout, " %d,", to_word);
if (to_word == 0) break;
}
fprintf(stdout, "\n");
if (idx >= next_ls) break;
}
}
fprintf(stdout, "};\n");
fprintf(stdout, "\n");
fprintf(stdout, "#define MAX_WORDS %d\n", MAX_WORDS);
fprintf(stdout, "\n");
fprintf(stdout, "transformation word[MAX_WORDS+1] = {\n");
for (next_word = 0; next_word <= MAX_WORDS; next_word++) {
fprintf(stdout, "/* %6d: */ { \"%s\", %d, %d },\n",
next_word, string[next_word], lb[next_word], ls[next_word]);
}
fprintf(stdout, "};\n");
if (debug&debug_lookup) {
fprintf(stderr, "Lookup a -> %d\n", lookup("a"));
fprintf(stderr, "Lookup aa -> %d\n", lookup("aa"));
fprintf(stderr, "Lookup aargh -> %d\n", lookup("aargh"));
fprintf(stderr, "Lookup pitstop -> %d\n", lookup("pitstop"));
fprintf(stderr, "Lookup pitsaw -> %d\n", lookup("pitsaw"));
fprintf(stderr, "Lookup zyzzyvas -> %d\n", lookup("zyzzyvas"));
fprintf(stderr, "Lookup zzzzzz -> %d\n", lookup("zzzzzz"));
}
exit(EXIT_SUCCESS);
return (0);
}