// this is Martin Lauter's code which I received as a code fragment, so I've added a little
// bit of scaffolding around it so that I can run it in the context of my dawg utilities.
// Unfortunately it's now 12 years after I received this and I've either forgotten or never
// knew precisely how the code was invoked!
// cf multiscan.c in this same location. (As is Martin's original source, multidawg-mlauter.c.html)
// cc -Wall -g -o multidawg multidawg.c -lspell -L.
// ./multidawg -d -v /home/gtoal/dict/enable1-wwf3.12.dwg README
// gt. 20130501
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include "splib.h"
#include <assert.h>
/* One active recogniser: a partial match in progress through the dawg. */
typedef struct {
int len; /* number of input characters matched so far (bumped on each matching edge) */
NODE *edge; /* first edge of the dawg node to scan for the next character;
               equal to the dawg base pointer once the item is invalid */
} TItem;
/* Capacity of both the recogniser table item[] and the result buffer reslengths[]. */
#define NUM_RECOGNISERS 1024 // original lower value was causing array bound errors with current test data
/* Recogniser table; process_one_char() works on entries 0 .. nstr-1. */
TItem item[NUM_RECOGNISERS];
/* Number of entries of item[] currently in use; reset to 0 by init(). */
static int nstr;
/* Base of the dawg array, filled in by dawg_init() from main(). */
static NODE *dawg;
/* Second output of dawg_init(); presumably the number of edges in the dawg.
   It is not read anywhere else in the visible code. */
static INDEX nedges;
/* When TRUE (-d option), main() echoes every input character to stderr. */
static int debug = FALSE;
/* Set to TRUE by the -v option.  Not static, so presumably consulted by the
   spell library -- TODO confirm against splib. */
int spell_verbose = FALSE;
/* Output of process_one_char(): the match lengths found for the latest
   character.  Stored as unsigned char, so a length above 255 would be truncated. */
unsigned char reslengths[NUM_RECOGNISERS];
/* Start with an empty recogniser table (no partial matches pending).
   Always returns 0. */
int init()
{
    /* do your initing here */
    nstr = 0;
    return 0;
}
/* Teardown hook, currently a placeholder with nothing to release.
   Always returns 0. */
int uninit()
{
    /* do your uniniting here */
    return 0;
}
/*
 * Take one input character and return the number of lengths stored in
 * reslengths[].
 *
 * Each call starts a fresh recogniser at the dawg root, then offers c to
 * every recogniser in item[0 .. nstr-1]:
 *   - if the recogniser's current node has an edge labelled c, it follows
 *     that edge and its length grows by one; when the edge carries
 *     M_END_OF_WORD the new length is appended to reslengths[];
 *   - otherwise the recogniser is marked invalid and is dropped on the next
 *     call, so only contiguous runs of input can ever match.
 *
 * An item is "invalid" when its edge pointer equals the dawg base pointer
 * (node index 0); such items are compacted out of the table lazily.
 */
int process_one_char(unsigned char c)
{
    int rl = 0;
    int i;

    /* append a root node as last item */
    assert(nstr + 1 < NUM_RECOGNISERS);
    item[nstr].edge = &dawg[ROOT_NODE];
    item[nstr].len = 0;
    nstr++;

    for (i = 0; i < nstr; i++) {
        NODE *edge;
        NODE w;

        /* ignore invalid items: overwrite slot i with the current last item
           and shrink the table, re-testing because the moved item may be
           invalid as well */
        while (item[i].edge == dawg) {
            if (--nstr == i) break;
            item[i] = item[nstr];
        }
        if (nstr == i) break;

        /* start processing valid item */
        edge = item[i].edge;

        /* Presume the character will not match.  Without this, a recogniser
           that found no edge for c stayed parked on its node forever: it was
           never removed (so the table filled up) and it could resume matching
           on a later, non-adjacent character. */
        item[i].edge = dawg;

        do {
            w = *edge++;
            /* check letter at the edge */
            if (((w >> V_LETTER) & M_LETTER) == c) {
                item[i].len++;
                /* if string matched insert its length to result array */
                if ((w & M_END_OF_WORD) != 0) {
                    reslengths[rl++] = item[i].len;
                }
                /* go through edge, will be ignored at next char if invalid */
                item[i].edge = &dawg[w & M_NODE_POINTER];
                break;
            }
            /* otherwise try the next edge of this node, if there is one */
        } while ((w & M_END_OF_NODE) == 0);
    }
    return rl;
}
/*
 * Command-line driver:  multidawg [-d] [-v] dawgfile textfile
 *
 * Loads the dawg, then feeds textfile one byte at a time to
 * process_one_char().  Whenever that returns a non-zero count, the count is
 * written to stderr as "[n]".  With -d every input byte is echoed to stderr
 * as well; -v sets spell_verbose.
 *
 * Exits with EXIT_FAILURE on a usage error, when dawg_init() fails, or when
 * textfile cannot be opened; otherwise exits with 0.
 */
int main(int argc, char **argv)
{
    FILE *fin;
    int c;

    /* The optional flags are only recognised in this order: -d, then -v. */
    if (argc >= 4 && strcmp(argv[1], "-d") == 0) {
        debug = TRUE;
        argc--;
        argv++;
    }
    if (argc >= 4 && strcmp(argv[1], "-v") == 0) {
        spell_verbose = TRUE;
        argc--;
        argv++;
    }
    if (argc != 3) {
        fprintf(stderr, "syntax: multidawg [-d] [-v] dawgfile textfile\n");
        return EXIT_FAILURE;
    }

    /* argv[1] is the dawg file and argv[2] the text to scan. */
    if (!dawg_init(argv[1], &dawg, &nedges)) {
        return EXIT_FAILURE;
    }
    init();

    fin = fopen(argv[2], "rb");
    if (fin == NULL) {
        fprintf(stderr, "multidawg: could not open %s - %s\n", argv[2], strerror(errno));
        return EXIT_FAILURE;
    }

    for (c = getc(fin); c != EOF; c = getc(fin)) {
        int rl;

        if (debug) {
            fprintf(stderr, "%c", c);
        }
        rl = process_one_char(c);
        if (rl != 0) {
            // clearly we need to do something here, I'm just not sure what yet... [gt]
            fprintf(stderr, "[%d]", rl);
        }
    }
    fclose(fin);

    uninit();
    return 0;
}