
/* RCS Info: $Revision: 1.2 $ on $Date: 89/03/15 11:16:27 $
 *           $Source: /yew3/faustus/src/scrabble/RCS/dict.c,v $
 * Copyright (c) 1989 Wayne A. Christopher, U. C. Berkeley CS Dept
 *      faustus@renoir.berkeley.edu, ucbvax!faustus
 * Permission is granted to modify and re-distribute this code in any manner
 * as long as this notice is preserved.  All standard disclaimers apply.
 *
 */

#include "scrabble.h"

static int dicthash(char *word, int tabsize);
static void getem(word_t **wordp, char *lbuf, int place, char *opt,
               int numopt, int len);

static dict_t *dictionary;

void
readdict(char *file)
{
       FILE *fp;
       char buf[BSIZE], *e;
       word_t *ww;
       int i, j, k, n, t;

       if (!(fp = fopen(file, "r"))) {
               perror(file);
               exit(0);
       }
       fprintf(stderr, "Reading \"%s\" ", file);
       fflush(stderr);

       dictionary = (dict_t *) util_malloc(sizeof (dict_t));
       for (i = 0; i < MAX_LENGTH; i++)
               for (j = 0; j < HASH_SIZE; j++)
                       dictionary->buckets[i][j] = NULL;
       dictionary->size = 0;

       while (fgets(buf, BSIZE, fp)) {
               if (isupper(buf[0]) || !buf[2])
                       continue;

               for (e = buf; *e; e++)
                       ;
               *--e = '\0';
               addword(buf);

#ifdef notdef
               /* Add the plural... */
               s = e - 1;
               if (index("sxz", *s)) {
                       *++s = 'e';
                       *++s = 's';
                       *++s = '\0';
               } else {
                       *++s = 's';
                       *++s = '\0';
               }
               addword(buf);
               *e = '\0';
#endif
       }

       fprintf(stderr, " %d words.\n", dictionary->size);

       if (debug) {
               fprintf(stderr, "Stats:\n");
               for (i = 0; i < MAX_LENGTH; i++) {
                       fprintf(stderr, "\t%d:", i + 1);
                       n = t = 0;
                       for (j = 0; j < HASH_SIZE; j++) {
                               for (ww = dictionary->buckets[i][j], k = 0; ww;
                                               ww = ww->next)
                                       k++;
                               /* fprintf(stderr, " %d", k); */
                               if (k)
                                       n++;
                               t += k;
                       }
                       fprintf(stderr, " %d / %d full, %d total\n",
                                       n, HASH_SIZE, t);
               }
       }

       fclose(fp);

       return;
}

void
writedict(char *file)
{
       int i, j, k = 0;
       word_t *word;
       FILE *fp;
       char buf[BSIZE];

       if (!(fp = fopen(file, "w"))) {
               perror(file);
               return;
       }

       for (i = 0; i < MAX_LENGTH; i++)
               for (j = 0; j < HASH_SIZE; j++)
                       for (word = dictionary->buckets[i][j]; word;
                                       word = word->next) {
                               fprintf(fp, "%s\n", word->word);
                               k++;
                       }

       fclose(fp);

       sprintf(buf, "Wrote %d words to %s.", k, file);
       user_message(buf);

       return;
}

void
addword(char *word)
{
       int key = dicthash(word, HASH_SIZE);
       int len = strlen(word);
       word_t *ww;

       if ((key == -1) || (len > SIZE))
               return;

       ww = (word_t *) util_malloc(sizeof (word_t));

       ww->word = strsav(word);
       ww->length = len;
       ww->next = dictionary->buckets[len - 1][key];
       dictionary->buckets[len - 1][key] = ww;

       dictionary->size++;

       if (dictionary->size % 1000 == 0) {
               fprintf(stderr, ".");
               fflush(stderr);
       }

       return;
}

void
remword(char *word)
{
       int key = dicthash(word, HASH_SIZE);
       int len = strlen(word);
       word_t *ww, *lw = NULL;

       for (ww = dictionary->buckets[len - 1][key]; ww; ww = ww->next) {
               if (eq(ww->word, word))
                       break;
               lw = ww;
       }

       assert(ww);

       if (lw)
               lw->next = ww->next;
       else
               dictionary->buckets[len - 1][key] = ww->next;

       dictionary->size--;

       return;
}

bool
isaword(char *poss)
{
       word_t *word;
       int len = strlen(poss);
       int key = dicthash(poss, HASH_SIZE);

       if (key == -1)
               return (false);

       for (word = dictionary->buckets[len - 1][key]; word; word = word->next)
               if (eq(word->word, poss))
                       return (true);
       
       return (false);
}

/* Return a list of the buckets containing all the words we could make with
 * the given letters and are of the given length.  Look at all the combinations 
 * that include all the required ones and enough optional ones to make
 * up the length.
 */

word_t *
getpossibles(char *opt, int numopt, char *req, int numreq, int len)
{
       char lbuf[SIZE];
       int i;
       word_t *word = NULL;
       int wc1 = -1, wc2 = -1;

       for (i = 0; i < numreq; i++)
               lbuf[i] = req[i];

       if (numreq + numopt < len)
               return (NULL);
       
       for (i = 0; i < numopt; i++)
               if (opt[i] == WILD) {
                       if (wc1 == -1)
                               wc1 = i;
                       else
                               wc2 = i;
               }

       if (wc1 > -1) {
               for (opt[wc1] = 'a'; opt[wc1] <= 'z'; opt[wc1]++) {
                       if (wc2 > -1) {
                               for (opt[wc2] = 'a'; opt[wc2] <= 'z';
                                               opt[wc2]++)
                                       getem(&word, lbuf, numreq, opt, numopt,
                                                       len);
                               opt[wc2] = WILD;
                       } else
                               getem(&word, lbuf, numreq, opt, numopt, len);
               }
               opt[wc1] = WILD;
       } else
               getem(&word, lbuf, numreq, opt, numopt, len);

       return (word);
}

static void
getem(word_t **wordp, char *lbuf, int place, char *opt, int numopt, int len)
{
       int key;
       word_t *set, *ww;

assert(numopt >= 0);

       if ((numopt == len - place) || (place == len)) {
               while (place < len)
                       lbuf[place++] = *opt++;
               lbuf[len] = '\0';
               key = dicthash(lbuf, HASH_SIZE);
               set = dictionary->buckets[len - 1][key];
               if (set) {
                       for (ww = *wordp; ww; ww = ww->next_set)
                               if (ww == set)
                                       break;
                       if (ww)
                               return;

                       set->next_set = *wordp;
                       *wordp = set;
#ifdef notdef
                       if (debug) {
                               fprintf(stderr, "\t\t");
                               for (ww = set; ww; ww = ww->next)
                                       fprintf(stderr, " %s", ww->word);
                               fprintf(stderr, "\n");
                       }
#endif
               }
       } else {
               lbuf[place] = *opt;
               getem(wordp, lbuf, place + 1, opt + 1, numopt - 1, len);
               getem(wordp, lbuf, place, opt + 1, numopt - 1, len);
       }

       return;
}

#define NLETTERS       28      /* So it's a multiple of 4. */

/* A return value of -1 denotes that this is a bad word. */

static int
dicthash(char *word, int tabsize)
{
       int i;
       unsigned long result = 0;
       unsigned long letters[NLETTERS];
       char *s;

       for (i = 0; i < NLETTERS; i++)
               letters[i] = 0;

       for (s = word; *s; s++) {
               if (!isalpha(*s))
                       return (-1);
               letters[isupper(*s) ? (*s - 'A') : (*s - 'a')]++;
       }

       for (i = 0; i < NLETTERS; i++)
               result ^= letters[i] << i;
       
       return (result % tabsize);
}

