/* This source file has been superseded by 'findmoves.c' */
/*
In this version of the program I am writing an evaluator not
for my move, but for my opponent's replies. It will
calculate every possible play on the board by the opponent,
regardless of what tiles he holds (ie assuming that anything
that is unseen may be used) and then we will apply the
probability function (from ../jjchew/top300.c) to decide
what the probability of that play is. We can then do the
equivalent of a minimax using only the most probable plays.
The actual evaluation is trivial - it is the same as normal
except that instead of a random 7 tile sampled rack, we give
our opponent a rack of all 100 tiles.
We generate a play, look at what tiles he needed to put down, and
then use the P function to calculate the probability of him
having those tiles. We do this for each possible play at each
site on the board, and sum the probabilities appropriately.
Remembering this part of the maths is likely to be the hardest
part for me!
What we have to do is construct a histogram of score vs total
probability of making that score (regardless of how) on this
board, eg 5% for 10pts 7% for 11pts etc. Actually there
will be a broad range of scores with relatively low values
on each, but that doesn't matter. However at the end of
the day we're going to be looking for answers to questions
such as 'what is the minimum score he is likely to get at
the 90% confidence level' Our play will be determined
by the level of risk we're willing to take. We may find
that he has a 90% chance of scoring up to 12 pts, a 40% chance of
scoring up to 25pts, and a 3% chance of scoring up to 50pts,
and a 0.001% chance of scoring up to 150pts for this move.
We choose the cutoff point we are comfortable with for this
stage in the game, extract the appropriate score, and feed
that into a classic minimax.
Because in this testbed I have not yet added any scrabble
scoring, I shall just assume each tile is worth 1 pt (ie use
the length of the word formed as the score). This won't affect
the concept, just the actual results returned.
Currently the code does not pass along which tiles have been
played - only the tiles left. I need to add that extra
parameter to various procedures. Related to this - it does
not currently realise it is limited to playing a maximum
of 7 tiles. I've made a crude hack for testing but it has
to be fixed for when there are less than 7 tiles left.
Right now I'm modifying the read board procedure to remove
played tiles from the rack.
Because early game testing would otherwise print out every single
7-letter word in almost every position (and so on for shorter
words too) and I haven't yet put in code to optimise the
cases where slots are wide open, I'm going to start testing
with a fairly full board which has closed down most of the
options. Once that works I'll go back and fix the broad
wildcard calls.
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* Only externals used here are my 'dawg' library, and even that is
only for loading the dict file. However in a later iteration I
may move the two dawg-based procedures here to the library */
#include "splib.h"
/* Boolean constants, defined here only if nothing earlier (e.g. a header)
   has already supplied FALSE. */
#ifndef FALSE
#define TRUE (0==0)
#define FALSE (0!=0)
#endif
/* Not referenced anywhere else in this file; it has external linkage, so it
   is presumably read by the spelling library - TODO confirm against splib. */
int spell_verbose = FALSE;
/* When TRUE, the crosscheck and board-reading code trace their progress
   on stderr. */
static int debug = FALSE;
/* There are too many literal constants in this code which should
be replaced by symbolic constants; and a few large static
strings which ought to come off the heap. */
/* Every tile in the set, one character per tile (100 characters in all),
   with '?' standing for a blank. main() copies this into tiles[] and
   read_board() then deletes each tile it finds on the board. */
#define FULLRACK "aaaaaaaaabbccddddeeeeeeeeeeeeffggghhiiiiiiiiijkllllmm\
nnnnnnooooooooppqrrrrrrssssttttttuuuuvvwwxyyz??"
/* 1..15 for board, 0 and 16 for tombstones */
/* board[][] holds the character exactly as read from the board file (case
   preserved); 0 means the square is empty. The tombstone rows/columns are
   always 0, so neighbour tests at the edges need no special-casing. */
static char board[17][17];
/* Lower-cased copy of board[][]: the letter each played tile represents. */
static char apparent_letter[17][17];
/* Premium-square multipliers. main() initialises every entry to 1; nothing
   else in this file reads or writes them yet. */
static int lettermult[17][17];
static int wordmult[17][17];
/* Number of characters left in tiles[]; set at the end of read_board(). */
static int tiles_held = 0;
/* The pool of tiles that have not been seen on the board. */
static char tiles[101]; /* Modified to allow full rack */
/* Per-square crosscheck result, as a heap-allocated string filled in by
   main() one row at a time:
     "?"      no tiles abut above or below (or the square is occupied),
     "!"      no letter can legally be placed here,
     other    the set of letters that form a valid crossing word. */
static char *crosschecks[17][17];
#define HORIZONTAL 1
#define VERTICAL 2
/* Which pass main() is currently on (HORIZONTAL or VERTICAL). Read when
   printing a play and when converting row/column to a board coordinate. */
static int orientation = 0;
/* The word list, loaded by dawg_init() in main(), and its edge count. */
static NODE *dawg;
static INDEX nedges;
/*--------------------- crosscheck code -------------------------*/
/*
When placing a word horizontally, if it abuts to any tiles either
above or below, we will have generated a wildcard string consisting
of the tiles above, a "?", and the tiles below, from which we then
find all the valid words allowed which match that wildcard string.
From that we find which letters were valid at that position, and
we return the set of valid letters to the scrabble code, so that
the search for legal plays can be trimmed for speed.
*/
/* External interface procedure follows this internal procedure, which needs
a lot of extra parameters that the interface isn't really interested in */
/* Recursive DAWG walk behind convert_wildcard_to_permitted_set().

   dawg   the word list
   i      index of the first edge of the node to scan
   word   remaining pattern: literal letters plus (by assumption) at most
          one '?'
   res    scratch buffer holding the letters matched so far
   set    NUL-terminated string accumulating, without duplicates, each
          letter that the '?' stood for in some complete matching word
   len    number of letters already matched (index into res)
   found  incremented once for every complete word that matches

   Always returns true; results are delivered through 'set' and 'found'. */
static int crosscheck_internal(
  NODE *dawg, INDEX i,
  char *word, char *res, char *set,
  int len, int *found
)
{
  /* Letter most recently tried in the '?' position. It is static so that
     the deeper recursion level which completes the word can still see the
     choice made at the shallower level that consumed the '?'. */
  static int wildch = '\0';
  /* Neither of these changes while this node's edges are scanned: the
     pattern is only read, never written, by this procedure. */
  const int target = ((int)*word) & 255;
  const int pattern_ends_here = (*(word+1) == '\0');
  int is_last_edge;

  do {
    const NODE edge = dawg[i++];
    const int ch = (int)((edge >> V_LETTER) & M_LETTER);
    const int endsword = ((edge & M_END_OF_WORD) != 0);
    const INDEX link = edge & M_NODE_POINTER;

    is_last_edge = ((edge & (INDEX)M_END_OF_NODE) != 0);
    res[len] = ch; res[len+1] = '\0';

    if (ch != 0 && (ch == target || target == '?')) {
      if (target == '?') {
        wildch = ch;
      }
      if (pattern_ends_here) {
        if (endsword) {
          (*found)++;
          /* Add the wildcard letter to the set if not already there.
             We just assume only 1 '?' per word, and last wildch was it */
          if (strchr(set, wildch) == NULL) {
            size_t used = strlen(set);
            set[used] = wildch; set[used+1] = '\0';
          }
        }
      } else if (link != 0) {
        /* More pattern to match and this edge has children: descend */
        (void) crosscheck_internal(dawg, link, word+1, res, set, len+1, found);
      }
    }
  } while (!is_last_edge);

  return(0==0);
}
/* This is a 'black box' procedure which can be replaced by anything which
behaves the same even though implemented differently. In fact the
internal code of this function is a 'quick hack' and probably should
be replaced by something neater.
INTERFACE: inputs: word list, wild-card string containing one "?"
output: string of letters which the "?" could represent
or NULL if none found. (i.e this square is blocked)
example: twl98, "c?t" -> "auo"
(cat, cut and cot all matching the pattern)
*/
/* Expand a crosscheck wildcard such as "c?t" into the set of letters which
   the '?' may stand for.

   dawg  the word list
   word  pattern of fixed letters containing one '?'

   Returns NULL when no word matches the pattern. Otherwise returns a
   pointer to a static buffer holding the permitted letters; the buffer is
   overwritten by the next call, so callers must copy what they need. */
char *convert_wildcard_to_permitted_set(NODE *dawg, char *word)
{
  static char set[MAX_WORD_LEN];
  char scratch[MAX_WORD_LEN];   /* working space for the matcher */
  int matches = 0;

  set[0] = '\0';
  if (debug) fprintf(stderr, "wildcard: %s\n", word);
  (void)crosscheck_internal(dawg, (INDEX)ROOT_NODE, word, scratch, set, 0, &matches);
  if (matches != 0) {
    if (debug) fprintf(stderr, " -> %s\n", set);
    return(set);
  }
  return(NULL);
}
/*--------------------- placement code -------------------------*/
/*
This finds words which can be played at this row,column position
of the length requested. It does a wildcard match of a string
such as "c[aiou]t[etrains]" which in this instance might return
cats, cots, cuts. The search is pruned in two ways - the first,
by which letters are allowed on certain squares, eg [aiou] may
be a constraint from cross-check words (see above); and the
second by the tiles in your rack (eg "etrains"). If you hold a
blank, the wildcard might look like "c[aiou]t?" instead.
The decision to do separate searches for different lengths of
words was a deliberate one, to simplify the code. Some scrabble
programs would prefer to do searches which looked more like
"c[aiou]t*" allowing more letters to be added at the right.
I chose not to do this for simplicity, and also because some
other optimisations are possible when you are using fixed-length
searches (although those have not yet been done).
This procedure is also passed the tiles which we hold, because
the wildcard itself does not describe only valid words. For
instance, if we hold the tiles "act" then the wildcard might
be "[act][act][act]" - but "tat" could not be played even
though wildcard expansion listed it. By removing the tiles
played at each point for a wild letter, we end up playing
only "act" and "cat" here.
One known design flaw: if we have the tiles "ob?" and want
to play the word "bob", then the fixed "b" will always be put
down before the wild-card as a "b". This could be a mistake
if the second "b" is on a triple-letter score, for example.
*/
/* External interface procedure follows this internal procedure, which needs
a lot of extra parameters that the interface isn't really interested in */
static int fix_scrabble(
NODE *dawg, INDEX i,
char *word, char *res, char *tilesleft,
int len, int *found, int L, int n
)
{
int endsword, last, ch, target;
NODE node;
INDEX link;
for (;;) {
node = dawg[i++];
ch = (int)((node >> V_LETTER) & M_LETTER);
last = ((node & (INDEX)M_END_OF_NODE) != 0);
endsword = ((node & M_END_OF_WORD) != 0);
link = node & M_NODE_POINTER;
res[len] = ch; res[len+1] = '\0';
target = *word; target &= 255;
if (ch != 0) {
if (ch == target) { /* matches a tile on the board already */
if (endsword && *(word+1) == '\0') {
fprintf(stdout, "play: %s at %c%d %s\n", res, L, n,
(orientation == HORIZONTAL ? "across" : "down"));
(*found)++;
}
if (*(word+1) != '\0' && link != 0) {
(void) fix_scrabble(dawg, link, word+1, res, tilesleft,
len+1, found, L, n);
}
} else if (target == '?') { /* We matched a wildcard. If we have
the correct letter, play it; otherwise play the blank */
if (endsword && *(word+1) == '\0') {
char *s;
int i;
s = strchr(tilesleft, ch);
if (s != NULL) { /* Do we have the actual tile? */
i = *s;
*s = tilesleft[0]; tilesleft[0] = i;
fprintf(stdout, "play: %s at %c%d %s\n", res, L, n,
(orientation == HORIZONTAL ? "across" : "down")); (*found)++;
} else {
s = strchr(tilesleft, '?');
if (s != NULL) { /* If not, do we have a blank left to play? */
i = *s;
*s = tilesleft[0]; tilesleft[0] = i;
fprintf(stdout, "play: %s at %c%d %s\n", res, L, n,
(orientation == HORIZONTAL ? "across" : "down")); (*found)++;
}
}
}
if (*(word+1) != '\0' && link != 0) {
char *s;
int i;
s = strchr(tilesleft, ch);
if (s != NULL) { /* Do we have the actual tile? */
i = *s;
*s = tilesleft[0]; tilesleft[0] = i;
(void) fix_scrabble(dawg, link, word+1, res, tilesleft+1,
len+1, found, L, n);
} else {
s = strchr(tilesleft, '?');
if (s != NULL) { /* If not, do we have a blank left? */
i = *s;
*s = tilesleft[0]; tilesleft[0] = i;
(void) fix_scrabble(dawg, link, word+1, res, tilesleft+1,
len+1, found, L, n);
}
}
}
} else if (target == '[') { /* Is this letter in our set of valid letters? */
char choices[8000];
char *s, *saved = word;
strcpy(choices, word+1);
s = strchr(choices, ']'); /* We assume well-formed expressions */
*s = '\0';
word = strchr(word, ']');
if (strchr(choices, ch) != NULL) {
if (endsword && *(word+1) == '\0') {
char *s;
int i;
s = strchr(tilesleft, ch);
if (s != NULL) { /* Do we have the actual tile? */
i = *s;
*s = tilesleft[0]; tilesleft[0] = i;
fprintf(stdout, "play: %s at %c%d %s\n", res, L, n,
(orientation == HORIZONTAL ? "across" : "down"));
(*found)++;
} else {
s = strchr(tilesleft, '?');
if (s != NULL) { /* If not, do we have a blank left? */
i = *s;
*s = tilesleft[0]; tilesleft[0] = i;
fprintf(stdout, "play: %s at %c%d %s\n", res, L, n,
(orientation == HORIZONTAL ? "across" : "down"));
(*found)++;
}
}
}
if (*(word+1) != '\0' && link != 0) {
char *s;
int i;
s = strchr(tilesleft, ch);
if (s != NULL) { /* Do we have the actual tile? */
i = *s;
*s = tilesleft[0]; tilesleft[0] = i;
(void) fix_scrabble(dawg, link, word+1, res, tilesleft+1,
len+1, found, L, n);
} else {
s = strchr(tilesleft, '?');
if (s != NULL) { /* If not, do we have a blank left? */
i = *s;
*s = tilesleft[0]; tilesleft[0] = i;
(void) fix_scrabble(dawg, link, word+1, res, tilesleft+1,
len+1, found, L, n);
}
}
}
}
word = saved;
}
}
if (last) break;
}
return(0==0);
}
/* This is a 'black box' procedure which can be replaced by anything which
behaves the same even though implemented differently. In fact the
internal code of this function (above) is a 'quick hack' and probably
should be replaced by something neater.
INTERFACE: inputs: word list, wild-card, rack, row & column
outputs: should be list of words to play. (Currently
the output is printed, not returned)
The wild card can contain only fixed letters, [abcd...] (sets of
letters), or "?" (single-character wild-card). There are no multi-
character wild-cards (eg "*") - the word length is fixed.
IMPORTANT NOTE: "cat?" is not the same as "c[a]t?" - the
former expects ONE tile to be placed, the
latter expects TWO. I.e. simple letters
represent tiles already placed on the board.
A cleaned-up interface would not need L/n (letter/number, ie row/col
in format such as "H7 across" or "G5 down". Also note 'orientation'
is wrongly being passed around as a global, not as a parameter) because
this procedure should just return the list of words, and doesn't need
to know where they are placed if it is not doing the placement itself.
*/
/* Print every play which matches the pattern 'word' and can be made from
   'tiles', starting the search at the root of the word list.

   dawg   the word list
   word   placement pattern (fixed letters, "[...]" sets and '?')
   tiles  rack to draw from; its contents may be reordered by the search
   L, n   board coordinate (letter, number), used only when printing

   Returns the number of plays printed. */
int scrabble_match(NODE *dawg, char *word, char *tiles, int L, int n)
{
  int plays_found = 0;
  char scratch[MAX_WORD_LEN];   /* the word being built during the search */

  (void)fix_scrabble(dawg, (INDEX)ROOT_NODE, word, scratch, tiles,
                     0, &plays_found, L, n);
  return(plays_found);
}
/*--------------------------------------------------------------*/
/*
This reads a board in the simple file format which Kevin Cowtan
uses in his board-graphic image generator software. I just happened
to have some cgi web page software which generates this format so I
plan to reuse it to create a graphical front-end to solving scrabble
problems.
*/
/* Load a board position from 'fname' and remove every tile on it from
   tiles[].

   The file is read as 15 lines of exactly 15 characters, each line ended by
   a newline. An alphabetic character is a tile; anything else is an empty
   square. An upper-case letter removes that letter from tiles[]; a
   lower-case letter removes a '?' instead, i.e. it is taken to be a blank
   played as that letter.

   On return board[][] holds the characters as read, apparent_letter[][]
   their lower-case forms, and tiles_held the number of tiles left in
   tiles[]. The remaining tiles are also reported on stderr.

   Any failure (file cannot be opened, a line is not 15 characters long, or
   more copies of a tile are on the board than tiles[] contained) prints a
   message on stderr and terminates the program with a failure status. */
void read_board(char *fname)
{
  int row, col;
  int c;
  int rackletter;
  char *s;
  FILE *sample;

  sample = fopen(fname, "r");
  if (sample == NULL) {
    fprintf(stderr, "Cannot open board file '%s'\n", fname);
    exit(EXIT_FAILURE);
  }
  for (row = 1; row < 16; row++) {
    if (debug) fprintf(stderr, "Row %02d: ", row);
    for (col = 1; col < 16; col++) {
      c = fgetc(sample);
      if (isalpha(c)) { /* Take care with locale for other language versions */
        board[row][col] = c;
        apparent_letter[row][col] = tolower(c);
        if (isupper(c)) {
          rackletter = tolower(c);
        } else {
          /* A lower-case letter on the board used up a blank */
          rackletter = '?';
        }
        s = strchr(tiles, rackletter);
        if (s == NULL) {
          fprintf(stderr, "Error: we appear to have too many %c's\n", rackletter);
          fprintf(stderr, " %s\n", FULLRACK);
          fprintf(stderr, " %s\n", tiles);
          exit(EXIT_FAILURE);
        }
        /* Delete this one tile; strlen(s) bytes covers the rest of the
           string plus its terminating NUL */
        memmove(s, s+1, strlen(s));
        if (debug) fprintf(stderr, "%c", tolower(c));
      } else if (debug) fprintf(stderr, " ");
    }
    c = fgetc(sample); /* newline */
    if (c != '\n') {
      /* Also reached when the file is too short, since EOF is not '\n' */
      fprintf(stderr, "Data format in .dat file\n");
      exit(EXIT_FAILURE);
    }
    if (debug) fprintf(stderr, "\n");
  }
  fclose(sample);
  tiles_held = strlen(tiles);
  fprintf(stderr, "%0d Tiles: %s\n", tiles_held, tiles);
}
/*--------------------------------------------------------------*/
/*
This looks at the place where a tile is likely to be played, and
whether there are tiles abutting above or below it, which would
permit or deny a word to be placed there. This generates a
wild-card string which is then expanded in the code above to
generate the set of letters which are valid here. It is that
set which is stored, not the wild card string or the words themselves.
*/
/* Build the crosscheck wildcard for the empty square at row r, column c:
   the run of tiles directly above it, a '?' for the square itself, then
   the run of tiles directly below it (e.g. "w?rd").

   Returns NULL if the square is occupied, or if there is no tile either
   immediately above or immediately below it. Otherwise returns a freshly
   allocated copy of the wildcard string, which the caller must free. */
char *create_crosscheck_wildcard(int r, int c)
{
  char pattern[17];
  int used = 0;
  int first, row;

  /* Already a tile here so crosscheck is meaningless */
  if (board[r][c] != 0) return(NULL);

  /* none above and none below? */
  if (board[r-1][c] == 0 && board[r+1][c] == 0) return(NULL);

  /* Open question carried over from scrabble (findmoves), whose version of
     this code looks quite different: a bug was fixed there about blanks
     not being treated as a single fixed character once played
     ("if (*s == '?') *s = board[rp][c];", both above and below). It has
     not been decided whether that fix needs retrofitting here. */

  /* Climb to the row of the first letter of the crossing word; the empty
     tombstone row 0 guarantees this stops. */
  first = r;
  while (board[first-1][c] != 0) first -= 1;

  /* what's above? */
  for (row = first; row != r; row++) pattern[used++] = apparent_letter[row][c];

  pattern[used++] = '?'; /* r */

  /* what's below? */
  for (row = r+1; board[row][c] != 0; row++) {
    pattern[used++] = apparent_letter[row][c];
  }
  pattern[used] = '\0';

  return(strdup(pattern));
}
/*--------------------------------------------------------------*/
/*
Can we slot a word into the board at this row,col and length(*)?
If so, then the very last thing we do before returning is to actually
generate the words. This is not really good design - we should
pass the wildcard which this generates up a level and have that
level do the search. This procedure generates a wild-card string
using fixed letters, "[...]" letter sets, and wildcards ("?")
which when expanded returns valid words which can be played here.
*: Note 'length' here doesn't mean word length. It means number of
tiles to be placed. The word may be longer because of tiles
already on the board that we are placing around.
*/
/* Try to lay l new tiles along row r, the first going on the first empty
   square at or after column c, and print every play that results.

   r, c  row and column (1..15) at which the word starts
   l     number of tiles to place (not the length of the word formed)

   A pattern is built for the whole word: tiles already on the board
   become fixed letters, and each square to be filled becomes "[set]" or
   "?" according to its crosscheck and to whether tiles[] holds a blank.

   Returns FALSE without searching if a tile sits immediately left of the
   start, if the placement runs off the board, or if one of the squares
   admits no letter. Otherwise returns whether the placement is properly
   connected; when it is, the matching words are searched for and printed
   as a side effect. */
int slot(int r, int c, int l)
{
  char pattern[8000];
  int used = 0;                  /* characters of pattern[] written so far */
  int placed = 0;                /* tiles laid so far */
  int col = c;
  int hooks_along_row = FALSE;
  int hooks_across = FALSE;
  int covers_center = FALSE;
  int playable;

  /* Covered with an earlier hook; (special case for 1st col) */
  if ((c != 1) && (board[r][c-1] != 0)) return(FALSE);

  while (placed < l) {
    /* Ran off end of board with tiles still to place? */
    if (col == 16) return(FALSE);

    if (board[r][col] != 0) {
      /* There is already a tile on the board here: it becomes a fixed
         letter, and does not count as a tile placed. */
      pattern[used++] = apparent_letter[r][col];
      pattern[used] = '\0';
    } else {
      const char *allowed = crosschecks[r][col];

      /* If center square is empty this must be the first move */
      if (r == 8 && col == 8) covers_center = TRUE;
      if (board[r][col-1] != 0 || board[r][col+1] != 0) hooks_along_row = TRUE;
      if (board[r-1][col] != 0 || board[r+1][col] != 0) hooks_across = TRUE;

      /* If no valid crosscheck at all, then reject this slot */
      if (allowed != NULL && *allowed == '!') return(FALSE);

      if (allowed != NULL && strcmp(allowed, "?") != 0) {
        /* This letter is constrained by cross-checks */
        used += sprintf(pattern+used, "[%s]", allowed);
      } else if (strchr(tiles, '?') != NULL) {
        /* Unconstrained and we hold a blank, so it could be anything */
        used += sprintf(pattern+used, "?");
      } else {
        /* Unconstrained but no blank: restrict the search space to only
           those letters for which we hold tiles */
        used += sprintf(pattern+used, "[%s]", tiles);
      }
      placed += 1; /* We have placed another letter */
    }
    col += 1; /* next column */
  }

  /* after placing all tiles, are there still some letters abutting
     to the right? If so, add them to the string to be matched */
  for (; board[r][col] != 0; col++) pattern[used++] = apparent_letter[r][col];
  pattern[used] = '\0';

  /* A play must touch something. Touching a tile in this row is always
     enough. Touching only tiles above/below, or only covering the center
     square (first move), is enough provided more than one tile is laid: a
     single tile that touches only vertically is really a vertical play and
     is left to be found in the other orientation, and a single tile on the
     center cannot be a first move (a redundant test, as there should be no
     single-letter words in the dictionary). */
  playable = (hooks_along_row || ((hooks_across || covers_center) && (l > 1)));

  /* Find valid plays, with a search trimmed by the wildcard generation
     above. Currently they are printed out. */
  if (playable) {
    scrabble_match(dawg, pattern, tiles,
                   (orientation == HORIZONTAL ? c : r)-1+'A',
                   (orientation == HORIZONTAL ? r : c));
  }
  return(playable);
}
int main(int argc, char **argv)
{
int row, col;
int length;
char *dict, *boardfile;
if (argc != 3) {
if (argc == 2) {
dict = "twl98";
} else {
fprintf(stderr, "syntax: %s board.dat dict?\n", argv[0]);
exit(1);
}
} else dict = argv[2]; /* Default word list is TWL98 */
boardfile = argv[1];
if (!dawg_init(dict, &dawg, &nedges)) {
fprintf(stderr, "%s: cannot open wordlist '%s'\n", argv[0], dict);
exit(2);
}
/* Clear the arrays */
for (row = 0; row < 17; row++) {
for (col = 0; col < 17; col++) {
board[row][col] = 0; apparent_letter[row][col] = 0;
lettermult[row][col] = 1; wordmult[row][col] = 1;
crosschecks[row][col] = NULL;
}
}
/* Supplying the full rack is an excssive approximation. We should
really subtract all played tiles from 'fullrack' and use the
truly remaining tiles. */
strcpy(tiles, FULLRACK);
/* hand-coded random board. This could be replaced by a more complex
format which includes tile distributions, values, language etc */
read_board(boardfile);
/* tiles[] is unknown. We replace the normal value of tiles[]
with the contents of the bag, which is known. */
for (orientation = HORIZONTAL; orientation <= VERTICAL; orientation++) {
/* We assume throughout that we are examining only horizontal plays,
and just flip the board to handle vertical plays. We flip it back
at the end, in case we want to update it with a play and write it
back to file. (but we don't do that yet) */
for (row = 1; row < 16; row++) {
/* calculate crosschecks for this row before we look at
placements for this row. We could actually do this for
the whole board first, but it isn't necessary */
for (col = 1; col < 16; col++) {
char *s;
/* Clean up from previous tries */
if (crosschecks[row][col] != NULL) free(crosschecks[row][col]);
/* first of all get wildcard pattern, eg "w?rd" : */
crosschecks[row][col] = create_crosscheck_wildcard(row, col);
/* Then convert it to a set of valid letters */
if (crosschecks[row][col] == NULL) {
/* crosschecks don't make sense here */
crosschecks[row][col]= strdup("?");
} else {
s = convert_wildcard_to_permitted_set(dawg, crosschecks[row][col]);
free(crosschecks[row][col]);
if (s == NULL) {
/* No letter can currently be placed in this square legally */
crosschecks[row][col] = strdup("!");
/* "!" is easier to debug than empty string */
} else {
crosschecks[row][col] = strdup(s);
}
}
}
/* Now do placements: try every square on this row as position for
first tile to be placed down. Rest follow automatically */
for (col = 1; col < 16; col++) {
/* if (debug) fprintf(stderr, "Testing row %d col %d\n", row, col); */
for (length = 1; length <= 7/*tiles_held*/; length++) {
/* Can I legally place this many tiles onto the board here? */
if (slot(row, col, length)) {
/* would be nice to pass the wildcard string back to here
and then expand it/search for plays, and make the plays
separately. The hack above was for convenience only. */
/* record row,col,length and wildcard for move generator (maybe) */
}
}
}
} /* Try every row on the board */
/* flip the board around the x=y diagonal */
for (row = 1; row < 16; row++) {
for (col = 1; col < 16; col++) {
int i;
if (row > col) { /* avoid doing twice (no-op) */
/* swap board and apparent_letter at row,col */
i = board[row][col];
board[row][col] = board[col][row];
board[col][row] = i;
i = apparent_letter[row][col];
apparent_letter[row][col] = apparent_letter[col][row];
apparent_letter[col][row] = i;
}
}
}
} /* end of horiz/vert placement loop */
/* If want to make best play on board and save it, do it here */
exit(0);
}