/* This source file has been superseded by 'findmoves.c' */

/*
    In this version of the program I am writing an evaluator not for my
    move, but for my opponent's replies.  It will calculate every possible
    play on the board by the opponent, regardless of what tiles he holds
    (ie assuming that anything that is unseen may be used) and then we will
    apply the probability function (from ../jjchew/top300.c) to decide what
    the probability of that play is.  We can then do the equivalent of a
    minimax using only the most probable plays.

    The actual evaluation is trivial - it is the same as normal except that
    instead of a random 7 tile sampled rack, we give our opponent a rack of
    all 100 tiles.  We generate a play, look at what tiles he needed to put
    down, and then use the P function to calculate the probability of him
    having those tiles.  We do this for each possible play at each site on
    the board, and sum the probabilities appropriately.  Remembering this
    part of the maths is likely to be the hardest part for me!

    What we have to do is construct a histogram of score vs total
    probability of making that score (regardless of how) on this board,
    eg 5% for 10pts, 7% for 11pts etc.  Actually there will be a broad range
    of scores with relatively low values on each, but that doesn't matter.

    However at the end of the day we're going to be looking for answers to
    questions such as 'what is the minimum score he is likely to get at the
    90% confidence level?'  Our play will be determined by the level of risk
    we're willing to take.  We may find that he has a 90% chance of scoring
    up to 12pts, a 40% chance of scoring up to 25pts, a 3% chance of
    scoring up to 50pts, and a 0.001% chance of scoring up to 150pts for
    this move.  We choose the cutoff point we are comfortable with for this
    stage in the game, extract the appropriate score, and feed that into a
    classic minimax.

Because in this testbed I have not yet added any scrabble scoring, I shall just assume each tile is worth 1 pt (ie use the length of the word formed as the score). This won't affect the concept, just the actual results returned. Currently the code does not pass along which tiles have been played - only the tiles left. I need to add that extra parameter to various procedures. Related to this - it does not currently realise it is limited to playing a maximum of 7 tiles. I've made a crude hack for testing but it has to be fixed for when there are less than 7 tiles left. Right now I'm modifying the read board procedure to remove played tiles from the rack. Because early game testing would otherwise print out every single 7-letter word in almost every position (and so on for shorter words too) and I haven't yet put in code to optimise the cases where slots are wide open, I'm going to start testing with a fairly full board which has closed down most of the options. Once that works I'll go back and fix the broad wildcard calls. */ #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include <string.h> /* Only externals used here are my 'dawg' library, and even that is only for loading the dict file. However in a later iteration I may move the two dawg-based procedures here to the library */ #include "splib.h" #ifndef FALSE #define TRUE (0==0) #define FALSE (0!=0) #endif int spell_verbose = FALSE; static int debug = FALSE; /* There are too many literal constants in this code which should be replaced by symbolic constants; and a few large static strings which ought to come off the heap. */ #define FULLRACK "aaaaaaaaabbccddddeeeeeeeeeeeeffggghhiiiiiiiiijkllllmm\ nnnnnnooooooooppqrrrrrrssssttttttuuuuvvwwxyyz??" 
/* 1..15 for board, 0 and 16 for tombstones */ static char board[17][17]; static char apparent_letter[17][17]; static int lettermult[17][17]; static int wordmult[17][17]; static int tiles_held = 0; static char tiles[101]; /* Modified to allow full rack */ static char *crosschecks[17][17]; #define HORIZONTAL 1 #define VERTICAL 2 static int orientation = 0; static NODE *dawg; static INDEX nedges; /*--------------------- crosscheck code -------------------------*/ /* When placing a word horizontally, if it abuts to any tiles either above or below, we will have generated a wildcard string consisting of the tiles above, a "?", and the tiles below, from which we then find all the valid words allowed which match that wildcard string. From that we find which letters were valid at that position, and we return the set of valid letters to the scrabble code, so that the search for legal plays can be trimmed for speed. */ /* External interface procedure follows this internal procedure, which needs a lot of extra parameters that the interface isn't really interested in */ static int crosscheck_internal( NODE *dawg, INDEX i, char *word, char *res, char *set, int len, int *found ) { int endsword, last, ch, target; NODE node; INDEX link; static int wildch = '\0'; for (;;) { node = dawg[i++]; ch = (int)((node >> V_LETTER) & M_LETTER); last = ((node & (INDEX)M_END_OF_NODE) != 0); endsword = ((node & M_END_OF_WORD) != 0); link = node & M_NODE_POINTER; res[len] = ch; res[len+1] = '\0'; target = ((int)*word)&255; if (ch != 0) { if (ch == target || target == '?') { if (target == '?') { wildch = ch; } if (endsword && *(word+1) == '\0') { int i; (*found)++; /* Add ch to crosscheck set if not already there */ /* We just assume only 1 '?' 
per word, and last wildch was it */ if (strchr(set, wildch) == NULL) { i = strlen(set); set[i] = wildch; set[i+1] = '\0'; } } if (*(word+1) != '\0' && link != 0) (void) crosscheck_internal(dawg, link, word+1, res, set, len+1, found); } } if (last) break; } return(0==0); } /* This is a 'black box' procedure which can be replaced by anything which behaves the same even though implemented differently. In fact the internal code of this function is a 'quick hack' and probably should be replaced by something neater. INTERFACE: inputs: word list, wild-card string containing one "?" output: string of letters which the "? could represent or NULL if none found. (i.e this square is blocked) example: twl98, "c?t" -> "auo" (cat, cut and cot all matching the pattern) */ char *convert_wildcard_to_permitted_set(NODE *dawg, char *word) { char result[MAX_WORD_LEN]; static char set[MAX_WORD_LEN]; int i = 0; *set = '\0'; if (debug) fprintf(stderr, "wildcard: %s\n", word); (void)crosscheck_internal(dawg, (INDEX)ROOT_NODE, word, result, set, 0, &i); if (i == 0) return(NULL); if (debug) fprintf(stderr, " -> %s\n", set); return(set); } /*--------------------- placement code -------------------------*/ /* This finds words which can be played at this row,column position of the length requested. It does a wildcard match of a string such as "c[aiou]t[etrains]" which in this instance might return cats, cots, cuts. The search is pruned in two ways - the first, by which letters are allowed on certain squares, eg [aiou] may be a constraint from cross-check words (see above); and the second by the tiles in your rack (eg "etrains"). If you hold a blank, the wildcard might look like "c[aiou]t?" instead. The decision to do separate searches for different lengths of words was a deliberate one, to simplify the code. Some scrabble programs would prefer to do searches which looked more like "c[aiou]t*" allowing more letters to be added at the right. 
I chose not to do this for simplicity, and also because some other optimisations are possible when you are using fixed-length searches (although those have not yet been done). This procedure is also passed the tiles which we hold, because the wildcard itself does not describe only valid words. For instance, if we hold the tiles "act" then the wildcard might be "[act][act][act]" - but "tat" could not be played even though wildcard expansion listed it. By removing the tiles played at each point for a wild letter, we end up playing only "act" and "cat" here. One known design flaw: if we have the tiles "ob?" and want to pay the word "bob", then the fixed "b" will always be put down before the wild-card as a "b". This could be a mistake if the second "b" is on a triple-letter score, for example. */ /* External interface procedure follows this internal procedure, which needs a lot of extra parameters that the interface isn't really interested in */ static int fix_scrabble( NODE *dawg, INDEX i, char *word, char *res, char *tilesleft, int len, int *found, int L, int n ) { int endsword, last, ch, target; NODE node; INDEX link; for (;;) { node = dawg[i++]; ch = (int)((node >> V_LETTER) & M_LETTER); last = ((node & (INDEX)M_END_OF_NODE) != 0); endsword = ((node & M_END_OF_WORD) != 0); link = node & M_NODE_POINTER; res[len] = ch; res[len+1] = '\0'; target = *word; target &= 255; if (ch != 0) { if (ch == target) { /* matches a tile on the board already */ if (endsword && *(word+1) == '\0') { fprintf(stdout, "play: %s at %c%d %s\n", res, L, n, (orientation == HORIZONTAL ? "across" : "down")); (*found)++; } if (*(word+1) != '\0' && link != 0) { (void) fix_scrabble(dawg, link, word+1, res, tilesleft, len+1, found, L, n); } } else if (target == '?') { /* We matched a wildcard. If we have the correct letter, play it; otherwise play the blank */ if (endsword && *(word+1) == '\0') { char *s; int i; s = strchr(tilesleft, ch); if (s != NULL) { /* Do we have the actual tile? 
*/ i = *s; *s = tilesleft[0]; tilesleft[0] = i; fprintf(stdout, "play: %s at %c%d %s\n", res, L, n, (orientation == HORIZONTAL ? "across" : "down")); (*found)++; } else { s = strchr(tilesleft, '?'); if (s != NULL) { /* If not, do we have a blank left to play? */ i = *s; *s = tilesleft[0]; tilesleft[0] = i; fprintf(stdout, "play: %s at %c%d %s\n", res, L, n, (orientation == HORIZONTAL ? "across" : "down")); (*found)++; } } } if (*(word+1) != '\0' && link != 0) { char *s; int i; s = strchr(tilesleft, ch); if (s != NULL) { /* Do we have the actual tile? */ i = *s; *s = tilesleft[0]; tilesleft[0] = i; (void) fix_scrabble(dawg, link, word+1, res, tilesleft+1, len+1, found, L, n); } else { s = strchr(tilesleft, '?'); if (s != NULL) { /* If not, do we have a blank left? */ i = *s; *s = tilesleft[0]; tilesleft[0] = i; (void) fix_scrabble(dawg, link, word+1, res, tilesleft+1, len+1, found, L, n); } } } } else if (target == '[') { /* Is this letter in our set of valid letters? */ char choices[8000]; char *s, *saved = word; strcpy(choices, word+1); s = strchr(choices, ']'); /* We assume well-formed expressions */ *s = '\0'; word = strchr(word, ']'); if (strchr(choices, ch) != NULL) { if (endsword && *(word+1) == '\0') { char *s; int i; s = strchr(tilesleft, ch); if (s != NULL) { /* Do we have the actual tile? */ i = *s; *s = tilesleft[0]; tilesleft[0] = i; fprintf(stdout, "play: %s at %c%d %s\n", res, L, n, (orientation == HORIZONTAL ? "across" : "down")); (*found)++; } else { s = strchr(tilesleft, '?'); if (s != NULL) { /* If not, do we have a blank left? */ i = *s; *s = tilesleft[0]; tilesleft[0] = i; fprintf(stdout, "play: %s at %c%d %s\n", res, L, n, (orientation == HORIZONTAL ? "across" : "down")); (*found)++; } } } if (*(word+1) != '\0' && link != 0) { char *s; int i; s = strchr(tilesleft, ch); if (s != NULL) { /* Do we have the actual tile? 
*/ i = *s; *s = tilesleft[0]; tilesleft[0] = i; (void) fix_scrabble(dawg, link, word+1, res, tilesleft+1, len+1, found, L, n); } else { s = strchr(tilesleft, '?'); if (s != NULL) { /* If not, do we have a blank left? */ i = *s; *s = tilesleft[0]; tilesleft[0] = i; (void) fix_scrabble(dawg, link, word+1, res, tilesleft+1, len+1, found, L, n); } } } } word = saved; } } if (last) break; } return(0==0); } /* This is a 'black box' procedure which can be replaced by anything which behaves the same even though implemented differently. In fact the internal code of this function (above) is a 'quick hack' and probably should be replaced by something neater. INTERFACE: inputs: word list, wild-card, rack, row & column outputs: should be list of words to play. (Currently the output is printed, not returned) The wild card can contain only fixed letters, [abcd...] (sets of letters), or "?" (single-character wild-card). There are no multi- character wild-cards (eg "*") - the word length is fixed. IMPORTANT NOTE: "cat?" is not the same as "c[a]t?" - the former expects ONE tile to be placed, the latter exects TWO. I.e. simple letters represent tiles already placed on the board. A cleaned-up interface would not need L/n (letter/number, ie row/col in format such as "H7 across" or "G5 down". Also note 'orientation' is wrongly being passed around as a global, not as a parameter) because this procedure should just return the list of words, and doesn't need to know where they are placed if it is not doing the placement itself. */ int scrabble_match(NODE *dawg, char *word, char *tiles, int L, int n) { char result[MAX_WORD_LEN]; int i = 0; (void)fix_scrabble(dawg, (INDEX)ROOT_NODE, word, result, tiles, 0, &i, L, n); return(i); } /*--------------------------------------------------------------*/ /* This reads a board in the simple file format which Kevin Cowtan uses in his board-graphic image generator software. 
I just happened to have some cgi web page software which generates this format so I plan to reuse it to create a graphical front-end to solving scrabble problems. */ void read_board(char *fname) { int row, col; int c; int score, tiles_left_in_bag; int rackletter; char *s; FILE *sample; sample = fopen(fname, "r"); if (sample == NULL) { fprintf(stderr, "Cannot open board file '%s'\n", fname); exit(0); } for (row = 1; row < 16; row++) { if (debug) fprintf(stderr, "Row %02d: ", row); for (col = 1; col < 16; col++) { c = fgetc(sample); if (isalpha(c)) { /* Take care with locale for other language versions */ board[row][col] = c; apparent_letter[row][col] = tolower(c); if (isupper(c)) { rackletter = tolower(c); } else { /* Currently not including blanks in the calculation */ rackletter = '?'; } s = strchr(tiles, rackletter); if (s == NULL) { fprintf(stderr, "Error: we appear to have too many %c's\n", rackletter); fprintf(stderr, " %s\n", FULLRACK); fprintf(stderr, " %s\n", tiles); exit(0); } memmove(s, s+1, strlen(s)); if (debug) fprintf(stderr, "%c", tolower(c)); } else if (debug) fprintf(stderr, " "); } c = fgetc(sample); /* newline */ if (c != '\n') { fprintf(stderr, "Data format in .dat file\n"); exit(0); } if (debug) fprintf(stderr, "\n"); } tiles_held = strlen(tiles); fprintf(stderr, "%0d Tiles: %s\n", tiles_held, tiles); } /*--------------------------------------------------------------*/ /* This looks at the place where a tile is likely to be played, and whether there are tiles abutting above or below it, which would permit or deny a word to be placed there. This generates a wild-card string which is then expanded in the code above to generate the set of letters which are valid here. It is that set which is stored, not the wild card string or the words themselves. 
*/ char *create_crosscheck_wildcard(int r, int c) { char crosscheck[17]; char *s; int rp; /* Already a tile here so crosscheck is meaningless */ if (board[r][c] != 0) return(NULL); /* none above and none below? */ if ((board[r-1][c] == 0) && (board[r+1][c] == 0)) return(NULL); // this code looks quite different from that in scrabble (findmoves) // where I fixed a bug about blanks not being treated as a single fixed // character once played. Do i need to retrofit this fix?: //if (*s == '?') *s = board[rp][c]; // FIX FOR BUG IN "wildcard:" // (both above and below) /* what's above? */ rp = r-1; while (board[rp][c] != 0) rp -= 1; rp += 1; /* row of 1st letter in crosscheck word */ s = crosscheck; while (rp != r) *s++ = apparent_letter[rp++][c]; *s++ = '?'; /* r */ /* what's below? */ rp = r+1; while (board[rp][c] != 0) { *s++ = apparent_letter[rp][c]; rp += 1; } *s = '\0'; return(strdup(crosscheck)); } /*--------------------------------------------------------------*/ /* Can we slot a word into the board at this row,col and length(*)? If so, then the very last thing we do before returning is to actually generate the words. This is not really good design - we should pass the wildcard which this generates up a level and have that level do the search. This procedure generates a wild-card string using fixed letters, "[...]" letter sets, and wildcards ("?") which when expanded returns valid words which can be played here. *: Note 'length' here doesn't mean word length. It means number of tiles to be placed. The word may be longer because of tiles already on the board that we are placing around. */ int slot(int r, int c, int l) { int i, pp = 0, p = c; int touches_horiz = FALSE; int touches_vert = FALSE; int touches_center = FALSE; int valid; char pattern[8000]; if ((board[r][c-1] != 0) && (c != 1)) return(FALSE); /* Covered with an earlier hook; (special case for 1st col) */ for (i = 0; i < l; ) { /* Ran off end of board with tiles still to place? 
*/ if (p == 16) return(FALSE); if (board[r][p] == 0) { /* square is free */ /* If center square is empty this must be the first move */ if ((r == 8) && (p == 8)) touches_center = TRUE; if (board[r][p-1] != 0 || board[r][p+1] != 0) touches_horiz = TRUE; if (board[r-1][p] != 0 || board[r+1][p] != 0) touches_vert = TRUE; if (crosschecks[r][p] != NULL) { /* If no valid crosscheck at all, then reject this slot */ if (*crosschecks[r][p] == '!') return(FALSE); pattern[pp] = '\0'; if ((strcmp(crosschecks[r][p], "?") == 0) && (strchr(tiles, '?') == NULL)) { /* we don't hold a blank, so do a reduction-in-strength of a wild-card "?" to [abcdefg] (i.e. only the tiles we hold) */ sprintf(pattern+pp, "[%s]", tiles); } else if (strcmp(crosschecks[r][p], "?") == 0) { /* we do hold a blank, so allow anything */ sprintf(pattern+pp, "%s", crosschecks[r][p]); } else { /* This letter is constrained by cross-checks */ sprintf(pattern+pp, "[%s]", crosschecks[r][p]); } pp = strlen(pattern); } else { /* No crosscheck neighbours */ if (strchr(tiles, '?') != NULL) { /* We hold a blank so it could be anything */ sprintf(pattern+pp, "?"); } else { /* restrict search space when possible, ie only those letters for which we hold tiles */ sprintf(pattern+pp, "[%s]", tiles); } pp = strlen(pattern); } i += 1; /* We have placed another letter */ } else { /* There is already a tile on the board here. */ pattern[pp++] = apparent_letter[r][p]; pattern[pp] = '\0'; /* don't increment 'i' because we haven't placed a tile */ } p += 1; /* next column */ } /* after placing all tiles, are there still some letters abutting to the right? If so, add them to the string to be matched */ while (board[r][p] != 0) pattern[pp++] = apparent_letter[r][p++]; pattern[pp] = '\0'; /* If this is the first move and it is placed on the center square, or it touches another tile, it is a valid play - but if not, you're putting tiles down in the middle of space! 
Note, if a horizontal play of one single tile does not touch any abutting horizontal tiles, but does touch a vertical tile, then it is really a vertical play, and is inhibited here so that it will show up correctly when played in the other orientation. Also you cannot play a single tile on the center as the first move, though this test is in fact redundant because there should be no single-letter words in the dictionary */ valid = (touches_horiz || (touches_vert && (l > 1)) || (touches_center && (l > 1))); /* Find valid plays, with a search trimmed by the wildcard generation above. Currently they are printed out. */ if (valid) scrabble_match(dawg, pattern, tiles, (orientation == HORIZONTAL ? c : r)-1+'A', (orientation == HORIZONTAL ? r : c)); return(valid); } int main(int argc, char **argv) { int row, col; int length; char *dict, *boardfile; if (argc != 3) { if (argc == 2) { dict = "twl98"; } else { fprintf(stderr, "syntax: %s board.dat dict?\n", argv[0]); exit(1); } } else dict = argv[2]; /* Default word list is TWL98 */ boardfile = argv[1]; if (!dawg_init(dict, &dawg, &nedges)) { fprintf(stderr, "%s: cannot open wordlist '%s'\n", argv[0], dict); exit(2); } /* Clear the arrays */ for (row = 0; row < 17; row++) { for (col = 0; col < 17; col++) { board[row][col] = 0; apparent_letter[row][col] = 0; lettermult[row][col] = 1; wordmult[row][col] = 1; crosschecks[row][col] = NULL; } } /* Supplying the full rack is an excssive approximation. We should really subtract all played tiles from 'fullrack' and use the truly remaining tiles. */ strcpy(tiles, FULLRACK); /* hand-coded random board. This could be replaced by a more complex format which includes tile distributions, values, language etc */ read_board(boardfile); /* tiles[] is unknown. We replace the normal value of tiles[] with the contents of the bag, which is known. 
*/ for (orientation = HORIZONTAL; orientation <= VERTICAL; orientation++) { /* We assume throughout that we are examining only horizontal plays, and just flip the board to handle vertical plays. We flip it back at the end, in case we want to update it with a play and write it back to file. (but we don't do that yet) */ for (row = 1; row < 16; row++) { /* calculate crosschecks for this row before we look at placements for this row. We could actually do this for the whole board first, but it isn't necessary */ for (col = 1; col < 16; col++) { char *s; /* Clean up from previous tries */ if (crosschecks[row][col] != NULL) free(crosschecks[row][col]); /* first of all get wildcard pattern, eg "w?rd" : */ crosschecks[row][col] = create_crosscheck_wildcard(row, col); /* Then convert it to a set of valid letters */ if (crosschecks[row][col] == NULL) { /* crosschecks don't make sense here */ crosschecks[row][col]= strdup("?"); } else { s = convert_wildcard_to_permitted_set(dawg, crosschecks[row][col]); free(crosschecks[row][col]); if (s == NULL) { /* No letter can currently be placed in this square legally */ crosschecks[row][col] = strdup("!"); /* "!" is easier to debug than empty string */ } else { crosschecks[row][col] = strdup(s); } } } /* Now do placements: try every square on this row as position for first tile to be placed down. Rest follow automatically */ for (col = 1; col < 16; col++) { /* if (debug) fprintf(stderr, "Testing row %d col %d\n", row, col); */ for (length = 1; length <= 7/*tiles_held*/; length++) { /* Can I legally place this many tiles onto the board here? */ if (slot(row, col, length)) { /* would be nice to pass the wildcard string back to here and then expand it/search for plays, and make the plays separately. The hack above was for convenience only. 
*/ /* record row,col,length and wildcard for move generator (maybe) */ } } } } /* Try every row on the board */ /* flip the board around the x=y diagonal */ for (row = 1; row < 16; row++) { for (col = 1; col < 16; col++) { int i; if (row > col) { /* avoid doing twice (no-op) */ /* swap board and apparent_letter at row,col */ i = board[row][col]; board[row][col] = board[col][row]; board[col][row] = i; i = apparent_letter[row][col]; apparent_letter[row][col] = apparent_letter[col][row]; apparent_letter[col][row] = i; } } } } /* end of horiz/vert placement loop */ /* If want to make best play on board and save it, do it here */ exit(0); }