kragen-hacks

<-- Thread -->
word squares

From: Kragen Sitaker
Subject: word squares
Date: Sun, 27 Jan 2002 00:22:58 -0800
Solution of "word squares" was a popular amusement among word freaks
in the 1800s.  A word square is a square grid of letters that reads
the same horizontally and vertically; for example, 

B e a
e a r
a r c

Here's a program to produce word squares, given an initial seed word
or two.  It runs under Python 1.5.2 and 2.1.1 on my machine, and it
needs the bsddb module, which you'll probably have if your Python runs
on Linux.

I ran it on all the 45404 words from /usr/share/dict/words on my
system, which took 3 hours and found 5797 word squares (12.77% of the
words could start word squares).  This means that it took, on average,
about a quarter of a second to try a word, and about two seconds to
find a word square.  The largest ones were for two seven-letter words,
"prepare" and "mergers":

kragen@detached:devel/wordsquares2$ cat mergers.wsq 
m e r g e r s
e t e r n a l
r e g a t t a
g r a v i t y
e n t i t l e
r a t t l e r
s l a y e r s
kragen@detached:devel/wordsquares2$ cat prepare.wsq 
p r e p a r e
r e m o d e l
e m u l a t e
p o l e m i c
a d a m a n t
r e t i n u e
e l e c t e d

There were 40 two-letter words, 509 three-letter words, 1999
four-letter words, 2760 five-letter words, 487 six-letter words, and
the above two seven-letter words that formed word squares.

#!/usr/bin/python
# known bugs:
# only produces the first word square.
# error messages kind of suck.

import bsddb, os, string, stat, errno, sys

def startswith(seq, prefix):
    """Returns true if seq starts with prefix.

    Like string.startswith in Python 2.x."""
    return seq[:len(prefix)] == prefix

def possiblewords(wordlist, s, length):
    """Return a list of words that start with 's' and have length 'length'.

    Gets the words from wordlist, which can be a bsddb object or
    something supporting a similar protocol.  Its keys are of the form
    '%d %s' % (len(word), word).

    """
    s = '%d %s' % (length, s)
    # 'agonizing' used to provoke a KeyError here --- set_location
    # apparently raises KeyError for '9 zn', at least with my
    # /usr/share/dict/words.  Presumably that's because it's after the
    # last item in the db file.
    try: key, value = wordlist.set_location(s)
    except KeyError: return []
    rv = []
    while 1:
        if not startswith(key, s): return rv
        spindex = string.index(key, ' ')
        rv.append(key[spindex+1:])
        key, value = wordlist.next()

def possiblewordsexist(wordlist, s, length):
    "Return true if there are words starting with 's' of length 'length'."
    s = '%d %s' % (length, s)
    try: key, value = wordlist.set_location(s)
    except KeyError: return 0
    return startswith(key, s)

def wordsquare(wordlist, words):
    assert len(words) > 0
    for word in words: assert len(word) == len(words[0])

    # before this check, 'rage dbed' produced a "word square"
    # even though "d" isn't the second letter of "rage"
    for ii in range(len(words)):
        for jj in range(len(words)):
            assert words[ii][jj] == words[jj][ii]
    return compute_wordsquare(wordlist, words)

def compute_wordsquare(wordlist, words):
    size = len(words[0])
    if len(words) == size: return words
    prefixes = []
    for index in range(len(words), size):
        prefix = (string.join(map(lambda string, index=index:
                                  string[index],
                                  words), ''))
        if not possiblewordsexist(wordlist, prefix, size):
            # prune early; this makes this program several times faster
            # especially for impossible word squares
            return None
        prefixes.append(prefix)
    candidates = possiblewords(wordlist, prefixes[0], size)
    for word in candidates:
        rv = compute_wordsquare(wordlist, words + [word])
        if rv is not None: return rv

def filenametrans(pathname):
    "Turn a pathname into a filename in the current dir."
    filename = string.replace(pathname, os.sep, '_')
    while filename[0] == '_': filename = filename[1:]
    return filename

def mtime(filename):
    return os.stat(filename)[stat.ST_MTIME]

def uptodate(sourcefile, dependentfile):
    sourcefilemtime = mtime(sourcefile)
    try: dependentfilemtime = mtime(dependentfile)
    except OSError, val:
        code, str = val
        if code == errno.ENOENT:
            return 0  # nonexistent, so not up to date
        else:
            raise  # some other error
    return sourcefilemtime < dependentfilemtime

def worddb(sourcefile="/usr/share/dict/words"):
    dbfile = filenametrans(sourcefile) + '.db'
    mustupdate = not uptodate(sourcefile, dbfile)
    dbfileobj = bsddb.btopen(dbfile, 'c')
    if mustupdate:
        try:
            sourcefileobj = open(sourcefile)
            # doesn't exist: dbfileobj.clear()
            for word in dbfileobj.keys(): del dbfileobj[word]
            for word in sourcefileobj.xreadlines():
                while word[-1] in '\r\n': word = word[:-1]
                dbfileobj['%d %s' % (len(word), word)] = '1'
            sourcefileobj.close()
            dbfileobj.sync()
        except:
            # if there's an exception, maybe we fucked up the file
            # so blow it away
            os.unlink(dbfile)
            raise
    return dbfileobj

def printwordsquare(words):
    if words is None: print "No word square"
    else:
        for word in words:
            for char in word:
                print char,
            print

def main(words):
    wordlist = worddb()
    try: square = wordsquare(wordlist, words)
    except AssertionError:
        print "Must specify at least one word; all words must be same length."
        sys.exit(2)
    printwordsquare(square)
    if square is not None: return 0
    return 1

if __name__ == "__main__": sys.exit(main(sys.argv[1:]))
word squares, Kragen Sitaker
<-- Chronological -->
<-- Thread -->