/* POTM ENTRY:     SnakePit
 * Name:           Tim Ruhl
 * Email:          tim@cs.vu.nl
 * Compilation:    Should work with cc and gcc.
 */

#define NDEBUG

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <math.h>

                                /* Constants */
#define NR_TIMEOUT 1
#define TIMEOUT    590

#define POP_SIZE   200
#define MUTATE     350

#define INIT_WORDS 5

                                /* Types */

typedef struct board {
    char fields[25];
} board_t, *board_p;

typedef struct member {         /* Member of the genetic population */
    board_t b;
    int     score;
} member_t, *member_p;

                                /* Constant variables */

static int moves[8] = {1, -1, 5, -5, 6, 4, -6, -4};
static int nb[25][8];
static int nrnb[25];
                                /* Variables */

static char *progname;
static int timeout = TIMEOUT;
static int nr_timeout = NR_TIMEOUT;
static int running;
static int verbose;
static int give_score;
static int dump_eval;

/* Word list data structures */
static char words[1000][26];
static int  wordlen[1000];
static int  wordcnt[1000];
static int  next[1000][25][26];
static int  nr_words;
static int  freq[26];
static int  sum_freq;


/* Word finder */
static int word_used[1000];
static int used[49];
static int letters, count;
static int seqno;
static int print_words;

/* Genetic population */
static member_t pop[2][POP_SIZE];
static int curr;
static member_t best;

                                /* Macros */
#define ABS(x) ((x) < 0 ? -(x) : (x))


                                /* Functions  */

static void
usage(void)
{
    fprintf(stderr, "Usage: %s "
            "[-r <seed>] "
            "[-t <timeout>] "
            "[-T <nr timeouts> ]"
            "[-vdse] "
            "<wordlist file>\n", progname);
    exit(1);
}

/* handler:
 *                 Handle SIGALRM. Set running to zero which causes the
 *                 computation to terminate.
 */
static void
handler(int s)
{
    running = 0;
}


/* init_neighbors:
 *                 Precompute the neighbor list for all fields.
 */
static void
init_neighbors(void)
{
    int row1, col1, row2, col2;
    int i, j, p;

    for(i = 0; i < 25; i++) {
        row1 = i / 5;
        col1 = i % 5;

        for(j = 0; j < 8; j++) {
            p = i + moves[j];
            if (p < 0 || p >= 25) continue;

            row2 = p / 5;
            col2 = p % 5;
        
            if (ABS(row1 - row2) <= 1 && ABS(col1 - col2) <= 1) {
                nb[i][nrnb[i]++] = p;
            }
        }
    }
}



static void
read_words(char *file)
{
    FILE *fp;

    if ((fp = fopen(file, "r")) == NULL) {
        fprintf(stderr, "Cannot open word file %s\n", file);
        exit(1);
    }

    nr_words = 0;
    while(fscanf(fp, "%s", words[nr_words]) == 1) {
        nr_words++;
    }

    fclose(fp);
}

static void
compute_wordlen(void)
{
    int i;

    for(i = 0; i < nr_words; i++) {
        wordlen[i] = strlen(words[i]);
    }
}


/* compute_freq:
 *                 Get the number of occurences of each letter in the
 *                 wordlist. Used to select random letters.
 */
static void
compute_freq(void)
{
    int i, j;

    for(i = 0; i < nr_words; i++) {
        for(j = 0; j < wordlen[i]; j++) {
            freq[words[i][j] - 'A']++;
        }
        sum_freq += wordlen[i];
    }
}

static void
sort_words(void)
{
    char buf[26];
    int i, j;
    int t;

    for(i = 0; i < nr_words; i++) {
        for(j = i + 1; j < nr_words; j++) {
            if (strcmp(words[i], words[j]) > 0) {
                strcpy(buf, words[i]);
                strcpy(words[i], words[j]);
                strcpy(words[j], buf);

                t = wordlen[i];
                wordlen[i] = wordlen[j];
                wordlen[j] = t;
            }
        }
    }
}

/* filter_duplicates:
 *                 Filter duplicate words. According to the FAQ, this is
 *                 not necessary, but I never removed it.
 */
static void
filter_duplicates(void)
{
    int i, j;

    j = 0;
    wordcnt[0] = 1;

    for(i = 1; i < nr_words; i++) {
        if (strcmp(words[i], words[j]) == 0) {
            wordcnt[j]++;
            continue;
        }
        
        j++;
        strcpy(words[j], words[i]);
        wordcnt[j] = 1;
        wordlen[j] = wordlen[i];
    }

    nr_words = j + 1;
}


/* compute_next:
 *                 For each word and each prefix length, compute the
 *                 index of the first word that has the same prefix plus
 *                 a letter.
 */
static void
compute_next(void)
{
    int i, j, k, l;
    int high;

    for(i = 0; i < nr_words; i++) {
        for(j = 0; j <= wordlen[i]; j++) {
            for(k = i + 1; k < nr_words; k++) {
                if (strncmp(words[i], words[k], j) != 0) break;
            }
            high = k;
                
            for(k = 0; k < 26; k++) {
                next[i][j][k] = -1;

                for(l = i; l < high; l++) {
                    if (words[l][j] == 'A' + k) {
                        next[i][j][k] = l;
                        break;
                    }
                }
            }
        }
    }
}

/* random_letter:
 *                 Generate a random letter. The selection uses the
 *                 frequency counts of the letters to select letters that
 *                 are likely to occur.
 */
static char
random_letter(void)
{
    int s, t;
    int i;

    s = 1 + (rand() % sum_freq);

    t = 0;
    for(i = 0; i < 26; i++) {
        t += freq[i];
        if (t >= s) {
            assert(freq[i]);
            return 'A' + i;
        }
    }

    assert(0);
    return 'A';
}


/* impose_word:
 *                 Tries to place a word on the given board. It does not
 *                 deal with overwriting its own letters.
 */
static void
impose_word(board_p b, int p, int w, int offset)
{
    int s;

    if (offset == wordlen[w]) return;

    b->fields[p] = words[w][offset];

    s = rand() % nrnb[p];
    impose_word(b, nb[p][s], w, offset + 1);
}

/* random_word:
 *                 Select a random word and place in on the board.
 *
 */
static void
random_word(board_p b)
{
    int w, p;

    w = rand() % nr_words;
    p = rand() % 25;

    impose_word(b, p, w, 0);
}

/* random_board:
 *                 Generate a random solution by placing random letters
 *                 on the board. After that, also some random words are
 *                 placed.
 */
static void
random_board(board_p b)
{
    int i;

    for(i = 0; i < 25; i++) {
        b->fields[i] = random_letter();
    }

    for(i = 0; i < INIT_WORDS; i++) {
        random_word(b);
    }
}

static void
print_board(board_p b)
{
    int i;

    for(i = 0; i < 25; i++) {
        putchar(b->fields[i]);
        if ((i + 1) % 5 == 0) putchar('\n');
    }
}

/* walk_path:
 *                 Try to find a path on the board that contains a word
 *                 from the wordlist. low specifies the first word in the
 *                 sorted wordlist that has a prefix of length len that
 *                 contains the letters on the current path. Avoid
 *                 duplicate words by assigning a sequence number to the
 *                 words_used table. If the table entry for a word is
 *                 equal to the current sequence number, the word has
 *                 already been used.
 */
static void
walk_path(board_p b, int p, int low, int len)
{
    int new;
    int i;

    /* Check whether there are any words with the current prefix. */
    low = next[low][len][b->fields[p] - 'A'];
    if (low == -1) return;

    if (wordlen[low] == len + 1 && word_used[low] != seqno) {
        word_used[low] = seqno;

        if (print_words) {
            for(i = 0; i < wordcnt[low]; i++) {
                printf("%s\n", words[low]);
            }
        }

        letters += (len + 1) * wordcnt[low];
        count += wordcnt[low];
    }
        

    for(i = 0; i < nrnb[p]; i++) {
        new = nb[p][i];
        if (used[new]) continue;

        used[new] = 1;

        walk_path(b, new, low, len + 1);

        used[new] = 0;
    }
}

/* walk_board:
 *                 Count the number of words and letters on the given
 *                 board. The sequence number is used to detect
 *                 duplicates.
 */
static void
walk_board(board_p b)
{
    int i;

    letters = count = 0;
    seqno++;
    for(i = 0; i < 25; i++) {
        used[i] = 1;
        walk_path(b, i, 0, 0);
        used[i] = 0;
    }
}

static void
print_member(member_p m)
{
    if (verbose) printf("################\n");
    print_board(&m->b);
    if (verbose) printf("################\n");
    print_words = 1;
    walk_board(&m->b);
    print_words = 0;
    if (verbose || give_score) {
        printf("Score: %d letters with %d words\n", letters, count);
    }
    if (verbose) printf("\n");
}


/* eval_pop:
 *                 Dump statistics for the current population.
 */
static void
eval_pop(int p)
{
    int sum, sumsq, avg, var, std;
    int count[20];
    int max;
    int i;

    sum = 0; sumsq = 0;

    max = 0;
    for(i = 0; i < POP_SIZE; i++) {
        sum += pop[p][i].score;
        sumsq += pop[p][i].score * pop[p][i].score;
        if (pop[p][i].score > max) max = pop[p][i].score;
    }

    for(i = 0; i < 20; i++) count[i] = 0;

    for(i = 0; i < POP_SIZE; i++) {
        count[(pop[p][i].score * 20 - 1) / max]++;
    }

    printf("Population score histogram (max %d; population size %d):\n",
           max, POP_SIZE);
    for(i = 0; i < 20; i++) {
        printf("%4d", count[i]);
    }
    printf("\n");

    avg = sum / POP_SIZE;
    var = sumsq / POP_SIZE - (avg * avg);
    std = sqrt(var) * 100 / avg;

    printf("Average: %d variance: %d normalized standard deviation: %d\n",
           avg, var, std);
}

/* init_pop:
 *                 Generate the initial population, consisting of
 *                 POP_SIZE members.
 */
static void
init_pop(void)
{
    board_p b;
    int i;

    for(i = 0; i < POP_SIZE; i++) {
        b = &pop[0][i].b;

        random_board(b);
        walk_board(b);

        /* Add one bonus point to be able to randomly select members */
        pop[0][i].score = 1 + letters * 10 - count;
        if (pop[0][i].score > best.score) {
            best = pop[0][i];
        }
    }
}

/* find_random:
 *                 Find a weighted random member of the population. Array
 *                 sum contains the sum of the score of the current
 *                 member and all members before it. Using this
 *                 accumulative array, a binary search is applied that
 *                 finds a member.
 */
static int
find_random(int sum[POP_SIZE + 1], int total)
{
    int low, high, middle;
    int s;

    s = 1 + (rand() % total);

    low = 0;
    high = POP_SIZE + 1;
    middle = POP_SIZE / 2;

    while(sum[middle] < s || sum[middle - 1] > s) {
        if (sum[middle] < s) {
            low = middle;
        } else {
            high = middle;
        }

        middle = low + (high - low) / 2;
        
        assert(middle > 0 && middle <= POP_SIZE);
        assert(middle >= low);
        assert(middle <= high);
        assert(low <= high);
    }

    middle--;

    assert(middle >= 0 && middle < POP_SIZE);
    return middle;
}
        

/* new_member:
 *                 Generate a new member for the next population. First
 *                 two (different) members of the old population are
 *                 selected. The letters of the new member are selected
 *                 from one of the two old members, weighted by their
 *                 score. After that, the mutation factor decides whether
 *                 a new word should be added to the solution. I think
 *                 a mutation parameter of 35% is much too high to call
 *                 it a genetic algorithm, but this was the best value I
 *                 found for my testcase.
 */
static void
new_member(int p, member_p new, int sum[], int total)
{
    board_p b, b1, b2;
    member_p m1, m2;
    int score;
    int i;

    /* All scores are greater than 0 */
    assert(total > 0);

    m1 = &pop[p][find_random(sum, total)];
    do {
        m2 = &pop[p][find_random(sum, total)];
    }while(m1 == m2);

    b = &new->b;
    b1 = &m1->b;
    b2 = &m2->b;

    score = m1->score + m2->score;

    for(i = 0; i < 25; i++) {
        if ((rand() % score) < m1->score) {
            b->fields[i] = b1->fields[i];
        } else {
            b->fields[i] = b2->fields[i];
        }
    }

    if ((rand() % 1000) < MUTATE) {
        random_word(b);
    }

    walk_board(b);

    /* Add one bonus point to randomly select members */
    new->score = 1 + letters * 10 - count;

    if (new->score > best.score) {
        best = *new;
        if (verbose) print_member(&best);
    }
}

/* next_pop:
 *                 Based on the current population, compute the
 *                 next. First precompute the prefix sums of the
 *                 scores. Also put the best solution found so for in the
 *                 next population.
 */
static void
next_pop(void)
{
    int sum[POP_SIZE + 1];
    int total;
    int next;
    int i;

    total = 0;
    sum[0] = 0;
    for(i = 0; i < POP_SIZE; i++) {
        total += pop[curr][i].score;
        sum[i + 1] = sum[i] + pop[curr][i].score;
    }

    next = 1 - curr;

    pop[next][0] = best;
    for(i = 1; i < POP_SIZE; i++) {
        new_member(curr, &pop[next][i], sum, total);
    }

    curr = next;
}

int
main(int argc, char **argv)
{
    int dump_pop = 0;
    int i;

    progname = argv[0];

    argv++; argc--;

    while(argc && argv[0][0] == '-') {
        switch(argv[0][1]) {
        case 'r':
            if (argc < 2) usage();
            srand(atoi(argv[1]));
            argv++; argc--;
            break;
        case 't':
            if (argc < 2) usage();
            timeout = atoi(argv[1]);
            argv++; argc--;
            break;
        case 'T':
            if (argc < 2) usage();
            nr_timeout = atoi(argv[1]);
            argv++; argc--;
            break;
        case 'd':
            dump_pop = 1 - dump_pop;
            break;
        case 'v':
            verbose = 1 - verbose;
            break;
        case 's':
            give_score = 1 - give_score;
            break;
        case 'e':
            dump_eval = 1 - dump_eval;
            break;
        default:
            usage();
        }

        argv++; argc--;
    }

    if (argc != 1) {
        usage();
    }

    init_neighbors();

    read_words(argv[0]);
    compute_wordlen();
    compute_freq();
    sort_words();
    filter_duplicates();
    compute_next();

    init_pop();
    if (verbose) print_member(&best);
    if (dump_eval) eval_pop(0);

    /* Generate statistics more than once. */
    while(nr_timeout--) {
        running = 1;
        signal(SIGALRM, handler);
        alarm(timeout);

        while(running) {
            next_pop();
        }

        if (dump_eval) eval_pop(curr);
    }

    if (dump_eval) eval_pop(curr);

    print_member(&best);

    if (dump_pop) {
        printf("++++++++++++++++++++ Population dump +++++++++++++++++++\n");
        for(i = 0; i < POP_SIZE; i++) {
            printf("Member %d\n", i);
            print_member(&pop[curr][i]);
        }
    }

    return 0;
}


/*
Local Variables:
compile-command: "gcc -g -ansi -Wall -pedantic -o b1 b1.c -lm"
End:
*/


