
/* for now, edit by hand */

/*
> TODAY'S AUTHOR: XDRZW MSCQBX
>
> 'R DIJB NC, KYBTW,' JYRV TXYBTKZRUU, JXXFRQN BC XGBSRKYBX TRDJXZU USCD
> TRJ KCDHYQRCQ'J YSDJ. 'MIB RU R ZRLX, R'ZZ JXX WCI YNYRQ MXUCSX WCI YSX
> YJZXXH.  R ECQ'B JBSYW URLX WYSVJ USCDWCIS ERQVCE.'
> 'WCI DIJB QCB NC!'  JTX YQJEXSXV, TCZVRQN TRD YJ URSD YJ TXS JBSXQNTB
> YZZCEXV, 'WCI JTYZZ QCB, R BXZZ WCI.'

'I MUST GO, CATHY,' SAID HEATHCLIFF, SEEKING TO EXTRICATE HIMSELF FROM
HIS COMPANION'S ARMS. 'BUT IF I LIVE, I'LL SEE YOU AGAIN BEFORE YOU ARE
ASLEEP. I WON'T STRAY FIVE YARDS FROMYOUR WINDOW.'
'YOU MUST NOT GO!' SHE ANSWERED, HOLDING HIM AS FIRM AS HER STRENGHT
ALLOWED, 'YOU SHALL NOT, I TELL YOU.'

*/

/*

"hjeznxj", "jzedghf", "lzwcgwf", "brcf", "fprbb", "bkog", "reg",

"azdrhjrd fckyh"
"'brcf reg bkog lzwcgwf cjklj pru lrhlj fprbb",
"                                       ybkgf,",
"     wnh bgh crfvf rdi jzedghf wegro hjeznxj.' ",

*/

/*"uscdwcis",*/
/*
"xgbsrkybx",
"kcdhyqrcq",
"jxxfrqn",
"mxucsx",
"trdjxzu",
"tczvrqn",
"trd",
"bxzz",
"dijb",
"erqvce",
"jtx",
"jtyzz",
"jxx",
"jyrv",
"mib",
"bc",
"nc",
"qcb",
"ru",
"trj",
"txs",
"urlx",
"ursd",
"uscd",
"wci",
"wysvj",
"yj",
"yjzxxh",
"ynyrq",
"yqjexsxv",
"ysdj",
"ysx",
"yzzcexv",
"zrlx",


"xdrzw", "mscqbx",
"'r dijb nc, kybtw,' jyrv txybtkzruu, jxxfrqn bc xgbsrkybx trdjxzu uscd",
"trj kcdhyqrcq'j ysdj. 'mib ru r zrlx, r'zz jxx wci ynyrq mxucsx wci ysx",
"yjzxxh.  r ecq'b jbsyw urlx wysvj uscdwcis erqvce.'",
"'wci dijb qcb nc!'  jtx yqjexsxv, tczvrqn trd yj ursd yj txs jbsxqntb",
"yzzcexv, 'wci jtyzz qcb, r bxzz wci.'",

*/




/*
"rddl", "kesar", "mt", "hzspd", "utne", "utn", "zdsl",

./crypto r=N t=O m=T u=Y n=U e=R q=M a=I k=B s=A h=S z=H l=D d=E o=L p=V y=G

"ltr'm", "othd", "utne", "zdsl",
"mt", "ysar", "s", "qarnmd.",
"utn", "*rddl*", "utne", "zdsl.",
"utne", "kesar", "ah", "ar", "am.",
"kneqs", "hzspd",
 */

/*
"ugrkzqwa", "lngeatmon", "lngeao", "eqlnse",
"tsqeo", "rqecsz", "egoso", "agl", "fg",

./crypto l=T n=H s=E o=S f=D e=R c=V t=B

"fg", "agl", "ugrkzqwa", "lnql", "lns", 
"egos", "nqo", "lngeao.", "eqlnse", 
"rqecsz", "lnql", "lns", 
"lngeatmon", "tsqeo", "egoso.",
*/

/*
  "efcr", "cr", "l", "njyy", "xmakeji.", "nmwor",
  "flqy", "l", "jcsfe", "em", "rykb", "uy", "wyeeyjr,",
  "lkb", "c", "flqy", "l", "jcsfe", "kme", "em", "jylb",
  "efyu.",
    "- zcwwclu", "nlawokyj",
*/

/*

     cryptogram.c - take 1:1 alphabet substitution cypher puzzles
       from magazines and decrypt using dawg-based word list.

     my hack at a genetic algorithm was crap.  Next suggestion is
     to do a more traditional tree search: pick a word at random,
     take each word from the lexicon that could be a possible
     decryption, substitute it, and then explore all the ramifactions
     that the substitution leads to.  Stop when a word containing some
     decoded letters has no possible fit with the letters left.

     This should optimise faster if you start with the longest words
     (actually the words for which there is the lowest number of
     possible matches) - the more duplicated letters in the word
     the better to constrain it.

     Alternatively, instead of starting with the longest words, start
     with the shortest words and a short starting dictionary of
     'and the of is' etc, or whatever the most common words are.

     Next hack needed here: as well as rejecting all uppercase
     decryptions, reject all partial decryptions that can't even
     be satisfied by anything eg hxBLO (??blo) matches no words.

     this code existed in earlier versions but unfortunately I
     deleted it :-(

     Also, once that works, need to change to a breadth-first search,
     starting with the pattern that has the fewest mappings and taking
     the mapping with the highest score.

     breadth-first algorithm:  find best fit for one word.  Replace
     null map with updated map.   go to next word.

 */

#define MAXITER 200000

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include "splib.h"


static int nextword = 0;

static char *words[1024];

static char *cryptogram[1024];

void extract_words(char *line)
{
  /* strdup storage for extracted words into words[nextword++], last <- NULL */
  int c;
  char sword[1024];
  char *word;
  char *s = line;
for (;;) {
  for (;;) {
    c = *s++;
    if (c == '\0') {
      words[nextword] = NULL;
      return;
    }
    if (isalpha(c)) break;
  }
  word = sword;
  for (;;) {
    *word++ = c;
    c = *s++;
    if (!isalpha(c)) break;
  }
  *word = '\0';
  words[nextword++] = strdup(sword);
}
}

static NODE           *dawg;
static INDEX           nedges;

static int debug = FALSE;

int do_wild(
  NODE *dawg, INDEX i, char *word, char *res, int len, int *found) {

  int endsword, last, ch, target;
  NODE node;
  INDEX link;
  INDEX origi;

  origi = i;

  target = ((int)*word)&255; /* care taken for signed ISO alphabets */

  if (*word == '*') {
    res[len] = '\0'; 
    (void) do_wild(dawg, origi, word+1 /* skip '*', match nothing */,
                     res, len, found);
  }

  for (;;) {
    node = dawg[i++];
    ch = (int)((node >> V_LETTER) & M_LETTER);
    last = ((node & (INDEX)M_END_OF_NODE) != 0);
    endsword = ((node & M_END_OF_WORD) != 0);
    link = node & M_NODE_POINTER;

    res[len] = ch; res[len+1] = '\0';

    if (ch != 0) {
      if (ch == target || target == '?') {
        /* single wildcard 1 letter match */
        if (endsword && *(word+1) == '\0') {
          if (debug) fprintf(stdout, "word: %s\n", res); (*found)++;
        } else if ((endsword && *(word+1) == '*') && (*(word+2) == '\0')) {
          /* special-case hack for trailing * */
          if (debug) fprintf(stdout, "word: %s\n", res); (*found)++;
        }
        if (*(word+1) != '\0' && link != 0)
          (void) do_wild(dawg, link, word+1, res, len+1, found);
      } else if (target == '*') {
        /* multiple wildcard - 0-N letters match */
        if (endsword && *(word+1) == '\0') {
          /* trailing* */
          if (debug) fprintf(stdout, "word: %s\n", res); (*found)++;
        }
        if (link != 0) { /* link == 0 means this letter terminates here */
          /* skip the * and see what we get if it has matched to here: */
          (void) do_wild(dawg, link, word+1 /* skip '*' */, res, len+1, found);
          /* let this letter match the * and see
             if the rest of the pattern matches: */
          (void) do_wild(dawg, link, word /* keep '*' */, res, len+1, found);
	}
      }
    }
    if (last) break;
  }

  return(0==0);
}

int wildcard(NODE *dawg, char *word) {
  char result[MAX_WORD_LEN];
  int i = 0;

  (void)do_wild(dawg, (INDEX)ROOT_NODE, word, result, 0, &i);
  return(i);
}

int badword(char *s)
{
  char pat[128];
  int i = 0;
  int try = FALSE;

  /* If s contains any uppercase... */
  for (;;) {
    if (s[i] == '\0') break;
    if (!isalpha(s[i])) return(FALSE);
    if (isupper(s[i])) {
      try = TRUE;
      pat[i] = tolower(s[i]);
    } else {
      pat[i] = '?';
    }
    i += 1;
  }
  pat[i] = '\0';
  if (try) {
    if (strchr(pat, '?') == NULL) {
      if (debug) fprintf(stderr, "Spelling check of %s\n", pat);
      /* simple spelling check */
      return(!dawg_check(dawg, pat));
    } else {
      i = (wildcard(dawg, pat) == 0);
      if (debug) fprintf(stderr, "Does '%s' have any possible solutions? - %s\n",
        pat, (i ? "no" : "yes"));
      return(i);
    }
  }
  return(FALSE);
}

char *translate(char *word, char *map)
{
  static char result[256];
  char *s = word, *t = result;
  while (*s != '\0') {
    *t++ = map[(*s++)&255];
  }
  *t = '\0';
  return(result);
}

static char bestmap[256];
static char lastouts[1024];
static int minlc = 99999;
static int maxuc = -1;
int plausible_solution(char *map, FILE *out)
{
  int j = 0;
  int i = 0;
  int lc = 0, uc = 0;
  int thiswordanyupper;
  int c;
  char *s, *tr;
  char lv[256];
  int badwords = 0;

  for (;;) {
    s = words[i++];
    if (s == NULL) break;
    tr = translate(s, map);
    s = tr;
    thiswordanyupper = FALSE;
    for (;;) {
      c = *s++;
      if (c == '\0') break;
      if ((isalpha(c)) && (islower(c))) lc += 1;
      if (isalpha(c)) {
        if (isupper(c)) {
          uc += 1;
          thiswordanyupper = TRUE;
	}
      }
    }

    if (thiswordanyupper) {
      j = 0;
      for (;;) {
        if (tr[j] == '\0') break;
        lv[j] = tolower(tr[j]);
        j += 1;
      }
      lv[j] = '\0';
      if (badword(tr)) {
        badwords += 1;
      }
    }
  }
  if (badwords == 0) {
    char outs[1024];
    maxuc = uc;
    minlc = lc;
    /* best solution so far - should stack it for further investigation */

    /* NOTE: if a particular map generates a sequence of all upper case
       letters for some encrypted word, and the 'unencrypted' word
       doesn't actually exist in the lexicon, we should fail and backtrack.
       For instance the demo program generates "WO" as a word and doesn't
       ever reject it! */

    outs[0] = '\0';
    i = 0;
    for (;;) {
      s = cryptogram[i++];
      if (s == NULL) break;
      tr = translate(s, map);
      sprintf(outs+strlen(outs), "%s\n", tr);
    }
    sprintf(outs+strlen(outs), "\n");

    if (strcmp(outs, lastouts) != 0) {
      fprintf(stdout, "%s", outs);
      strcpy(lastouts, outs);
    }

    if (lc == 0) {
      fprintf(stdout, "Best fit: %s", outs);
      exit(0);
    }
    memcpy(bestmap, map, 256);
    return(TRUE);
  }
  return(FALSE);
}

void print_table(char *map, char **words)
{
  int i = 0;
  char *word;
  for (;;) {
    word = words[i++];
    if (word == NULL) break;
    fprintf(stdout, "%s ", translate(word, map));
  }
}

int can_modify_map(char **words, char *newmap, char *word, char *res)
{
  /* word maps to res.  Change 'newmap' with the letter mappings
     that this pair generates. */
  int i;
  int crypted, clear;
  char *iword = word, *ires = res;
  if (debug) fprintf(stderr, "Trying translation of %s -> %s\n", word, res);
  for (;;) {
    crypted = *word++;
    clear = *res++;
    if (crypted == '\0') break;
    /* Is this word already incompatible? */
    if (debug) fprintf(stderr, "%c -> %c\n", crypted, clear);
    if (isupper(newmap[crypted]) && (newmap[crypted] != toupper(clear))) {
      /* ERROR! should have been caught earlier... */
      if (debug) fprintf(stderr,
        "INCOMPATIBLE MAPPING: %c already maps to %c\n",
        crypted,
        newmap[crypted]);
      return(FALSE);
    }
    for (i = 'a'; i < 'z'+1; i++) {
      if ((newmap[i] == toupper(clear)) && (i != crypted)) {
      if (debug) fprintf(stderr,
        "INCOMPATIBLE MAPPING: %c also maps to %c\n",
        i,
        newmap[i]);
        return(FALSE);
      }
    }
    newmap[crypted] = toupper(clear);
  }
  return(plausible_solution(newmap, stderr));
}

int any_letter_in(char *this_encrypted, char *next_encrypted)
{
  int c;
  char *s = this_encrypted;
  if (debug) fprintf(stderr,
    "Looking for letters from %s in %s\n",
    this_encrypted,
    next_encrypted);
  for (;;) {
    c = *s++;
    if (c == '\0') break;
    if (strchr(next_encrypted, c) != NULL) {
      if (debug) fprintf(stderr, "%s%s%s%s%s\n",
        "Having chosen a mapping for '",
        this_encrypted,
        "',\nwe'll now see what substituting into '",
        next_encrypted,
        "' gives us...");
      return(TRUE);
    }
  }
  return(FALSE);
}

void treewalk(char **words, int idx, char *map);

void dawg_walk(
  NODE *dawg, INDEX i, 
  char *word, char *iword, char *res,
  int len,
  char **words, char *map,
  int idx
)
{
  int endsword, last, ch, target;
  NODE node;
  INDEX link;
  int sx;
  char newmap[256];

  memcpy(newmap, map, 256);

  for (;;) {
    node = dawg[i++];
    ch = (int)((node >> V_LETTER) & M_LETTER);
    last = ((node & (INDEX)M_END_OF_NODE) != 0);
    endsword = ((node & M_END_OF_WORD) != 0);
    link = node & M_NODE_POINTER;

    res[len] = ch; res[len+1] = '\0';
    target = ((int)*word)&255;
    if (ch != 0) {
      if (ch == target || islower(target)) {
        if (endsword && *(word+1) == '\0') {
          if (debug) fprintf(stderr, "pattern %s word: %s\n", iword, res);

          memcpy(newmap, map, 256);
          if (can_modify_map(words, newmap, iword, res)) {
            /* Now we test the remaining words that this affects */
            sx = idx;
            for (;;) {
              idx += 1;
              if (words[idx] == NULL) {
                /* Doesn't mean it's incompatible, just means that
                   all consequences of this translation have been tested.
                   There may be other independent words/permutations
                   still to try... this bit of code needs work... */
                if (debug) fprintf(stderr, "End of knock-on effect for %s\n", res);
                break;
  	      }
              if (any_letter_in(iword, words[idx])) {
                /* Adjusted newmap with the mapping from word */
                treewalk(words, idx, newmap);
              }
	    }
            idx = sx;
            return; /* maybe? */
          }
	}
        if (*(word+1) != '\0' && link != 0) {
          dawg_walk(dawg, link, word+1, iword, res, len+1, words, map, idx /*+1?*/);
        }
      }
    }
    if (last) break;
  }
  return;
}

void treewalk(char **words, int idx, char *map)
{
  char res[256];
  char *word = words[idx];
  *res = '\0';
  dawg_walk(dawg, 0, word, word, res, 0, words, map, idx);
}


int main(int argc, char **argv)
{
  int i, c;
  char map[256];
  char line[1024];
  char *s;
  int nextc = 0;

  if (!dawg_init("crypto", &dawg, &nedges)) exit(EXIT_FAILURE);

  /* Read cryptogram from stdin to cryptogram[] */
  line[1023] = '\0';
  for (;;) {
    s = fgets(line, 1023, stdin);
    if (s == NULL) break;
    s = strchr(line, '\n');
    if (s != NULL) *s = '\0';
    s = line;
    for (;;) {
      c = *s;
      if (c == '\0') break;
      if (isalpha(c) && isupper(c)) c = tolower(c);
      *s = c;
      s += 1;
    }
    if (*line != '\0') cryptogram[nextc++] = strdup(line);
    /* Extract unique words to words[] */
    extract_words(line);
  }
  cryptogram[nextc] = NULL;

  for (i = 0; i < nextword; i++) {
    fprintf(stderr, "%s\n", words[i]);
  }
  /* Sort in order of largest first */

  for (i = 0; i < 256; i++) map[i] = i;

  if (argc > 1) {
    char *s;
    for (i = 1; i < argc; i++) {
      fprintf(stderr, "Processing '%s'\n", argv[i]);
      s = argv[i];
      if ((strlen(s) != 3) || (s[1] != '=')) {
        fprintf(stderr, "syntax: ./crypto a=X\n");
        exit(1);
      }
      map[s[0]] = s[2];
    }
  }

  treewalk(words, 0, map);

  {
    char outs[1024];
    char *tr;
    outs[0] = '\0';
    i = 0;
    for (;;) {
      s = cryptogram[i++];
      if (s == NULL) break;
      tr = translate(s, map);
      sprintf(outs+strlen(outs), "%s\n", tr);
    }
    sprintf(outs+strlen(outs), "\n");
    fprintf(stdout, "manual params: %s", outs);
  }

  fflush(stderr); fflush(stdout);

  exit(0);
  return(1);
}

