/* ----------------------------------------------------------------------------
 crypto.C  V1.2

 DESCRIPTION:
 This program is intended to solve the cryptoquote problems found in RPI's
 POLY.

 COMPILATION:
 g++ -o crypto crypto.C -lga

 NOTE:
 Must have GALib installed and
 a dictionary at /usr/dict/words

 Version 1.1 - added more robust I/O - now ignores all punctuation
 Version 1.2 - added ability to seed initial population with solutions
               to big words
---------------------------------------------------------------------------- */
#include "list.H"
#include "read_dict.H"

//**************************
// function.parsew()
//**************************
void parsew(char *word1, char*word2) {
  int i,j=0;
  char ch;

  int lenw = strlen(word1);
  for (i=0; i<lenw; i++) {           //Pull out anything not letter or '
    ch = word1[i];
    if ( ((ch>='A') && (ch<='z')) || (ch=='\'') ) {
      if ((ch>='A') && (ch<='Z'))    // Is it uppercase?
	ch = ch - ('A' - 'a');       // convert it to lower case
      word2[j++] = ch;
    }
  }
  word2[j] = '\0';

}

//**************************
// function.printl()
//**************************
void printl() {
  char *word;

  for (int i=0; i < Num; i++) {
    word = get();
    put(word);
    cout << word << " ";
  }
  cout << endl << endl;

}

//**************************
// function.getinput()
//**************************
void getinput() {
  char *word;
  char parword[MAXSIZE];
  int len, chnum, sum;
  char ch;

  //Read in the CryptoQuote
  cout << "Enter the cryptoquote (end with '$'): ";
  while (1) {
    Num++;
    word = new char[MAXSIZE];
    cin >> word;
    len = strlen(word);
    for (int i=0; i < len; i++) {
      ch = word[i];
      if ((ch>='a') && (ch<='z')) {
	chnum = (int) ch - 'a';
	Alpha[chnum]++;              // Keep track of # of letters
      }
      if ((ch>='A') && (ch<='Z')) {
	chnum = (int) ch - 'A';
	Alpha[chnum]++;              // Keep track of # of letters
      }
    }

    if (word[len-1] == '$') {    //If word ends in '$'
      word[len-1] = '\0';        //delete and
      if (strlen(word) == 0)
	Num--;
      else
	put(word);                // don't insert word if empty
	
      break;                     //break out of loop
    }

    put(word);
  }

  // Determine value of each word
  //  cout << endl << "Value of each word:\n";
  for (int i=0; i<Num; i++) {
    word = get();
    put(word);
    // cout << "  " << word << " = ";
    parsew(word,parword);
    len = strlen(parword);
    sum = 0;
    for (int j=0; j<len; j++) {
      chnum = (int) parword[j] - 'a';
      sum += Alpha[chnum];
    }
    //cout << sum << " / " << len << " = ";
    Vwords[i] = (float) sum / len;
    //cout << Vwords[i] << endl;
  }

  // Percentage of letter in other words
  //  cout << endl << "Percentage of letters in other words:\n";
  for (int i=0; i<Num; i++) {
    word = get();
    put(word);
    parsew(word,parword);
    //cout << "  " << word << " = ";
    len = strlen(parword);
    sum = 0;
    for (int j=0; j<len; j++) {
      chnum = (int) parword[j] - 'a';
      if ( (Alpha[chnum]-1) > 0 )
	sum++;
    }
    //cout << sum << " / " << len << " = ";
    Pwords[i] = (float) sum / len;
    //cout << Pwords[i] << endl;
  }

  cout << "\nProcessing:\n** ";
  printl();
}

//**************************
// function.inits()
//**************************
void inits() {
  int i,j,len;
  int count=0;

  listtype *Qdummy = new listtype;

  Qdummy->data = NULL;
  Qdummy->next = NULL;
  Qhead = Qdummy;

  for (i=0; i<26; Alpha[i++]=0 );
  getinput();

  cout << "Initializing:\n";
  // *****Get Dictionary HASHED********
  cout << "  Reading in dictionary";
  ifstream inFile(Dict, ios::in);

  int maxline=64;
  char inBuf[maxline];

  while(inFile.getline(inBuf, maxline, '\n')){
    if (count % 2000 == 0){
      cout << ".";
      cout.flush();
    }
    count++;
    len = strlen(inBuf);
    char ch;
    for (i=0; i < len; i++) {
      ch = inBuf[i];
      if ((ch>='A') && (ch<='Z'))    // Is it uppercase?
	ch = ch - ('A' - 'a');       // convert it to lower case
      inBuf[i] = ch;
    }    
    insert(inBuf,hp);
  }
  // ******Dictionary is HASHED

  // ******Search for solutions to big words
  int countbw=0;
  char *temp;
  char word[MAXSIZE];

  cout << "\n  Searching for solutions to big words:\n";
  for(i=0; i<MAXNUMBW; i++)                    //Initialize BWnum to 0
    BWnum[i] = 0;
  for (i=0; i<Num; i++) {
    temp = get();
    put(temp);
    parsew(temp,word);
    len = strlen(word);
    if (len > BIGW) {
      count = process(word, countbw);
      if (count < SMALLNUM && count > 0) {
	cout << "    Found " << word << "\n";
	BWnum[countbw] = count;
	for (j=0;j<len;j++)                //Save bigword in BW
	  BW[countbw][j] = word[j];
	BW[countbw][j] = '\0';
	countbw++;
	if (countbw>=MAXNUMBW)            //If we found our max# of bigwords, exit
	  break;
      }
    }
  }
}

//**************************
// function.convert()
//**************************
void convert(char *word, GA1DArrayAlleleGenome<int> & genome, char *p) { 
  int size = strlen(word);
  char ch;
  int i,j;

  for (i=0; i < size; i++) {
    ch = word[i];                        // Get Crypto letter
    if (ch == '\'')
      p[i] = ch;
    else {
      j = (int) ch - 'a';                  // Find its number in alphabet
      p[i] = (char) 'a' + genome.gene(j);  // Find mate and convert back to alpha
    }
  }
  p[i] = '\0';
}

//**************************
// function.main()
//**************************
int main(int argc, char *argv[]) {
  system("clear");
  cout << "           CRYPTO SOLVER - v1.2\n\n";
  cout << "     A CryptoQuote is a simple substition code where each\n";
  cout << "letter that appears may stand for a different letter. The substitutions\n";
  cout << "are consistent throughout the puzzle.  Punctuation is not translated\n";
  cout << "     For example: POLYYONTOP = RENSSELAER\n\n";
  cout.flush();

  // See if we've been given a seed to use (for testing purposes).  When you
  // specify a random seed, the evolution will be exactly the same each time
  // you use that seed number.

  unsigned int seed = 0;
  for(int ii=1; ii<argc; ii++) {
    if(strcmp(argv[ii++],"seed") == 0) {
      seed = atoi(argv[ii]);
    }
  }

  inits();

  int aset[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
		15,16,17,18,19,20,21,22,23,24,25};
  GAAlleleSet<int> alleles(26, aset);
  GA1DArrayAlleleGenome<int> genome(26, alleles, objective);

  genome.initializer(UniqueAllelesInitializer);
  genome.mutator(GA1DArrayGenome<int>::SwapMutator);
  genome.crossover(GA1DArrayGenome<int>::PartialMatchCrossover);
  
  //  genome.initializer(GA1DArrayAlleleGenome<int>::UniformInitializer);
  //  genome.mutator(GA1DArrayAlleleGenome<int>::FlipMutator);
  //  genome.crossover(GA1DArrayGenome<int>::OnePointCrossover);
  
   GASteadyStateGA ga(genome);
   // GASimpleGA ga(genome);

  //Scaling Factor=================
  GALinearScaling scale;
  GASigmaTruncationScaling trunc;
  ga.scaling(scale);
  
  GAUniformSelector uniform;
  GARankSelector ranking;
  ga.selector(uniform);


  ga.set(gaNpopulationSize, 400);
  ga.set(gaNpCrossover, 0.75);
  ga.set(gaNpMutation, 0.10);
  ga.set(gaNnGenerations, 10000);
  ga.set(gaNpReplacement, 0.65);//percentage of population to replace
  ga.parameters(argc, argv);
  ga.initialize(seed);

  cout << "\nEvolving:\n";
  while(!ga.done()){
    ga.step();
    if(ga.generation() % 50 == 0){
      cout << "Gen #" << ga.generation() << " Diversity:" << ga.population().div() 
	   << " Fitness:" << ga.population().max() << " Convergence:" 
	   << ga.convergence();
      cout << " Best:\n** " << ga.population().best()<< "\n\n";
      cout.flush();
    }
  }
  cout << "\n\n";

  cout << "the most fit individual:\n" 
       << ga.statistics().bestIndividual() << endl;

  return 0;
}
 
//**************************
// function.objective()
//**************************
// Get alleles to go from 1 to 26.  A-Z.
float objective(GAGenome & c) {
  GA1DArrayAlleleGenome<int> & genome = (GA1DArrayAlleleGenome<int> &)c;

  char word[MAXSIZE], *temp, partemp[MAXSIZE];
  int i;
  float value=0.0;
  
  for (i=0; i < Num; i++) {
    temp = get();
    put(temp);
    parsew(temp,partemp);
    convert(partemp, genome, word);
    if ( (strlen(word) == 1) && (word[0]=='i' || word[0]=='a') )
      value += 10;
    if (find(word, &p, hp) > 0){
      value+= strlen(word)*strlen(word);    // wordsize*wordsize

      // Other fitness ideas:
      //  - Something involving value[i] of word, where the value
      //    corresponds to how much a words letters are in other words
      //  - Something involving pvalue[i] of word, where the value
      //    corresponds to the percentage a words letters are in other words
      //  - Something involving size of word and how many possible combinations
      //    in dictionary there are of it - statistically
      //  - ??? 


    }
  }

  return(value);
}

//**************************
// function.write()
//**************************
// Here we override the built-in write method for the 2DArray genome so
// that we get better spacing.  The default just stacks the characters one
// after another.  Here we do fixed spacing so that the -1 and 1 don't screw
// each other up.
int GA1DArrayAlleleGenome<int>::write(ostream & os) const 
{ char *word;
  char p[MAXSIZE];

  for (int i=0; i < Num; i++) {
    word = get();
    put(word);

    int size = strlen(word);
    char ch;
    int i,j;

    for (i=0; i < size; i++) {
      ch = word[i];                            // Get Crypto letter
      if ((ch>='A') && (ch<='z')) {
	if ( ch <= 'Z' ) {                     // upper case
	  j = (int) ch - 'A';                  // Find its number in alphabet
	  p[i] = (char) 'A' + gene(j);         // Find mate and convert back to alpha
	} else {                               // lower case
	  j = (int) ch - 'a';                  // Find its number in alphabet
	  p[i] = (char) 'a' + gene(j);         // Find mate and convert back to alpha
	}
      } else                                   // if not a word just include
	p[i] = ch;

    }
    p[i] = '\0';

    cout << p << " ";
  }

  return os.fail() ? 1 : 0;
}

//Globals for UniqueAllelesInitializer() only
int Icount=0;
int IBWnum=0;
int Iwordnum=0;
int Ialpha[26];
//************************************
// function.UniqueAllelesInitializer()
//************************************
void UniqueAllelesInitializer(GAGenome & c)
{
  GA1DArrayAlleleGenome<int> &genome=(GA1DArrayAlleleGenome<int> &)c;
  int i,j,count;
  char ch;
  char temp[MAXSIZE];

  if (Icount < 150) {      //Seeding this amount of the population
    if ( (BWnum[IBWnum] != 0) && (IBWnum < MAXNUMBW) ) {
      count = strlen(Bigwordsol[IBWnum][Iwordnum]);
      for (i=0;i<count;i++)
	temp[i] = Bigwordsol[IBWnum][Iwordnum][i];
      temp[i] = '\0';
      cout << "    Seeding " << temp << "....\n";

      for (i=0; i<genome.size(); i++) {           //initialize genome to -1
	genome[i] = -1;
	Ialpha[i] = 0;
      }
      for (i=0; i<count; i++) {                 //insert each letter of word into genome

	ch = BW[IBWnum][i];                     // Get Crypto letter
	j = (int) ch - 'a';                     // Find its number in alphabet
	genome[j] = (int) temp[i] - 'a';        // Find mate and insert

	Ialpha[(int) temp[i] - 'a'] = 1;
      } 
      for (i=0; i<genome.size(); i++) {         //fill the rest with random
	if (genome[i] == -1) {
	  genome[i] = GARandomInt(0, genome.size()-1);
	  if (Ialpha[genome[i]]) {              //If a letter is picked that was already
	    genome[i] = -1;                     //picked, try again
	    i--;
	  } else {
	    Ialpha[genome[i]] = 1;
	  }
	}
      }
      //      cout << "      " << genome << endl;
      Iwordnum++;
      if (Iwordnum == BWnum[IBWnum]) {     //Move on to next BigWord?
	Iwordnum = 0;
	IBWnum++;
      }
    }
    Icount++;
  } else {

    for(i=genome.size()-1; i>=0; i--)
      genome[(genome.size()-1)-i] = i; //initializes it backwards.. a->z, z->a
    //This way, you can input straight(not neccesarily crypto) strings and it won't
    //generate misleading results
    
    //  genome.gene(25-i, 'a'+i);    // this is another way to set the genes

    for(i=genome.size()-1; i>=0; i--)
      if(GARandomBit()) genome.swap(i, GARandomInt(0, genome.size()-1));
  }
}



// If your compiler does not do automatic instantiation (e.g. g++ 2.6.8),
// then define the NO_AUTO_INST directive.
#ifdef NO_AUTO_INST
#include <ga/GAAllele.C>
#include <ga/GA1DArrayGenome.C>
#if defined(__GNUG__)
template class GAAlleleSet<int>;
template class GAAlleleSetCore<int>;
template class GAArray<int>;
template class GA1DArrayGenome<int>;
template class GA1DArrayAlleleGenome<int>;
#else
GAAlleleSet<int>;
GAAlleleSetCore<int>;
GAArray<int>;
GA1DArrayGenome<int>;
GA1DArrayAlleleGenome<int>;
#endif
#endif

