// Compile using cc -O -o stripdup stripdup.c
// and invoke with: ./stripdup < all-unblockme-games.txt > unique-unblockme-games.txt

// This reads the large file of generated Unblockme
// puzzles generated by "allgames.c",  and removes
// those which are redundant because of blocks that
// are in the same relative position but have been
// slid around into adjacent open spaces.

// Unfortunately it appears that this can only remove
// 39427 redundant entries from the 14,345,851 layouts
// that were generated. Nevertheless, for completeness,
// the code here really ought to be incorporated into
// allgames.c so that redundant layouts are never
// generated rather than having to be removed in a
// subsequent external pass...


/* It appears there are some groups of starting positions
   where one can be mapped to another by moving blocks around
   in more complicated ways that simply sliding one horizontal
   block left or right, or one vertical block up or down.

   To detect these, I have to define layouts as equivalent
   whenever they have the same number of blocks in a row
   where the number and size of the blocks in that row are
   identical. And similarly for columns.

   I believe such layouts are identical with one exception
   which is discussed in the code below, but I haven't yet
   enumerated a proof that this is so.


                   this
    "..HAAA"
    "..HJ.K"
    ".XXJ.K"
    "BBICCK"
    "FGIDD."
    "FGEE.."
                   transforms into this, by executing: EIc
    "..HAAA"
    "..HJ.K"
    ".XXJ.K"
    "BBCC.K"
    "FGIDD."
    "FGIEE."


    Here's the problem...

          One issue that I haven't yet taken into account in the more loose way of calculating equivalent boards:
          if there is a row containing a 3 and a 2, then there are only 3 positions in that row where a vertical
          block can cross from above it to below it, i.e. the locations marked 'x' below:

                [AAABBx]             [AABBBx]
                [xAAABB]     or      [AAxBBB]   depending on the order of the 2 and 3 size of block.
                [AAAxBB]             [AABBBx]

          Given those blocked rows, a vertical 2-block above the blocked rows can never slide to below those two rows,
          *but* the equivalence tests here will consider them to be equivalent even though they force different games:

              |......|
              |..u...|
              |..u...|
              |.**.*.|   (marking the blocked squares with a '*')
              |..d...|
              |..d...|

          In this case, the block in the 'uu' position can never slide into the 'dd' position.  This code does not detect
          this situation.  Because of the lengths of the blocks, it is a problem which only applies to the center row (or
          column) and only when there is a 2-block above or below and no other blocks in that column.  This could be
          detected as a special case when hoption is [012] or [345], since voption must be 16 or 20.

          Actually it just occurred to me that in those circumstances, the vertical blocks ('uu' or 'dd') are fixed
          in place, and I wanted to reject solutions where there are blocks that never move anyway.  (Although I don't
          do so yet.)

          Also the blocking horizontal row cannot be the exit row because only the "AAAXX." or "AAXXX." patterns could
          block and that leaves no non-trivial options for movement to unblock the vertical which is stopping the X-block
          from escaping, so this can only apply to the fourth row as in the diagram above.  So the *only* problem worth
          even considering is the one above.

          So for now, even though it may be wrong, I am going to leave the equivalence code as-is at the risk of making
          a tiny number of uninteresting problems with short solutions seem equivalent.


  There may be a second problem (which I haven't given any thought to yet so I'm not sure about it yet) which
  is that the comparison is only on the *shape* of the block positions, I don't actually look at the specific
  block-letter for each block.  I suspect that this is already sufficient but I need to analyze it to be sure.

*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Valid shapes of all columns, and all rows except the exit row:
/*
const char *options12456[] = {
  "AAABB.", //  0   Group '0'
  "AAA.BB", //  1
  ".AAABB", //  2

  "AABBB.", //  3   Group '1'
  "AA.BBB", //  4
  ".AABBB", //  5

  "AABB..", //  6   Group '2'
  "AA.BB.", //  7
  "AA..BB", //  8
  ".AABB.", //  9
  ".AA.BB", // 10
  "..AABB", // 11
  
  "AAA...", // 12   Group '3'
  ".AAA..", // 13
  "..AAA.", // 14
  "...AAA", // 15
  
  "AA....", // 16   Group '4'
  ".AA...", // 17
  "..AA..", // 18
  "...AA.", // 19
  "....AA", // 20
  
  "......", // 21   Group '5'
};
*/

const int class12456[22] = { '0','0','0', '1','1','1', '2','2','2','2','2','2', '3','3','3','3', '4','4','4','4','4', '5' };

// The exit row (3) is constrained.  The block to be extracted cannot have any horizontal blocks to the right of it,
// and it must have at least one column free to the right of it to be obstructed by a vertical block.

/*
const char *options3[] = {
  "AAAXX.", // 0   Group '6'

  "AAXXX.", // 1   Group '7'

  "AAXX..", // 2   Group '8'
  "AA.XX.", // 3
  ".AAXX.", // 4
  
  "XXX...", // 5   Group '9'
  ".XXX..", // 6
  "..XXX.", // 7
  
  "XX....", // 8   Group 'A'
  ".XX...", // 9
  "..XX..", // 10
  "...XX.", // 11

// WAIT! Do I also need a "......" option vertically to encode columns with no vertical blocks?
// - an empty entry was not needed when *adding* blocks but I think it might be needed when encoding
// a layout with the 12 loop control integers!  ***OOPS!***

};
*/

const int class3[12] = { '6', '7', '8','8','8', '9','9','9', 'A','A','A','A' };

int main(int argc, char **argv) {
  int moves, percent_done;
  int voption1, voption2, voption3, voption4, voption5, voption6,
       option1,  option2,  option3,  option4,  option5,  option6;
  char layout[6*6+1], solution[128+1];
  int line = 0, games = 0;
  char prevclass[13], thisclass[13];

  strcpy(prevclass, "000000000000");
  for (;;) {
    int rc;
    if (feof(stdin)) break;
    rc = fscanf(stdin,
                "%d "
                "\"%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c\""
                " # %s  %d%% done.  "
                "VState=%d/22 %d/22 %d/12 %d/22 %d/22 %d/22  "
                "HState=%d/22 %d/22 %d/12 %d/22 %d/22 %d/22\n\n",
                &moves,
                &layout[ 0], &layout[ 1], &layout[ 2], &layout[ 3], &layout[ 4], &layout[ 5],  
                &layout[ 6], &layout[ 7], &layout[ 8], &layout[ 9], &layout[10], &layout[11],  
                &layout[12], &layout[13], &layout[14], &layout[15], &layout[16], &layout[17],  
                &layout[18], &layout[19], &layout[20], &layout[21], &layout[22], &layout[23],  
                &layout[24], &layout[25], &layout[26], &layout[27], &layout[28], &layout[29],  
                &layout[30], &layout[31], &layout[32], &layout[33], &layout[34], &layout[35],  
                solution,
                &percent_done,
                &voption1, &voption2, &voption3, &voption4, &voption5, &voption6,
                 &option1,  &option2,  &option3,  &option4,  &option5,  &option6);
    layout[6*6] = '\0';

    if (rc == 51) {
      line += 2;

#ifdef VERTICALS_MUST_MATCH_EXACTLY
      // This was a stricter test which eliminated the problem below, *but* also failed to detect
      // a lot of genuine equivalences.  This version is not enabled by default.
      
      // Since redundant verticals were already removed at generation time, the vertical columns must match exactly,
      // leaving only the horizontal rows to be checked for equivalence (i.e. where a block can be slid left or
      // right without affecting the complexity).
      thisclass[ 0] = voption1+'a';
      thisclass[ 1] = voption2+'a';
      thisclass[ 2] = voption3+'a';
      thisclass[ 3] = voption4+'a';
      thisclass[ 4] = voption5+'a';
      thisclass[ 5] = voption6+'a';
#else
      // This was my first way of detecting equivalences but I realised that it could generate a few
      // false positives in one restricted circumstance.
      
      thisclass[0]=class12456[voption1];
      thisclass[1]=class12456[voption2];
      thisclass[2]=class12456[voption3];
      thisclass[3]=class12456[voption4];
      thisclass[4]=class12456[voption5];
      thisclass[5]=class12456[voption6];
#endif

      // but the horizontals take part in an equivalence class...
      thisclass[ 6]=class12456[option1];
      thisclass[ 7]=class12456[option2];
      thisclass[ 8]=class3    [option3];
      thisclass[ 9]=class12456[option4];
      thisclass[10]=class12456[option5];
      thisclass[11]=class12456[option6];
      thisclass[12]='\0';

      if (strcmp(prevclass, thisclass) == 0) fprintf(stdout, "// ");
      fprintf(stdout,
              "%d "
              "\"%s\""
              " # %s  %d%% done.  "
              "VState=%d/22 %d/22 %d/12 %d/22 %d/22 %d/22  "
              "HState=%d/22 %d/22 %d/12 %d/22 %d/22 %d/22"
              " %s %s"
              "\n\n",
              moves,
              layout,
              solution,
              percent_done,
              voption1, voption2, voption3, voption4, voption5, voption6,
              option1,  option2,  option3,  option4,  option5,  option6,
              prevclass, thisclass);
      strcpy(prevclass, thisclass);
      
      games += 1;
    } else {
      fprintf(stdout, "rc = %d at line %d\n", rc, line);
      exit(EXIT_FAILURE);
    }
  }

  exit(EXIT_SUCCESS);
  return EXIT_FAILURE;
}
