//#define ORIG 1
#define STUPID_COMP_SHORTCUT
// macro-process pseudo stack-based assembly language into actual x86 code, and compile with nasm
/*


http://en.wikipedia.org/wiki/X86_calling_conventions
http://unixwiz.net/techtips/win32-callconv-asm.html
http://www.codeproject.com/KB/cpp/calling_conventions_demystified.aspx

http://www.agner.org/optimize/calling_conventions.pdf - see table 4, register usage

eax, ecx and edx may be corrupted inside a function.

edx is used for long long results, which I'm not returning, so don't care.

I push left->right, not right->left, so incompatible with CDECL at the moment, unless you
call in reverse from the program (actually, same as Imp77 on intel does)


  another pending optimisation:

              call   putchar                         ;; CALL putchar , 1 , 0
              add    esp, 4
              mov    esp, ebp                        ;; RET 0
              pop    ebp
              ret

  i think any assignment to esp is redundant before a return.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

static char *Progname;
FILE *infile, *outfile;

#define TRUE (0==0)
#define FALSE (0!=0)

#define DEBUG_PARSER 1
#define DEBUG_LEXER 1
//#define DEBUG 1
int debug_parser = FALSE;
int debug_lexer = FALSE;
int opt_optimiser = FALSE;

#define sourcefile infile
#define curfile "infile"

#define MAXLINE 1024

#include "assemble.h"

/* C[] is the source character token stream */

typedef struct sourceinfo { // ATOMS for processed input stream
    char *s; // string contents
    int l;   // lineno
    int col; // column
    int t;   // type - tag, "string", 'charconst', or char, so far
    char *f; // source or includefile  name
} sourceinfo;

static sourceinfo c[16*1024];

int nextfree, arraysize;
char onecharstr[1024];

int A[16*1024];
int next_free_a, a_size;

int cp;  // code pointer.  Has to be global state.
int ap;  // Analysis record pointer.

// Built-in phrase codes.  Must match with grammar file.
#define TYPE_EOF 0
#define TYPE_TAG 1
#define TYPE_STRING 2
#define TYPE_CHARCONST 3
#define TYPE_CHAR 4
#define TYPE_INT 5
//#define TYPE_HEXINT 6
#define TYPE_KEYWORD 7
#define TYPE_COMMENT 8
#define TYPE_NEWLINE 9

int lineno = 1, col = 0;
int startline = TRUE;

#define MAXPOOL (1024*32)
char stringpool[MAXPOOL];
int nextstring = 0;

static char *itoaf(char *fmt, int i) // unsafe, can't call twice in one statement
{
  static char s[128];
  sprintf(s, fmt, i);
  return s;
}

static char *f(char *fmt, char *p) // unsafe, can't call twice in one statement
{
  static char s[512];
  sprintf(s, fmt, p);
  return s;
}

static int next_free_entry = 0;
#define MAX_ENTRIES 10000

typedef struct line {
  char *psect;
  char *label;
  char *opcode;
  char *opd1;
  char *opd2;
  char *comment;
} line;

line program[MAX_ENTRIES];


static char xline[1024];
static int xi = 0, xcur_line = 1;

static void xungetc(int c, FILE *f)
{
    if (xi > 0) {
	xi -= 1;
    } else if (c == '\n') {
	xcur_line -= 1;
	xi = strlen(xline);
    }
    ungetc(c, f);
}

static int xfgetc(FILE *f)
{
    int c, ch;
    c = fgetc(f);
    if (c == EOF) return EOF;
    ch = c&255;
    if (ch == '\n') {
	xline[xi] = '\0'; xi = 0;
//	(void)make_binary_tuple(LINE, xcur_line++, (int)strdup(xline));
    } else xline[xi++] = ch;
    if (xi == 1023) xi = 1022;
    xline[xi] = '\0';
    return c;
}

int str_to_pool(char *s)
{
    int tag;
    for (tag = 0; tag <= nextstring; tag++) {
        if (strcmp(stringpool+tag, s) == 0) {
            return tag; /* found, one way or another */
        }
    }
}

void stores(char *s, int lineno, int col, int type, char *fname) {
  int tag;

  if (nextstring + strlen(s) + 1 >= MAXPOOL) exit(1); // TODO: add message
  strcpy(stringpool+nextstring, s); /* Create a backstop for when not found */
  tag = str_to_pool(s);
  if (tag == nextstring) nextstring += strlen(s)+1; /* Not found, add it */

//  makespace(c, nextfree, arraysize);
  c[nextfree].s = stringpool+tag; c[nextfree].l = lineno; c[nextfree].col = col;
  c[nextfree].f = fname; c[nextfree].t = type;
  nextfree++;
}

void storec(int ch, int lineno, int col, int type, char *fname) {
  onecharstr[ch*2] = ch; onecharstr[ch*2+1] = '\0';  // convert char to 1-char string before saving.
  stores(&onecharstr[ch*2], lineno, col, type, fname);
}
/*>*/

int iskeyword(char *s) {
  int i;
  for (i = 0; i < MAX_KEYWORD; i++) if (strcmp(s, keyword[i]) == 0) return TRUE;
  return FALSE;
}

void line_reconstruction(void)
{
  static int whitespace;
  static int peek;
  int ch;
  /* Pre-process input ready for parsing.  Tokens are stored in array C[] */

  whitespace = FALSE;
  for (;;) {
    ch = xfgetc(sourcefile); if (ch == EOF) break;
    ch &= 255; // int, positive.

    peek = xfgetc(sourcefile); xungetc(peek, sourcefile);

    if (isalpha(ch) || (ch == '_')) {
        /*< token or keyword */
        int nextfree = 0, strsize = 0, startcol = col;
        char token[1024];

        whitespace = FALSE;
        for (;;) {
//          makespace(token, nextfree, strsize);
          if (isalpha(ch) || isdigit(ch) || (ch == '_')) {
            // digits allowed after 1st char.
            col++;
            token[nextfree++] = ch;
          } else {
            token[nextfree] = '\0'; xungetc(ch, sourcefile);
            break;
          }
          ch = xfgetc(sourcefile);
        }
        stores(token, lineno, startcol, iskeyword(token) ? TYPE_KEYWORD : TYPE_TAG, curfile);
        //free(token);
        /*>*/
    } else if (isdigit(ch)) {
        /*< Number */
        int nextfree = 0, numsize = 0;
        char number[1024];

        // Store as a string...
        whitespace = FALSE;
        for (;;) {
//          makespace(number, nextfree, numsize);
          if (isdigit(ch)) {
            col++;
            number[nextfree++] = ch;
          } else {
            number[nextfree] = '\0'; xungetc(ch, sourcefile);
            break;
          }
          ch = xfgetc(sourcefile);
        }
        stores(number, lineno, col, TYPE_INT, curfile);
        //free(number);
	/*>*/
    } else switch (ch) {
        case '\'': // Handle 'c' character constants
        case '"': // Handle "string"
      /*< literals */
      {
        int nextfree = 0, strsize = 0, quotech = ch;
        char string[1024];

        whitespace = FALSE;
        col++;
        for (;;) {
          ch = xfgetc(sourcefile); // Newlines are allowed
          col++;
//          makespace(string, nextfree, strsize);
          if (ch == '\\') {
            ch = xfgetc(sourcefile); col++;
            if (ch == '\\') {           string[nextfree++] = ch;
            } else if (ch == '\'') {    string[nextfree++] = '\'';
            } else if (ch == '"') {     string[nextfree++] = '"';
            } else if (ch == 'n') {     string[nextfree++] = '\n';
            } else if (ch == 'r') {     string[nextfree++] = '\r';
            } else if (ch == 't') {     string[nextfree++] = '\t';
            } else {
              // Warn of unknown (to me) \x escape.  Probably an error.
	      string[nextfree++] = '\\'; string[nextfree++] = ch;
            }
          } else if (ch != quotech) {   string[nextfree++] = ch;
          } else {
            string[nextfree] = '\0';
            break;
          }
        }

        if (quotech == '\'') {
          if (strlen(string) == 1) {
          } else if (strlen(string) <= 4) {
            // Warn that 'xx' as a 32-bit int is a non-standard extension
          } else {
            // Warn that this is probably a string with the wrong type of quote.
          }
        }
        stores(string, lineno, col, (quotech == '\'' ? TYPE_CHARCONST : TYPE_STRING), curfile);
        //free(string);
      }
      break;
      /*>*/

    // this parser is line-at-a-time.  If it was whole-file, we wouldn't return here
        case '\n':      lineno++; startline = TRUE; col = 0; whitespace = TRUE;
      storec(ch, lineno, col++, TYPE_NEWLINE, curfile);
      return;

        case '\t':
        case ' ':       col++; // Does not affect whitespace
      break;

    default:
      whitespace = FALSE;
      storec(ch, lineno, col++, TYPE_CHAR, curfile);
    }
  }
  // set up a dummy at the end because we sometimes look ahead by 1
  // in the parsing code and don't want to hit uninitialised data.
  c[nextfree].t = TYPE_EOF;
  c[nextfree].s = "<EOF>";
  c[nextfree].l = lineno;
  c[nextfree].col = col;
  /*>*/
}



// ============================== code generation section ============================
int pos(int i)
{
  if (i < 0) return 0;
  return i;
}

static int access = TRUE;
static char *hack = NULL; // source
static char cur_psect[128] = { '\0' };

char *strdup_or_null(const char *s)
{
  char *t;
  if (s == NULL) return(NULL);
  t = strdup(s);
  if (t == NULL) {
    fprintf(stderr, "Won't\n");
    exit(2);
  }
  return t;
}

int eq(const char *s, const char *t)
{
  if (s == t) return(TRUE); // includes if both NULL
  if ((s == NULL) || (t == NULL)) return(FALSE);
  return (strcmp(s, t) == 0);
}

int isnull(const char *s)
{
  if (s == NULL) return(TRUE);
  if (*s == '\0') return(TRUE);
  return(FALSE);
}

//                       "CMPGE", "CMPLT", "CMPLE", "CMPGT", "CMPEQ", "CMPNE"; 
static char *cond[6] = { "ge",    "l",     "le",    "g",     "e",     "ne" };
static char *jcond[6] = { "jge",    "jl",     "jle",    "jg",     "je",     "jne" };

char *inverse_jump(char *jump)
{
  int i;
  for (i = 0; i < 7; i++) {
    if (strcmp(jcond[i], jump) == 0) return(jcond[i^1]);
  }
  fprintf(stderr, "Coding error.  Is there a new opcode beginning with 'j' ???\n");
}

int iscondjump(char *jump)
{
  int i;
  for (i = 0; i < 7; i++) {
    if (strcmp(jcond[i], jump) == 0) return TRUE;
  }
  return FALSE;
}


static char *spacestr = "                                                            ";
static char *spaces = NULL;

void plant(const char *psect, const char *label, const char *opcode, const char *opd1, const char *opd2, char *comment)
{
  int elide = FALSE;
  // using strings, and especially multiple heap strings, is scandalously inefficient.
  if (next_free_entry == MAX_ENTRIES) {
    fprintf(stderr, "Won't\n");
    exit(1);
  }

  if (spaces == NULL) spaces = spacestr + strlen(spacestr); // points to \0

  if (hack != NULL) {
    static char c2[256];
    if (comment == NULL) {
      sprintf(c2, "; %s", hack);
    } else {
      sprintf(c2, "; %s%s  ; %s", hack, spaces-pos(11-strlen(hack)), comment);
    }
    comment = c2;
    hack = NULL;
  }

  if (eq(psect, "text")) {
    if (isnull(label)) {
      if (!access) elide = TRUE;
    } else {
      access = TRUE;
    }
    if (eq(opcode, "jmp") || eq(opcode, "ret")) access = FALSE; // for *following* code, not this line.
  } // other psects don't affect access

  if (!elide) { // previously we commented these out; now we just drop them silently
    program[next_free_entry].psect = strdup_or_null(psect);
    program[next_free_entry].label = strdup_or_null(label);
    program[next_free_entry].opcode = strdup_or_null(opcode);
    program[next_free_entry].opd1 = strdup_or_null(opd1);
    program[next_free_entry].opd2 = strdup_or_null(opd2);
    program[next_free_entry].comment = strdup_or_null(comment);
    next_free_entry += 1;
  }
}

void print_comment(char *t)
{
  char *s;
  for (;;) {
    s = strstr(t, ";;");
    if (s == NULL) break;
    *s = '\0'; s += 2;
    fprintf(outfile, "%s\n                                                   ;;", t);
    t = s;
  }
  fprintf(outfile, "%s", t);
}

void reap(const char *psect, const char *label, const char *opcode, const char *opd1, const char *opd2, char *comment)
{
  char opc[128];
  int i;

  *opc = '\0';
  if (opcode != NULL) sprintf(opc, "%s", opcode);

  if (spaces == NULL) spaces = spacestr + strlen(spacestr); // points to \0

  if (strcmp(psect, cur_psect) != 0) {
    strcpy(cur_psect, psect);
    fprintf(outfile, "%sSECTION .%s\n", spaces-12, psect);
  }

  if ((label != NULL) && (*label != '\0')) {
    i = 10-strlen(label); fprintf(outfile, "%s:%s ", label, spaces-pos(i));
  } else {
    fprintf(outfile, "%s", spaces-12);
  }

  // if (elide) fprintf(outfile, ";");

  if (isalpha(opc[0]) && isupper(opc[0])) opc[0] = tolower(opc[0]);
  i = 6-strlen(opc); fprintf(outfile, "%s %s", opc, spaces-pos(i));
  fprintf(outfile, "%s", opd1);
  i = strlen(opd1);
  if (opd2 != NULL) {
    i += 2+strlen(opd2);
    fprintf(outfile, ", %s", opd2);
  }
  i = 30-i;
  fprintf(outfile, "%s ", spaces-pos(i+1));
  if ((comment != NULL) && (*comment != '\0')) {
    if (*comment == ';') {
      fprintf(outfile, ";");
    } else {
      fprintf(outfile, "                ; ");
    }
    print_comment(comment);
    //fprintf(outfile, "%s", comment);
  }
  fprintf(outfile, "\n");

}

void backcomment(int source, int target)
{
  static char comment[1024];
  if (program[target].comment == NULL) {
    program[target].comment = program[source].comment;
    return;
  }
  if (program[source].comment == NULL) return;
  sprintf(comment, "%s ;%s", program[target].comment, program[source].comment);
  program[target].comment = strdup(comment);
  program[source].comment = NULL; // for wipeout
}

void wipeout(int line, int count)
{
  int i = line + count;
  for (i = line; i < line+count; i++) {
    if (!isnull(program[i].opcode)) {
      fprintf(stderr, "Won't: '%s'\n", program[i-count].opcode);
      exit(3); // trying to wipe out a non-empty line.
    }
  }
  while (i < next_free_entry) {
    program[i-count] = program[i]; // gcc, copy entire record contents
    i += 1;
  }
  next_free_entry -= count;
}

void optimise(void)
{
  int i;
  static char tmp[128];

  // some of these optimisations are overly precise in which patterns they match; it's safer
  // to start narrow and then widen an existing rule by adding more options (eg adding "and"
  // to a test for "add") than it is to assume from the start that any arithmetic op is valid
  // at that point... for example, you could include "add", and "sub", forgetting that sub
  // is not commutative.   It doesn't matter if you omit a tiny number of optimisation
  // opportunities, but it matters a lot if you get even one wrong.

  // this style of optimising make several assumptions about the style of code being
  // generated, for instance that registers are loaded once and then used once, but not
  // remembered.  Once we start doing register remembering, some of these optimisations
  // could break, so we need to structure our optimisations into sequential passes, with
  // known assumptions for each pass.


  i = 0;
  for (;;) {
    if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file)

    // LABELS NECESSARY:

    // sequences of 2 instructions + label following:
    if (eq(program[i].psect, "text") &&
        eq(program[i+1].psect, "text") && isnull(program[i+1].label) &&
        eq(program[i+2].psect, "text") && (!isnull(program[i+2].label))
       ) {

      if (
	  (program[i].opcode[0] == 'j' ) && (!eq(program[i].opcode, "jmp")) && (strncmp(program[i].opd1, "F_", 2) == 0)
          && eq(program[i+1].opcode, "jmp")
          && eq(program[i].opd1, program[i+2].label)
         ) { // sequences of 4 instructions:
      /*
            je     F_1118                          ;; BF F_1118 
            jmp    nq                              ;; B nq 
F_1118:                                            ;; F_1118 : 

            =>

            jne    nq

       */
	 program[i].opcode = inverse_jump(program[i].opcode);
         program[i].opd1 = program[i+1].opd1;

         program[i+1].opcode = "";
         program[i+1].opd1 = "";

         program[i+2].label = ""; // safe assumptions as F_ labels only ever used once
         backcomment(i+1, i);
         wipeout(i+1, 1);
         if (isnull(program[i+1].opcode) && isnull(program[i+1].opd1) && isnull(program[i+1].opd2)) {
           backcomment(i+1, i); wipeout(i+1, 1);
	 }

         i -= 10; if (i < 0) i = 0; continue;

       }
    }
    // UNBROKEN BY LABELS:

    // sequences of 5 instructions:
    if (eq(program[i].psect, "text") &&
        eq(program[i+1].psect, "text") && isnull(program[i+1].label) &&
        eq(program[i+2].psect, "text") && isnull(program[i+2].label) &&
        eq(program[i+3].psect, "text") && isnull(program[i+3].label) &&
        eq(program[i+4].psect, "text") && isnull(program[i+4].label)
       ) {


      if ( eq(program[i].opcode, "push")   && 
           eq(program[i+1].opcode, "mov")  && eq(program[i+1].opd1, "eax") &&
           eq(program[i+2].opcode, "add")  && eq(program[i+2].opd1, "eax") &&
           eq(program[i+3].opcode, "mov")  && eq(program[i+3].opd1, "ecx") &&
           eq(program[i+4].opcode, "pop")  && eq(program[i+4].opd1, "edx")
          ) {

    /*
            push   dword [ci]                      ;; PUSH ci  ;; PUSH & c     ; note param is rel to ebp in procedures
            mov    eax, dword [text]               ;; PUSH text 
            add    eax, dword 1                    ;; PUSH # 1  ;; ADD  ;; POPI 4 
            mov    ecx, c                          
            pop    edx                             
    =>
            mov    edx, dword [ci]                 ;; PUSH ci  ;; PUSH & c     ; note param is rel to ebp in procedures
            mov    eax, dword [text]               ;; PUSH text 
            add    eax, dword 1                    ;; PUSH # 1  ;; ADD  ;; POPI 4 
            mov    ecx, c                          

    */

	program[i].opcode = "mov"; // convert pop to a move
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = "edx";

	program[i+4].opcode = ""; // remove the push
        program[i+4].opd1 = "";
        program[i+4].opd2 = "";

        backcomment(i+4, i+3);
        wipeout(i+4, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

    }

    // sequences of 4 instructions:
    if (eq(program[i].psect, "text") &&
        eq(program[i+1].psect, "text") && isnull(program[i+1].label) &&
        eq(program[i+2].psect, "text") && isnull(program[i+2].label) &&
        eq(program[i+3].psect, "text") && isnull(program[i+3].label)
       ) {

       if ((eq(program[i].opcode, "push") ) &&
          (eq(program[i+1].opcode, "push") ) &&
          (eq(program[i+2].opcode, "pop") ) &&
	    ((eq(program[i+2].opd1, "eax") ) || (eq(program[i+2].opd1, "ecx") )) &&
          (eq(program[i+3].opcode, "pop") ) &&
	    ((eq(program[i+3].opd1, "eax") ) || (eq(program[i+3].opd1, "ecx") ))
          ) {

    /*
    push   A
    push   B
    pop    eax
    pop    ecx

    =>

    mov    eax, A
    mov    ecx, B
    */

        program[i].opcode = "mov";
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = program[i+3].opd1;

        program[i+1].opcode = "mov";
        program[i+1].opd2 = program[i+1].opd1;
        program[i+1].opd1 = program[i+2].opd1;

        program[i+2].opcode = ""; program[i+2].opd1 = "";
        backcomment(i+2, i+1);
        program[i+3].opcode = ""; program[i+3].opd1 = "";
        backcomment(i+3, i+1);

        wipeout(i+2, 2);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if ((eq(program[i].opcode, "push") ) &&
           (eq(program[i+1].opcode, "mov") ) && (!eq(program[i+1].opd1, "ecx")) &&
	   (eq(program[i+2].opcode, "add") || eq(program[i+2].opcode, "sub") || eq(program[i+2].opcode, "and"))
                 && (!eq(program[i+1].opd1, "ecx") ) &&
           (eq(program[i+3].opcode, "pop") && (eq(program[i+3].opd1, "ecx")))
          ) {

    /*
            push   c                               ;; PUSH & c     ; note param is rel to ebp in procedures
            mov    eax, dword [ci]                 ;; PUSH ci
            add    eax, dword 2                    ;; PUSH # 2  ;; ADD  ;; PUSHI 4
            pop    ecx
    =>
                                                   ;; PUSH & c     ; note param is rel to ebp in procedures
            mov    eax, dword [ci]                 ;; PUSH ci
            add    eax, dword 2                    ;; PUSH # 2  ;; ADD  ;; PUSHI 4
            mov    ecx, c

    */

	program[i+3].opcode = "mov"; // convert pop to a move
        program[i+3].opd2 = program[i].opd1;

	program[i].opcode = ""; // remove the push
        program[i].opd1 = "";
        program[i].opd2 = "";

        if (i > 0) backcomment(i, i-1);
        wipeout(i, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if ( eq(program[i].opcode, "push")   && /*eq(program[i].opd1, "dword [ecx+eax*4]") &&*/
           eq(program[i+1].opcode, "mov")  && eq(program[i+1].opd1, "ecx") &&
           eq(program[i+2].opcode, "mov")  && eq(program[i+2].opd1, "eax") &&
           eq(program[i+3].opcode, "pop")  && eq(program[i+3].opd1, "edx")
          ) {

    /*
            push   dword [ecx+eax*4]               
            mov    ecx, a                          ;; PUSH & a     ; note param is rel to ebp in procedures
            mov    eax, dword [pp]                 ;; PUSH pp  ;; POPI 4 
            pop    edx                             
    =>
            mov    edx, dword [ecx+eax*4]               
            mov    ecx, a                          ;; PUSH & a     ; note param is rel to ebp in procedures
            mov    eax, dword [pp]                 ;; PUSH pp  ;; POPI 4 

similarly for
            push   dword [v]                       ;; PUSH v 
            mov    ecx, c                          ;; PUSH & c     ; note param is rel to ebp in procedures
            mov    eax, dword [ci]                 ;; PUSH ci  ;; POPI 4 
            pop    edx                             

    */

	program[i].opcode = "mov"; // convert pop to a move
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = "edx";

	program[i+3].opcode = ""; // remove the push
        program[i+3].opd1 = "";
        program[i+3].opd2 = "";

        backcomment(i+3, i+2);
        wipeout(i+3, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

    }


    // sequences of 3 instructions:
    if (eq(program[i].psect, "text") &&
        eq(program[i+1].psect, "text") && isnull(program[i+1].label) &&
        eq(program[i+2].psect, "text") && isnull(program[i+2].label)
       ) {

      if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
          (eq(program[i+1].opcode, "sub") || eq(program[i+1].opcode, "add")) && eq(program[i+1].opd1, "ecx") &&
          eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, "eax") && eq(program[i+2].opd2, "ecx")
          ) {
        /*
                 mov    ecx, dword [ci]                 ;; PUSH ci
		 sub    ecx, dword 3                    ;; PUSH # 3  ;; SUB
		 mov    eax, ecx                        ;; PUSHI 4
	 
                 ->

                 mov    eax, dword [ci]                 ;; PUSH ci
		 sub    eax, dword 3                    ;; PUSH # 3  ;; SUB
	                                                ;; PUSHI 4
													    
	*/
        program[i].opd1 = "eax";
        program[i+1].opd1 = "eax";
        program[i+2].opcode = ""; program[i+2].opd1 = ""; program[i+2].opd2 = "";
        backcomment(i+2, i+1); wipeout(i+2, 1);
        i -= 10; if (i < 0) i = 0; continue;
      }

      if ( eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
           (eq(program[i+1].opcode, "sub") || eq(program[i+1].opcode, "add")) && eq(program[i+1].opd1, "ecx") &&
	        (strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()?
           eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, program[i].opd2) && eq(program[i+2].opd2, "ecx")
          ) {
        /*
            mov    ecx, dword [fp] ;; PUSH fp
            add    ecx, dword 1                    ;; PUSH # 1   (or any constant)
                                                   ;; ADD
            mov    dword [fp], ecx                 ;; POP fp

=>
            add    dword [fp], dword 1
													    
	*/

        // *POSSIBLY* decide not to do this optimisation if program[i].opd2 is referred to anywhere
        // in the opd2 fields of the next 4 or 5 instructions, ie avoid a redundant memory access
        // if the value can stay in a register until needed again shortly. [TODO]


        // THIS BREAKS if a subsequent instruction has already been optimised to use the cached version of
        // ecx which this originally left behind.  It's really the other optimisation that's wrong.

        int next;

        for (next = i+3; next < i+8; next++) {
          if (!isnull(program[next].label)) break;
          if (eq(program[i].opd2, program[next].opd2) || eq(program[i].opd2, program[next].opd1)) {
            goto skip;
	  }
	}

        program[i+2].opcode = program[i+1].opcode; program[i+2].opd2 = program[i+1].opd2;
        program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = NULL;
        program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;
        backcomment(i, i-1); 
        backcomment(i+1, i-1); 
        wipeout(i, 2);
        i -= 10; if (i < 0) i = 0; continue;
      skip:;
        // PROTECT:
	program[i].opcode = "Mov";
        //program[i+2].opcode = "Mov";
      }

      if (eq(program[i].opcode, "mov")   && (strncmp(program[i].opd1, "dword [", 7) == 0) && eq(program[i].opd2, "ecx") &&
          eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "ecx") && (!eq(program[i+1].opd2, "eax")) &&
                                                                            (!eq(program[i+1].opd2, "ecx")) &&
          eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, "eax") && eq(program[i].opd1, program[i+2].opd2)
          ) {
        /*
           knock on effect of avoiding store-to-store increment when memory address reused soon after:

            mov    dword [pp], ecx                 ;; POP pp
            mov    ecx, a                          ;; PUSH & a
            mov    eax, dword [pp]                 ;; PUSH pp

          =>

            mov    dword [pp], ecx                 ;; POP pp
            mov    eax, ecx                        ;; PUSH & a
            mov    ecx, a                          ;; PUSH pp

	*/
        char *temp = program[i+1].opd2;
        program[i].opcode = "Mov"; // stop this opcode being absorbed by the previous two opcodes
                                   // and becoming part of an:  add dword[loc], dword 1
	program[i+1].opd2 = program[i+1].opd1;
	program[i+1].opd1 = program[i+2].opd1;
        program[i+2].opd1 = program[i+1].opd2;
        program[i+2].opd2 = temp;

        // i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov")   && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) &&
          eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") &&
          eq(program[i+2].opcode, "mov") && ((eq(program[i+2].opd1, "[ecx+eax*4]") && eq(program[i+2].opd2, "edx")) ||
					     (eq(program[i+2].opd1, "edx") && eq(program[i+2].opd2, "dword [ecx+eax*4]")))
          ) {
        /*
            Start this optimisation with a simple reordering:

            mov    ecx, stored                     ;; PUSH & stored
            mov    eax, dword [i]                  ;; PUSH i
                                                   ;; POPI 4
            mov    [ecx+eax*4], edx

          =>
            mov    eax, dword [i]                  ;; PUSH i
                                                   ;; POPI 4
            mov    ecx, stored                     ;; PUSH & stored
            mov    [ecx+eax*4], edx

        Also works for:
            mov    ecx, a                          ;; PUSH & a
            mov    eax, dword [fp]                 ;; PUSH fp
                                                   ;; PUSHI 4
            mov    edx, dword [ecx+eax*4]
	*/
        char *temp;

        // swap opd1;
        temp = program[i].opd1;
        program[i].opd1 = program[i+1].opd1;
        program[i+1].opd1 = temp;

        // swap opd2:
        temp = program[i].opd2;
        program[i].opd2 = program[i+1].opd2;
        program[i+1].opd2 = temp;

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov")   && eq(program[i].opd1, "edx") && (eq(program[i].opd2, "ecx") || (strncmp(program[i].opd2, "dword ", 6) == 0)) &&
 	                                                                  (!eq(program[i].opd2 + strlen(program[i].opd2) - strlen("+eax*4]"), "+eax*4]")) &&
          eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") && (!eq(program[i+1].opd1, "edx")) &&
          eq(program[i+2].opcode, "mov") && 
   	              eq(program[i+2].opd1 + strlen(program[i+2].opd1) - strlen("+eax*4]"), "+eax*4]") &&
                      eq(program[i+2].opd2, "edx")
          ) {
        /*
            Add a second simple reordering:

            mov    edx, dword [nl]                 ;; PUSH nl
            mov    eax, dword [fp]                 ;; PUSH & a
                                                   ;; PUSH fp
                                                   ;; POPI 4
            mov    [a+eax*4], edx

          =>
            mov    eax, dword [fp]                 ;; PUSH & a
                                                   ;; PUSH fp
                                                   ;; POPI 4
            mov    edx, dword [nl]                 ;; PUSH nl
            mov    [a+eax*4], edx

	*/
        char *temp;

        // swap opd1;
        temp = program[i].opd1;
        program[i].opd1 = program[i+1].opd1;
        program[i+1].opd1 = temp;

        // swap opd2:
        temp = program[i].opd2;
        program[i].opd2 = program[i+1].opd2;
        program[i+1].opd2 = temp;

        i -= 10; if (i < 0) i = 0; continue;
      }


      if (eq(program[i].opcode, "mov")   && eq(program[i].opd1, "eax") && 
          (eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub")) && eq(program[i+1].opd1, "eax") && 
                                                      (strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()?
          eq(program[i+2].opcode, "mov") &&
   	              eq(program[i+2].opd2 + strlen(program[i+2].opd2) - strlen("+eax*4]"), "+eax*4]")
          ) { // quite a nice one, given this isn't being done at the AST level...
        /*
            mov     eax, dword [text]               ;; PUSH text
            add/sub eax, dword 1                    ;; PUSH # 1
            mov     eax, dword [c+eax*4]
           =>
            mov     eax, dword [text]               ;; PUSH text

            mov     eax, dword [c+eax*4+1*4]
 	*/
        char *temp;

        sprintf(tmp, "%s", program[i+2].opd2);
        temp = strrchr(tmp, ']');
        sprintf(temp, "%c%s*4]", (eq(program[i+1].opcode, "add") ? '+' : '-'), program[i+1].opd2+6); // NO - REMOVE "dword " FIRST, AND GET "4" FACTOR FROM ORIG STRING.
        program[i+2].opd2 = strdup(tmp);

        program[i+1].opcode = "";
        program[i+1].opd1 = "";
        program[i+1].opd2 = "";
        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov")   && eq(program[i].opd1, "eax") && 
          (eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub")) && eq(program[i+1].opd1, "eax") && 
                                                      (strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()?
          eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1 + strlen(program[i+2].opd1) - strlen("+eax*4]"), "+eax*4]")
          ) { // same as above, but 3rd instr operands swapped
        /*
            mov    eax, dword [text]               ;; PUSH text
            add    eax, dword 1                    ;; PUSH # 1
                                                   ;; ADD
                                                   ;; POPI 4
            mov    [c+eax*4], edx
 	*/
        char *temp;

        sprintf(tmp, "%s", program[i+2].opd1);
        temp = strrchr(tmp, ']');
        sprintf(temp, "%c%s*4]", (eq(program[i+1].opcode, "add") ? '+' : '-'), program[i+1].opd2+6); // NO - REMOVE "dword " FIRST, AND GET "4" FACTOR FROM ORIG STRING.
        program[i+2].opd1 = strdup(tmp);

        program[i+1].opcode = "";
        program[i+1].opd1 = "";
        program[i+1].opd2 = "";
        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "call") && 
          eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "[esp]") && eq(program[i+1].opd2, "eax") &&
          eq(program[i+2].opcode, "pop") && eq(program[i+2].opd1, "eax")
          ) {
      /*
            call   rfib1                           ;; CALL rfib1 , 1 , 1
            mov    [esp], eax
            pop    eax                             ;; ADD
         =>
            call   rfib1                           ;; CALL rfib1 , 1 , 1
            add    esp, 4
       */

        program[i+1].opcode = "add"; program[i+1].opd1 = "esp"; program[i+1].opd2 = "4";
        program[i+2].opcode = ""; program[i+2].opd1 = ""; program[i+2].opd2 = "";
        backcomment(i+2, i+1); wipeout(i+2, 1);
        i -= 10; if (i < 0) i = 0; continue;
      }



    }

    // sequences of 2 instructions:
    if ((eq(program[i].psect, "text") ) &&
        (eq(program[i+1].psect, "text") ) && isnull(program[i+1].label)
        ) {

      if ((eq(program[i].opcode, "push") ) &&
	    ((eq(program[i].opd1, "eax") ) || (eq(program[i].opd1, "ecx") )) &&
          (eq(program[i+1].opcode, "pop") )
          ) { // sequences of 2 instructions:
	/*
           push eax; pop fred  =>   mov  eax, fred
	 */
        program[i].opcode = "mov";
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = program[i+1].opd1;

        program[i+1].opcode = ""; program[i+1].opd1 = "";
        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (  (eq(program[i].opcode, "push")  && (strncmp(program[i].opd1, "dword [", 7) == 0)) &&
            (eq(program[i+1].opcode, "pop") && (strncmp(program[i+1].opd1, "dword [", 7) == 0))
          ) {

        // stack-based design of code generator ensures that push followed by pop is never
	// done anywhere that a register holds any significant content

        program[i].opcode = "mov";
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = "eax"; // ANY REGISTER WILL DO!

        program[i+1].opcode = "mov";
        program[i+1].opd2 = program[i].opd1;
        i -= 10; if (i < 0) i = 0; continue;
      }

      if (  eq(program[i].opcode, "push")  && (eq(program[i].opd1, "eax") || eq(program[i].opd1, "ecx") || eq(program[i].opd1, "edx")) &&
            eq(program[i+1].opcode, "pop") && (strncmp(program[i].opd1, "dword [", 7) == 0)
          ) {

	/*
            push   edx
            pop    dword [xrem]                    ;; POP xrem
           =>
            mov dword [xrem], edx
	 */

        program[i].opcode = "mov";
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = program[i+1].opd1;

        program[i+1].opcode = "";
        program[i+1].opd2 = "";
        program[i+1].opd2 = "";

        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (  eq(program[i].opcode, "push")
               && (strncmp(program[i].opd1, "dword ", 6) == 0) && (program[i].opd1[6] != '[') &&
            eq(program[i+1].opcode, "pop")
	 ) {

      //  push   dword 0                         ;; PUSH # 0
      //  pop    eax                             ;; RET 1        ; pass result back in register eax

        // stack-based design of code generator ensures that push followed by pop is never
	// done anywhere that a register holds any significant content

        program[i].opcode = "mov";
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = program[i+1].opd1;

#ifdef NEVER
{// use this snippet when testing changes
  static char x[1024];
  if (program[i].comment == NULL) {
    sprintf(x, "CHECK HERE");
  } else {
    sprintf(x, "%s ;; CHECK HERE", program[i].comment);
  }
  program[i].comment = strdup(x);
}
#endif

        program[i+1].opcode = "";
        program[i+1].opd1 = "";

        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (  eq(program[i].opcode, "push") &&
            eq(program[i+1].opcode, "pop") && 
	      (eq(program[i+1].opd1, "eax") || eq(program[i+1].opd1, "ecx") || eq(program[i+1].opd1, "edx"))
	) {

      //  push   dword 0                         ;; PUSH # 0
      //  pop    eax                             ;; RET 1        ; pass result back in register eax

      /*
      push   dword [fp]                      ;; PUSH fp 
      pop    eax                             ;; RET 1        ; pass result back in register eax
       */

        // stack-based design of code generator ensures that push followed by pop is never
	// done anywhere that a register holds any significant content

        program[i].opcode = "mov";
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = program[i+1].opd1;

        program[i+1].opcode = "";
        program[i+1].opd1 = "";

        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (  eq(program[i].opcode, "add") && eq(program[i].opd1, "esp") && eq(program[i].opd2, "4") &&
            eq(program[i+1].opcode, "push") && (strncmp(program[i+1].opd1, "dword [", 7) != 0)
	       ) { // successive procedure calls ( > 1 arg is a logical extension ...)

	/*
          NOTE: due to addressing limitations, the followingt *cannot* be optimised:

            push   dword [code]                    ;; PUSH code 
            call   stack                           ;; CALL stack , 1 , 0 
            add    esp, 4                          
            push   dword [text]                    ;; PUSH text 
            call   stack                           ;; CALL stack , 1 , 0 
            add    esp, 4                          
            push   dword [num]                     ;; PUSH num 
            call   stack                           ;; CALL stack , 1 , 0 
            add    esp, 4                          
            mov    ecx, dword [ci]                 ;; PUSH ci 

	 */
        program[i].opcode = "mov";
        program[i].opd1 = "[esp]";
        program[i].opd2 = program[i+1].opd1;

        program[i+1].opcode = "";
        program[i+1].opd1 = "";

        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "add") && eq(program[i].opd1, "ecx") && eq(program[i].opd2, "eax") &&
          eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "ecx")
          ) { // sequences of 2 instructions:
	/*
            add    ecx, eax
            mov    eax, ecx

            ->  add ecx, eax
	 */
        char *swap = program[i].opd2;
        program[i].opd2 = program[i].opd1;
        program[i].opd1 = swap;

        program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;

        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "eax") &&
          (eq(program[i+1].opcode, "cmp") || eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub"))
             && eq(program[i+1].opd1, "ecx") && eq(program[i+1].opd2, "eax")
          ) {
         /*
            mov    eax, dword 'z'                  ;; PUSH # 'z'  ;; CMPGT
            cmp    ecx, eax

            -> cmp    ecx, dword 'z'
	 */

	/*
            mov    ecx, eax                                        ; transfer fn result to the stack
            cmp    ecx, dword 0                    ;; PUSH # 0  ;; CMPLT 
	 */

        program[i].opcode = program[i+1].opcode;
        program[i].opd1 = program[i+1].opd1;

        program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;

        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }


      if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
          (eq(program[i+1].opcode, "and") || eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub"))
             && eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "ecx")
          ) {
        /*
            mov    ecx, dword 95                   ;; PUSH # 95  ;; BAND
            and    eax, ecx

            -> and    eax, dword 95
         */
        program[i].opcode = program[i+1].opcode;
        program[i].opd1 = program[i+1].opd1;

        program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;

        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && eq(program[i].opd2, "eax") &&
          eq(program[i+1].opcode, "cmp") && eq(program[i+1].opd1, "ecx")
          ) {

	/*
            mov    ecx, eax                                        ; transfer fn result to the stack
            cmp    ecx, dword 0                    ;; PUSH # 0  ;; CMPLT 

            -> cmp    eax, dword 0
	 */

        program[i].opcode = program[i+1].opcode;
        program[i].opd1 = program[i].opd2;
        program[i].opd2 = program[i+1].opd2;

        program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;

        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov") && eq(program[i].opd2, "eax") &&
          eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "ecx") &&
          eq(program[i].opd1, program[i+1].opd2)
          ) {

	/*
            mov    dword [i], eax                  ;; POP i
            mov    ecx, dword [i]                  ;; PUSH i
   =>
            mov    dword [i], eax                  ;; POP i
            mov    ecx, eax

                 if i.opd1 == i+1.opd2    - trivial case of register remembering

	 */

        program[i].opcode = "Mov"; // PROTECT IT FROM BEING MERGED BY AN "inc"
        program[i+1].opd2 = program[i].opd2; // trivial case of register remembering

        if (eq(program[i+1].opd1, program[i+1].opd2)) { // although this may be worth its own entry below!
          program[i+1].opcode = "";
          program[i+1].opd1 = "";
          program[i+1].opd2 = "";
          backcomment(i+1, i);
          wipeout(i+1, 1);
	}

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov") && (eq(program[i].opd2, "eax") || eq(program[i].opd2, "ecx")) &&
          eq(program[i+1].opcode, "mov") && (eq(program[i+1].opd1, "eax") || eq(program[i+1].opd1, "ecx")) &&
	    eq(program[i].opd2, program[i+1].opd1) &&
            eq(program[i].opd1, program[i+1].opd2)
          ) {

	/*
            mov    dword [i], eax                  ;; POP i
            mov    eax, dword [i]                  ;; PUSH i
=>
            mov    dword [i], eax

                 if i.opd1 == i+1.opd2    - trivial case of register remembering

	 */

        program[i+1].opcode = "";
        program[i+1].opd1 = "";
        program[i+1].opd2 = "";
        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (  eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
                (strncmp(program[i].opd2, "dword ", 6) == 0) && (program[i].opd2[6] != '[') && (program[i].opd2[6] != '-') &&
            (eq(program[i+1].opcode, "shr") || eq(program[i+1].opcode, "shl")) && eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "cl")
	       ) {

	/*
            mov    ecx, dword 7                    ;; PUSH # 7  
                                                   ;; RSH 
            shr    eax, cl                         
           =>
            shr    eax, dword 7

	 */
        program[i].opcode = program[i+1].opcode;
        program[i].opd1 = "eax";
        sprintf(tmp, "byte %s", program[i].opd2+6);
        program[i].opd2 = strdup(tmp);

        program[i+1].opcode = "";
        program[i+1].opd1 = "";
        program[i+1].opd2 = NULL;
        backcomment(i+1, i);
        wipeout(i+1, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) &&
          eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "[ecx+eax*4]") && eq(program[i+1].opd2, "edx")
          ) {

	/*
            mov    ecx, c                          ;; PUSH ci
                                                   ;; POPI 4
            mov    [ecx+eax*4], edx                ;; PUSH ci

          =>

            mov    [c+eax*4], edx

	 */

        sprintf(tmp, "[%s+eax*4]", program[i].opd2);
        program[i+1].opd1 = strdup(tmp);

        program[i].opcode = "";
        program[i].opd1 = "";
        program[i].opd2 = "";
        backcomment(i, i-1);
        wipeout(i, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) &&
          eq(program[i+1].opcode, "mov")                             && eq(program[i+1].opd2, "dword [ecx+eax*4]")
          ) { // now the reverse version

	/*
            mov    ecx, a                          
            mov    eax, dword [ecx+eax*4]          

          =>

            mov    eax, [a+eax*4]

	 */

        sprintf(tmp, "dword [%s+eax*4]", program[i].opd2);
        program[i+1].opd2 = strdup(tmp);

        program[i].opcode = "";
        program[i].opd1 = "";
        program[i].opd2 = "";
        backcomment(i, i-1);
        wipeout(i, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

      if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "edx") && (strncmp(program[i].opd2, "dword [", 7) != 0) &&
                                                                        // try it, and see what combinations are rejected?
          eq(program[i+1].opcode, "mov") && 

                   (eq(program[i+1].opd1 + strlen(program[i+1].opd1) - strlen("+eax*4]"), "+eax*4]")) && eq(program[i+1].opd2, "edx")
          ) {

	/*
            mov    edx, ecx                        ;; PUSH & a
                                                   ;; PUSH fp
                                                   ;; POPI 4
            mov    [a+eax*4], edx

          =>

            mov    [a+eax*4], ecx

	 */

        program[i+1].opd2 = program[i].opd2;

        program[i].opcode = "";
        program[i].opd1 = "";
        program[i].opd2 = "";
        backcomment(i, i-1);
        wipeout(i, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

    }

    // single instructions:

    if (eq(program[i].opcode, "mov") && eq(program[i].opd1, program[i].opd2)) {
      program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = NULL;
      backcomment(i, i-1);
      wipeout(i, 1);
      i -= 10; if (i < 0) i = 0; continue;
    }

    i += 1;

  }


  // now we do optimisations that break the default assumptions relied on above
  // When these are done, you have to be very careful what you peephole afterwards

  i = 0;
  for (;;) {
    if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file)


      if (eq(program[i].opcode, "mov") && 
          eq(program[i+1].opcode, "cmp") && isnull(program[i+1].label) &&
                iscondjump(program[i+2].opcode) && isnull(program[i+2].label) &&
                eq(program[i].opd1, program[i+1].opd1)
          ) {
        int next = i+3, optimised = FALSE;
        while (eq(program[next].opcode, "mov") && isnull(program[next].label) && eq(program[next].psect, "text") &&
               eq(program[next+1].opcode, "cmp") && isnull(program[next+1].label) &&
               iscondjump(program[next+2].opcode) && isnull(program[next+2].label) &&
	       eq(program[i].opd1, program[next].opd1) && eq(program[next].opd1, program[next+1].opd1) &&
	       eq(program[i].opd2, program[next].opd2)
          ) {
          program[next].opcode = ""; program[next].opd1 = ""; program[next].opd2 = NULL; 
	  backcomment(next, next-1);
	  wipeout(next, 1);
          next = next + 2;
          optimised = TRUE;
	}
	if (optimised) {
          program[i].opcode = "Mov";
          // i -= 10; if (i < 0) i = 0; continue;
	}
      }

    i += 1;
  }

  i = 0;
  for (;;) {
    if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file)


      if (  eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
            eq(program[i+1].opcode, "cmp") && eq(program[i+1].opd1, "ecx") &&
                (strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && (program[i+1].opd2[6] != '-')
	 ) {

	/*

            mov    ecx, dword [code]               ;; PUSH code 
            cmp    ecx, dword 'F'                  ;; PUSH # 'F'  
           =>
            cmp    dword [code], dword 'F'

           This optimisation should be done *after* the skip chains

	 */
        program[i+1].opd1 = program[i].opd2;

        program[i].opcode = "";
        program[i].opd1 = "";
        program[i].opd2 = NULL;
        backcomment(i, i-1);
        wipeout(i, 1);

        i -= 10; if (i < 0) i = 0; continue;
      }

    i += 1;
  }


}

void dump(void)
{
  int i;
  plant("text", "__end_of_program", "", "", NULL, "End of program");

  if (opt_optimiser) {
    plant("text", "", "", "", NULL, "(a few dummies for the optimizer's fenceposting)");
    plant("text", "", "", "", NULL, "...");
    plant("text", "", "", "", NULL, "...");
    plant("text", "", "", "", NULL, "...");
    plant("text", "", "", "", NULL, "...");

    optimise();
  }

    // Should print all .bss and .data entries first, leaving only .text
    // - this will allow optimising over instruction sequences which were broken
    // by inline data.  Remember to wipeout the deleted entries.

  for (i = 0; i < next_free_entry; i++) {
    if (eq(program[i].label, "__end_of_program")) break;
    if (eq(program[i].psect, "data")) reap(
	 program[i].psect,
	 program[i].label,
	 program[i].opcode,
	 program[i].opd1,
	 program[i].opd2,
	 program[i].comment
	 );
  }
  for (i = 0; i < next_free_entry; i++) {
    if (eq(program[i].label, "__end_of_program")) break;
    if (eq(program[i].psect, "bss")) reap(
	 program[i].psect,
	 program[i].label,
	 program[i].opcode,
	 program[i].opd1,
	 program[i].opd2,
	 program[i].comment
	 );
  }
  for (i = 0; i < next_free_entry; i++) {
    if (eq(program[i].label, "__end_of_program")) break;
    if (eq(program[i].psect, "text")) reap(
	 program[i].psect,
	 program[i].label,
	 program[i].opcode,
	 program[i].opd1,
	 program[i].opd2,
	 program[i].comment
	 );
  }
}

// ===================================================================================



int bestparse;        // for error reporting.
char *looking_for;    // 'while looking for <PHRASENAME>' (or literal) ...

void indent(int depth, FILE *f)
{
    int i;
    for (i = 0; i < depth; i++) fprintf(f, "  ");
}

int startparse(int pp) // depth is only for indentation in diags
{
  bestparse = -1; // for error reporting.
  looking_for = "<UNKNOWN>";    // 'while looking for <PHRASENAME>' (or literal) ...
  parse(pp, 0);
}


static int nexttmp = 1;
int parse(int pp, int depth) // depth is only for indentation in diags
{
  /*  Main parsing procedure.  This is a table-driven parser, with the tables
      being generated from the grammar rules embedded in the 'compile' procedure
      below.  The result of the parse is a tree structure, and the values of the
      nodes in the tree structure are used to drive a large 'case' statement
      which selects the actions to be performed after a successful parse.

      There is no grammatical structure embedded in this procedure.  If you're
      looking for the grammar definition, see the procedure called 'compile' instead.
   */
  int saved_cp, saved_ap, i, gp, alts, match;
  char saved_desc[256];

  /*  Initialisation */

  gp = phrase_start[pp-512-MAX_BIP];
  alts = gram[gp];

  /*  Debugging */
#ifdef DEBUG_PARSER
  if (debug_parser) {
    fprintf(outfile, "\n");
    indent(depth, outfile);
    fprintf(outfile, "Phrase %s/%d (%d alternatives) = ", phrasename[pp-512], pp, alts);
    fflush(outfile);
  }
#endif
  
  gp++; // gp now points to first element (length) of first alt

  saved_cp = cp;
  saved_ap = ap;
  

  for (i = 0; i < alts; i++) {
    /*  Starting with the root phrase, recursively examine each alternative */
    int each, phrases = gram[gp++], phrase_count, gap = 0;

    cp = saved_cp;
    ap = saved_ap;

    if (ap+3 > next_free_a) next_free_a = ap+3;
//    makespace(A, next_free_a, a_size);

    A[ap++] = pp;   // record which phrase (could be done outside loop)
    A[ap++] = i;    // and which alt.


    // Count slots needed.  *Could* be precalculated and stored
    // in the grammar, either embedded (after the ALT) or as a
    // separate table

    for (each = 0; each < phrases; each++) if (gram[gp+each] >= 512) gap++;

    A[ap++] = gap;    // Count of alts (gap)
    // ap+gap now points to the slot after the space required, which
    // is where the first subphrase will be stored.
    ap = ap+gap; // recursive subphrases are stored after this phrase.
                 // ap is left updated if successful.


    // successfully parsed phrases are stored in A[saved_ap+3+n]

    if (saved_ap+3+gap > next_free_a) next_free_a = saved_ap+3+gap;
//    makespace(A, next_free_a, a_size);

    /*  Debug */
    // this loop is only for diagnostics
#ifdef DEBUG_PARSER
    if (debug_parser) {
      char *saved_descp;
      fprintf(outfile, "\n");
      indent(depth, outfile);
      fprintf(outfile, "Alternative %d: (%d phrases) ", i+1, phrases);
      saved_descp = saved_desc; *saved_descp = '\0';
      for (each = 0; each < phrases; each++) {
        int phrase = gram[gp+each];
        if (phrase < 256) {
          saved_descp += sprintf(saved_descp, " '%c'", phrase);
        } else if (phrase < 512) {
          saved_descp += sprintf(saved_descp, " \"%s\"/%d", keyword[phrase-256], phrase-256);
        } else if (phrase < 512+MAX_BIP) {
          saved_descp += sprintf(saved_descp, " {%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]);
        } else {
          saved_descp += sprintf(saved_descp, " <%s/%d>", phrasename[phrase-512], phrase);
        }
      }
      fprintf(outfile, "%s\n", saved_desc);
      fflush(outfile);
    }
#endif
    

    match = TRUE; // stays true if all subphrases match
    phrase_count = 0; // only phrases which make it into the A record,
                      // i.e. excluding literals and keywords
    for (each = 0; each < phrases; each++) {
      /*  Within a single grammar rule (alternative), ensure that each subphrase is present */
      int phrase = gram[gp+each];

      /*  Debug */
#ifdef DEBUG_PARSER
      if (debug_parser) {
        indent(depth, outfile);
        fprintf(outfile, "Input token stream = '%s' '%s' '%s' ...\n",
          (cp < nextfree ? c[cp].s : "EOF"),
          (cp+1 < nextfree ? c[cp+1].s : "EOF"),
          (cp+2 < nextfree ? c[cp+2].s : "EOF"));
      }
#endif
      if (cp > bestparse) {
        static char s[128];
#ifdef DEBUG_PARSER
        if (phrase < 256) {
          sprintf(s, "'%c'", phrase);
        } else if (phrase < 512) {
          sprintf(s, "\"%s\"", keyword[phrase-256]);
        } else if (phrase < 512+MAX_BIP) {
          sprintf(s, "{%s}", phrasename[phrase-512]);
        } else {
          sprintf(s, "<%s>", phrasename[phrase-512]);
        }
#endif
        looking_for = s;
        bestparse = cp;
      }
#ifdef DEBUG_PARSER
      if (debug_parser) indent(depth, outfile);
#endif
      

      if (phrase < 256) {
	/*  Literal */
#ifdef DEBUG_PARSER
        if (debug_parser) fprintf(outfile, "'%c'", phrase);
#endif
        if ((c[cp].t != TYPE_CHAR) || (c[cp].s[0] != phrase)) match = FALSE; else cp++;
        // Don't record literals
        
      } else if (phrase < 512) {
	/*  Keyword */
#ifdef DEBUG_PARSER
        if (debug_parser) fprintf(outfile, "\"%s\"/%d", keyword[phrase-256], phrase-256);
#endif
        if (strcmp(keyword[phrase-256], c[cp].s) != 0) match = FALSE; else cp++;
        // Don't record keywords
        
      } else if (phrase < 512+MAX_BIP) {
	/*  Built-in phrase */
        int where = ap; // next phrase to be parsed will be stored at current 'ap'.
#ifdef DEBUG_PARSER
        if (debug_parser) fprintf(outfile, "{%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]);
#endif
        if (c[cp].t != BIP[phrase-512]) match = FALSE; else {
          A[ap++] = phrase;
          A[ap++] = 1;
          A[ap++] = 1;
          A[ap++] = cp++;
          A[saved_ap+3+phrase_count++] = where; // Record BIP
        }
      } else {
	/*  Recursive call to parser for a subphrase */
        int where = ap; // next phrase to be parsed will be stored at current 'ap'.
#ifdef DEBUG_PARSER
        if (debug_parser) fprintf(outfile, "<%s/%d>", phrasename[phrase-512], phrase);
#endif
        if (!parse(phrase, depth+1)) match = FALSE; else {
          A[saved_ap+3+phrase_count++] = where;
        }
      }
      /*  debug */
#ifdef DEBUG_PARSER
      if (debug_parser) {
        fprintf(outfile, "\n");
        indent(depth, outfile);
        fprintf(outfile, "Tried alternative %d: %s - result was %s\n", each+1, saved_desc, (match ? "TRUE" : "FALSE"));
        fflush(outfile);
      }
#endif
      if (!match) break;
    }
    gp += phrases; // move over all phrases, to next alt

    if (match) break;
#ifdef DEBUG_PARSER
    else if (debug_parser) {
      indent(depth, outfile);
      fprintf(outfile, "** Alternative %d FAILED.\n", i+1);
    }
#endif
    // gp now points to first element (length) of next alt, or start of next phrase
  }

  return(match);

}

static char debugstr[1024];
static char *debugs;

static int paramoffset = 1;
static int next_string_const = 1000;
char *compile(int ap)
{
    static char arg[128];
    static int comparison_alt_cache;
    int saved_ap;
    int phrase;  // A[ap] is the phrase number. A[ap+1] is the alt.
    int alt;     // For consistency, in BIPs, the Alt should always be 1
                 // although that may not be the case at the moment :-(
    int phrases; // defined subphrases
    int i;
    int condno;

    // The following ecce command executed on this file will generate varcalc.g:
    // ecce -c "(v.//\\.s..(v/ /e)?m,k)0;%c" varcalc.c varcalc.g
    // May later tweak takeon.c to read from varcalc.c rather than varcalc.g
    // thus simplifying build process and becoming more like yacc.

    saved_ap = ap;
    phrase = A[ap++];
    alt = A[ap++];
    phrases = A[ap++];

#ifdef DEBUG
    fprintf(outfile, "compile(A[%d]) phrase=%s\n", saved_ap, phrasename[phrase-512]);
#endif

    switch (phrase) {

//\\ B<EOF> = 0;
	case P_EOF:
	    fprintf(stderr, "EOF!\n");
            dump();
	    exit(0);

//\\ B<TAG> = 1;
	case P_TAG:
	    return NULL;

//\\ B<STRING> = 2;
	case P_STRING:

//\\ B<CHARCONST> = 3;
	case P_CHARCONST:

//\\ B<INT> = 5;
	case P_INT:

//\\ B<COMMENT> = 8;
	case P_COMMENT:
	    break;

//\\ B<NEWLINE> = 9;
	case P_NEWLINE:

//\\ P<SS> = <EOF>, <LABEL> <NEWLINE>, <INSTR> <NEWLINE>, <dotPROC> <NEWLINE>, <dotEND> <NEWLINE>, <dotWORD> <NEWLINE>, <dotCONST> <NEWLINE>, <dotARRAY> <NEWLINE>, <OPTCOMMENT> <NEWLINE>, <SYNTAXERROR>;
	case P_SS:
	    if (alt == 0) {
              dump();
              exit(0); // or would 'break' be cleaner here?
	    }
	    if (alt < 8) {
	        int cx;

#ifdef NEVER
                debugs = debugstr;
                debugs += sprintf(debugs, "          SECTION .data\n");
                debugs += sprintf(debugs, "line%d:  db \"", lineno);
                hack = debugs;
	        for (cx = 0; cx < cp-1; cx++) {
                  if (c[cx].t == TYPE_CHARCONST) {
                    if (*c[cx].s == '"') {
		      debugs += sprintf(debugs, "'\", %d, \"", *c[cx].s);
		    } else {
		      debugs += sprintf(debugs, "'%s' ", c[cx].s);
		    }
		  } else {
		    debugs += sprintf(debugs, "%s ", c[cx].s);
		  }
		}
                fprintf(outfile, "                                        ;; %s\n", hack);
		debugs += sprintf(debugs, "\", 10, 0\n");
                debugs += sprintf(debugs, "          SECTION .text\n");

		debugs += sprintf(debugs, "          pop eax\n");
		debugs += sprintf(debugs, "          push eax\n");
		debugs += sprintf(debugs, "          push eax\n");
		debugs += sprintf(debugs, "          push dword fmt_tos    ; address of %%s format string\n");
		debugs += sprintf(debugs, "          call printf           ; Call C function\n");
		debugs += sprintf(debugs, "          add esp, 8            ; pop 2 params\n");

		debugs += sprintf(debugs, "          push dword line%d      ; address of string\n", lineno);
		debugs += sprintf(debugs, "          push dword fmt_str    ; address of %%s format string\n");
		debugs += sprintf(debugs, "          call printf           ; Call C function\n");
		debugs += sprintf(debugs, "          add esp, 8            ; pop 2 params\n");
#else
                hack = debugs = debugstr;
	        for (cx = 0; cx < cp-1; cx++) {
                  if (c[cx].t == TYPE_CHARCONST) {
                    if (*c[cx].s == '"') {
		      debugs += sprintf(debugs, "'\", %d, \"", *c[cx].s);
		    } else {
		      debugs += sprintf(debugs, "'%s' ", c[cx].s);
		    }
		  } else {
		    debugs += sprintf(debugs, "%s ", c[cx].s);
		  }
		}
#endif
		compile(A[ap]);
	    }
	    return NULL;

//\\ P<SYNTAXERROR> = ;
        case P_SYNTAXERROR:
	    fprintf(stderr, "Parse failed looking for %s\n", looking_for);
	    return NULL;
//\\ P<CALL> = "CALL" <TAG> ',' <INT> ',' <INT>;
	case P_CALL:
	    if (strcmp(c[A[A[ap]+3]].s, "psym") == 0) { // special for 'print "xxx"'
              plant("text", "", "push", "dword fmt_chr", NULL, "address of format string");
	      plant("text", "", "call", "printf", NULL, "Call C function");
              plant("text", "", "add", "esp", "8", "pop 2 params");
	    } else {
	      plant("text", NULL, "call", c[A[A[ap]+3]].s, NULL, NULL);
              if (atoi(c[A[A[ap+1]+3]].s) != 0) {
                plant("text", "", "add", "esp", itoaf("%d", atoi(c[A[A[ap+1]+3]].s)*4), NULL);
	      }
              if (atoi(c[A[A[ap+2]+3]].s) != 0) {
		plant("text", "", "push", "eax", NULL, NULL);
	      }
            }
	    return NULL;

//\\ P<dotPROC> = <LABEL> '.' "PROC";
	case P_dotPROC:
	    paramoffset = 1; // somewhat hacky, used with .PARAM
            plant("text", c[A[A[A[ap]]+3]].s, "push", "ebp", NULL, NULL);
            plant("text", "", "mov", "ebp", "esp", NULL);
            return NULL;

//\\ P<dotEND> = '.' "END";
	case P_dotEND:
	    plant("text", "", "mov", "esp", "ebp", NULL);
	    plant("text", "", "pop", "ebp", NULL, NULL);
            // fprintf(outfile, "          mov     eax,0           ; normal, no error, return value\n"); // drop-through case in void procedure.
            // ... if it was a function with a result, this is wrong anyway...
	    plant("text", "", "ret", "", NULL, NULL);
            dump();
	    exit(0); // might be nicer to find a cleaner way to break out of main parse loop...

//\\ P<dotWORD> = <LABEL> '.' "WORD" <INT>;
        case P_dotWORD:
#ifdef ORIG
	  plant("bss",c[A[A[A[ap]]+3]].s, "resd", c[A[A[ap+1]+3]].s, NULL, NULL);
#else
            if (atoi(c[A[A[ap+1]+3]].s) == 1) {
	      plant("data",c[A[A[A[ap]]+3]].s, "dd", "$80808080", NULL, NULL);
	    } else {
	      plant("data",c[A[A[A[ap]]+3]].s, itoaf("times %d dd", atoi(c[A[A[ap+1]+3]].s)), "$80808080", NULL, NULL);
	    }
#endif
            return NULL;

//\\ P<dotARRAY> = <LABEL> '.' "ARRAY";
	case P_dotARRAY:
	    plant("data", c[A[A[A[ap]]+3]].s, "", "", NULL, NULL);
            return NULL;

//\\ P<dotCONST> =  '.' "CONST" <INT>;
        case P_dotCONST:
	    plant("data", "", "dd", c[A[A[ap]+3]].s, NULL, NULL);
            return NULL;

//\\ P<EXIT> = "EXIT";
        case P_EXIT:  // does this require a RET first?  End of program?
//\\ P<RET> = "RET" <INT>;
        case P_RET:
	    if (atoi(c[A[A[ap]+3]].s) != 0) {
	      plant("text", "", "pop", "eax", NULL, NULL);
	    }
	    plant("text", "", "mov", "esp", "ebp", NULL);
	    plant("text", "", "pop", "ebp", NULL, NULL);
            if ((phrase == P_EXIT) /* || (atoi(c[A[A[ap]+3]].s) == 0) */ ) {
	      // Do we really need to return 0 status code from void function?  Let's omit it and see...
	      // OK... can omit for internal functions, but MAIN is called as an int function
	      plant("text", "", "mov", "eax", "0", NULL);
	    }
	    plant("text", "", "ret", "", NULL, NULL);
	    if (phrase == P_EXIT) {
              dump();
              exit(0);
	    }
	    return NULL;


//\\ P<INSTR> = <OPTLABEL> <ASSEM> <OPTCOMMENT>;
	case P_INSTR:
	    compile(A[ap]);
	    compile(A[ap+1]);
	    return NULL;

//\\ P<ASSEM> = <ADD>, <SUB>, <MUL>, <DIV>, <MOD>, <NEG>, <NOT>, <BAND>, <BOR>, <LSH>, <RSH>,
//\\            <CALL>, <RET>, <PUSHSTR>, <PUSHLIT>, <PUSHIND>, <PUSHADDR>, <PUSHVAR>, <POPIND>, <POPVAR>,
//\\            <PARAM>, <CMP>, <BT>, <BF>, <B>, <PRINT>, <EXIT>, <UNKNOWN>;
	case P_ASSEM:
	    compile(A[ap]);
	    return NULL;

//\\ P<CMP> = <COND>;
	case P_CMP:
	    condno =  A[A[ap]+1];
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "cmp", "ecx", "eax", NULL);
#ifdef STUPID_COMP_SHORTCUT
            comparison_alt_cache = condno;
#else
    // See http://faydoc.tripod.com/cpu/setge.htm for SETGE instruction as an alternative?  
#ifdef branching
	    fprintf(outfile, "          j"); compile(A[ap]); fprintf(outfile, " T_%d\n", nexttmp);
	    fprintf(outfile, "          push dword 1   ; false\n");
	    fprintf(outfile, "          jmp T_%d\n", nexttmp+1);
	    fprintf(outfile, "T_%d:     push dword 0   ; true\n", nexttmp);
	    fprintf(outfile, "T_%d:", nexttmp+1);
            nexttmp += 2;
#else
	    //	    fprintf(outfile, "          set"); compile(A[ap]); fprintf(outfile, " al\n");
	    //	    fprintf(outfile, "          xor ah,ah\n");
	    //	    fprintf(outfile, "          push word 0\n");
	    //	    fprintf(outfile, "          push ax");
            plant("text", "", "mov", "eax", "0", NULL);
	    //	    fprintf(outfile, "          mov ah,0\n");
	    //	    fprintf(outfile, "          cwde\n");
            sprintf(arg, "set%s", cond[condno]); // <COND>
            plant("text", "", arg, "al", NULL, NULL);
	    //fprintf(outfile, "          set"); compile(A[ap]); fprintf(outfile, " al\n");
            plant("text", "", "push", "eax", NULL, NULL);

	    /*
              Rainer suggests the much improved:

	      cmp <whatever>
	      setl al
	      movzx eax,al
	      push eax	    
             */
#endif
#endif
	    return NULL;

//\\ P<COND> = "CMPGE", "CMPLT", "CMPLE", "CMPGT", "CMPEQ", "CMPNE"; 
           case P_COND:
             // fprintf(outfile, "%s", cond[alt]); handle a level up now
             return NULL;

//\\ P<BF> = "BF" <TAG>;
	case P_BF:
#ifdef STUPID_COMP_SHORTCUT
	    plant("text", "", jcond[comparison_alt_cache ^ 1], c[A[A[ap]+3]].s, NULL, NULL);
	    //fprintf(outfile, "          j%s %s", cond[comparison_alt_cache ^ 1], c[A[A[ap]+3]].s);
#else
	    fprintf(outfile, "          pop eax\n");
	    //	    fprintf(outfile, "          cmp eax, 0\n");
	    fprintf(outfile, "          or eax, eax\n");
	    fprintf(outfile, "          je %s", c[A[A[ap]+3]].s);
#endif
	    return NULL;

//\\ P<BT> = "BT" <TAG>;
	case P_BT:
#ifdef STUPID_COMP_SHORTCUT
	    plant("text", "", jcond[comparison_alt_cache], c[A[A[ap]+3]].s, NULL, NULL);
	    // fprintf(outfile, "          j%s %s", cond[comparison_alt_cache], c[A[A[ap]+3]].s);
#else
	    fprintf(outfile, "          pop eax\n");
	    //	    fprintf(outfile, "          cmp eax, 0\n");
	    fprintf(outfile, "          or eax, eax\n");
	    fprintf(outfile, "          jne %s", c[A[A[ap]+3]].s);
#endif
	    return NULL;

//\\ P<B> = "B" <TAG>;
	case P_B:
	    plant("text", "", "jmp", c[A[A[ap]+3]].s, NULL, NULL);
	    return NULL;

//\\ P<MUL> = "MUL";
	case P_MUL:
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "imul", "ecx", NULL, "(edx:eax = eax*ecx)");
	    plant("text", "", "push", "eax", NULL, NULL);
	    return NULL;

//\\ P<DIV> = "DIV";
	case P_DIV:
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "mov", "edx", "0", NULL);
	    plant("text", "", "idiv", "ecx", NULL, "(div -> eax, remainder -> edx)");
	    plant("text", "", "push", "eax", NULL, NULL);
	    return NULL;

//\\ P<MOD> = "MOD";
	case P_MOD:
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "mov", "edx", "0", NULL);
	    plant("text", "", "idiv", "ecx", NULL, "(div -> eax, remainder -> edx)");
	    plant("text", "", "push", "edx", NULL, NULL);
	    return NULL;

//\\ P<ADD> = "ADD";
	case P_ADD:
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "add", "ecx", "eax", NULL);
	    plant("text", "", "push", "ecx", NULL, NULL);
	    //fprintf(outfile, "          pop eax\n");
	    //fprintf(outfile, "          add [esp],eax\n");
	    return NULL;

//\\ P<SUB> = "SUB";
	case P_SUB:
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "sub", "ecx", "eax", NULL);
	    plant("text", "", "push", "ecx", NULL, NULL);
	    //fprintf(outfile, "          pop eax\n");
	    //fprintf(outfile, "          sub [esp],eax\n");
	    return NULL;

//  <NEG>, <NOT>, <LSH>, <RSH>,

//\\ P<NEG> = "NEG";
        case P_NEG:
	  //	    fprintf(outfile, "          pop eax\n");
	  //	    fprintf(outfile, "          neg eax\n");
	  //	    fprintf(outfile, "          push eax");
	    plant("text", "", "neg", "dword [esp]", NULL, NULL);
            return NULL;

//\\ P<NOT> = "NOT";
        case P_NOT:
	  //	    fprintf(outfile, "          pop eax\n");
	  //	    fprintf(outfile, "          not eax\n");
	  //	    fprintf(outfile, "          push eax");
	    plant("text", "", "not", "dword [esp]", NULL, NULL);
            return NULL;

//\\ P<LSH> = "LSH";
        case P_LSH:
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "shl", "eax", "cl", NULL);
	    plant("text", "", "push", "eax", NULL, NULL);
            return NULL;

//\\ P<RSH> = "RSH";
        case P_RSH:
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "shr", "eax", "cl", NULL);
	    plant("text", "", "push", "eax", NULL, NULL);
            return NULL;

//\\ P<BAND> = "BAND", "LAND";
        case P_BAND:  // cheap hack for now
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "and", "eax", "ecx", NULL);
	    plant("text", "", "push", "eax", NULL, NULL);
            return NULL;

//\\ P<BOR> = "BOR", "LOR";
    case P_BOR: // ditto
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "or", "eax", "ecx", NULL);
	    plant("text", "", "push", "eax", NULL, NULL);
            return NULL;

//\\ P<PRINT> = "PRINT";
	case P_PRINT:
	    plant("text", "", "push", "dword fmt_str", NULL, "address of %s format string");
	    plant("text", "", "call", "printf", NULL, "external call to C");
	    plant("text", "", "add", "esp", "8", NULL);
	    return NULL;

//\\ P<PUSHLIT> = "PUSH" '#' <NUM>;
        case P_PUSHLIT:      // changed from OPD to NUM for the moment
	    sprintf(arg, "dword %s", compile(A[ap]));
	    plant("text", "", "push", arg, NULL, NULL);
	    //fprintf(outfile, "          push dword "); compile(A[ap]);
	    return NULL;

//\\ P<PUSHSTR> = "PUSHS" <STRING>;
	case P_PUSHSTR:
	    //fprintf(outfile, "          SECTION .data\n");
            sprintf(arg, "\"%s\"", c[A[A[ap]+3]].s); // TO DO!  String const needs escaping?
            plant("data", itoaf("string%d", ++next_string_const), "db", arg, "0", NULL);
            //fprintf(outfile, "string%d: db      \"%s\", 0\n", ++next_string_const, c[A[A[ap]+3]].s);
            //fprintf(outfile, "          SECTION .text\n");
	    plant("text", "", "push", itoaf("dword string%d", next_string_const), NULL, NULL);
	    return NULL;

//\\ P<PUSHIND> = "PUSHI" <NUM>;
	case P_PUSHIND:
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "push", "dword [ecx+eax*4]", NULL, NULL);
	    return NULL;

//\\ P<PUSHVAR> = "PUSH" <TAG>;
	case P_PUSHVAR:
	    sprintf(arg, "dword [%s]", c[A[A[ap]+3]].s);
	    plant("text", "", "push", arg, NULL, NULL);
	    return NULL;

//\\ P<PUSHADDR> = "PUSH" '&' <TAG>;
	case P_PUSHADDR:
	    plant("text", "", "push", c[A[A[ap]+3]].s, NULL, NULL);// if this is a procedure param it is different from a static
	    return NULL;

//\\ P<POPIND> = "POPI" <NUM>;
	case P_POPIND:
	  //	    fprintf(outfile, "          pop ecx\n");
	  //	    fprintf(outfile, "          pop dword [ecx]");
	    plant("text", "", "pop", "eax", NULL, NULL);
	    plant("text", "", "pop", "ecx", NULL, NULL);
	    plant("text", "", "pop", "edx", NULL, NULL);
	    plant("text", "", "mov", "[ecx+eax*4]", "edx", NULL);
	    return NULL;

//\\ P<POPVAR> = "POP" <TAG>;
	case P_POPVAR:
	    sprintf(arg, "dword [%s]", c[A[A[ap]+3]].s);
	    plant("text", "", "pop", arg, NULL, NULL);
	    return NULL;

//\\ P<PARAM> = '.' "PARAM" <TAG>;
	case P_PARAM:
	    sprintf(arg, "%s ebp+%0d", c[A[A[ap]+3]].s, 4*++paramoffset);
	    plant("text", "", "%define", arg, NULL, NULL);
	    //fprintf(outfile, "%%define %s ebp+%0d\n", c[A[A[ap]+3]].s, 4*++paramoffset);
            plant("text", "", "sub", "esp", "4", NULL);
	    return NULL;

//\\ P<UNKNOWN> = <TAG> <OPTOPD>;
	case P_UNKNOWN:
	    fprintf(outfile, "          %s; UNKNOWN OPCODE", c[A[A[ap]+3]].s);
	    compile(A[ap+1]);
	    return NULL;

//\\ P<OPD> = <NUM>, <TAG>, ;
	case P_OPD:
	    if (alt == 0) fprintf(outfile, "%s", compile(A[ap]));
	    else if (alt == 1) fprintf(outfile, "%s", c[A[A[ap]+3]].s);
	    return NULL;

//\\ P<NUM> = <CHARCONST>, <INT>;
	case P_NUM:
	  {static char last_num[128], *p;
            p = last_num;
	    if (alt == 0) {
                p += sprintf(p, "'");
                if (*c[A[A[ap]+3]].s == '\n') {
                  p += sprintf(p, "\\n");
		} else {
                  p += sprintf(p, "%s", c[A[A[ap]+3]].s);
		}
                if (*c[A[A[ap]+3]].s == '\'') p += sprintf(p, "'");
                p += sprintf(p, "'");
	    } else {
 	        p += sprintf(p, "%s", c[A[A[ap]+3]].s);
	    }
            //fprintf(outfile, "%s", last_num);
	    return last_num;
	  }

//\\ P<LABEL> = <TAG> ':';
	case P_LABEL:
	    plant("text", c[A[A[A[ap]]+3]].s, "", "", NULL, NULL);
	    // now handled up a level except for OPTLABELs in front of regular code (ie jump dests)
	    if (strcmp(c[A[A[A[ap]]+3]].s, "__start") == 0) {
              plant("text", "main", "push", "ebp", NULL, "the program label for the entry point");
              plant("text", "", "mov", "ebp", "esp", "set up incoming call stack frame");
	      /*
                  To make ecce work, we need some sort of initialisation like this:

  if (argc != 3)
    {
      fprintf (stderr, "syntax: %s infile outfile\n", argv[0]);
      exit (1);
    }
  if (strcmp (argv[1], argv[2]) == 0)
    {
      fprintf (stderr, "%s: output file cannot overwrite input file\n",
               argv[0]);
      exit (1);
    }
  infile = fopen (argv[1], "r");
  if (infile == NULL)
    {
      fprintf (stderr, "%s: cannot read file '%s'\n", argv[0], argv[1]);
      exit (1);
    }
  outfile = fopen (argv[2], "w");
  if (outfile == NULL)
    {
      fprintf (stderr, "%s: cannot write file '%s'\n", argv[0], argv[2]);
      exit (1);
    }


               Note that the unix calling convention has argc at [esp] and the args following.


	       */
	    }

	    return NULL;

//\\ P<OPTLABEL> = <LABEL>, ;
	case P_OPTLABEL:
	    if (alt == 0) compile(A[ap]);
	    return NULL;

//\\ P<OPTOPD> = <OPD>, ;
	case P_OPTOPD:
	    if (alt == 0) compile(A[ap]);
	    return NULL;

//\\ P<OPTCOMMENT> = <COMMENT>, ;
	case P_OPTCOMMENT:

//\\ E
	    break;
    }
}

int main(int argc, char **argv)
{
  char outname[256], *s;

    Progname = argv[0];

    if ((argc >= 2) && (strcmp(argv[1], "-d") == 0)) {
      debug_parser = TRUE; argc -= 1; argv += 1;
    }

    if ((argc >= 2) && (strcmp(argv[1], "-O") == 0)) {
      opt_optimiser = TRUE; argc -= 1; argv += 1;
    }

    if (argc != 2) {
      fprintf(stderr, "syntax: %s [-d] file.asm\n", Progname); exit(1);
    }

    sprintf(outname, "%s", argv[1]);
    s = strrchr(outname, '.');
    if (s == NULL) s = outname+strlen(outname);
    sprintf(s, "%s", ".S");

    infile = fopen(argv[1], "r");
    if (infile == NULL) {
      fprintf(stderr, "%s: cannot open input %s - %s\n", Progname, argv[1], strerror(errno)); exit(2);
    }

    outfile = fopen(outname, "w");
    if (outfile == NULL) {
      fprintf(stderr, "%s: cannot open output %s - %s\n", Progname, outname, strerror(errno)); exit(3);
    }

    fprintf(outfile, "; Compile with: nasm -O1 -f elf %s\n", outname);
    plant("text", "", "global", "main", NULL, "the standard gcc entry point");
    plant("text", "", "extern", "printf", NULL, NULL);
    plant("text", "", "extern", "getchar", NULL, NULL);
    plant("text", "", "extern", "putchar", NULL, NULL);
    plant("text", "", "extern", "fgetc", NULL, NULL);
    plant("text", "", "extern", "fputc", NULL, NULL);
    plant("text", "", "extern", "fflush", NULL, NULL);
    plant("text", "", "extern", "exit", NULL, NULL);
    plant("text", "", "extern", "stdin", NULL, NULL);
    plant("text", "", "extern", "stdout", NULL, NULL);

    plant("data", "fmt_str", "db", "\"%s\"", "0", NULL);
    plant("data", "fmt_chr", "db", "\"%c\"", "0", NULL);
    plant("data", "fmt_tos", "db", "\"TOS: %08x\"", "10, 0", NULL);


    for (;;) {
        nextfree = 0; arraysize = 0;
        next_free_a = 0; a_size = 0;
        cp = 0;  // code pointer.  Has to be global state.
        ap = 0;  // Analysis record pointer.

	// parse on a line-by-line basis
        line_reconstruction();

#ifdef DEBUG_LEXER
	if (debug_lexer) {
	    int i; // DEBUG ONLY
	    fprintf(stderr, "\nLexical token stream:\n\n");
	    for (i = 0; i < nextfree; i++) {
		fprintf(stderr, "C[%d] => %s, line %d, col %d: [%0d] %s\n",
			i,  c[i].f, c[i].l, c[i].col, c[i].t, c[i].s);
	    }
	}
#endif

        if (startparse(PHRASE_BASE)) {
            compile(0);
        } else {
	    fprintf(stderr, "Can't\n");
	    exit(1);
	}
    }
    dump();
    exit(0);
    return(0);
}