This is G o o g l e's cache of http://www.gtoal.com/compilers101/tinc/02_Graham_Toal/teeny.c.html as retrieved on 29 Apr 2006 16:15:29 GMT.
G o o g l e's cache is the snapshot that we took of the page as we crawled the web.
The page may have changed since that time. Click here for the current page without highlighting.
This cached page may reference images which are no longer available. Click here for the cached text only.
To link to or bookmark this page, use the following url: http://www.google.com/search?q=cache:yBGi7NL8-VAJ:www.gtoal.com


Google is neither affiliated with the authors of this page nor responsible for its content.

teeny.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define ishex(ch) (int)strchr("0123456789ABCDEFabcdef", ch)
int hextobin(char x)
{
  // Too crude, just a quick hack.  Seldom called.
  if (('0' <= x) && (x <= '9')) {        return x-'0';
  } else if (('A' <= x) && (x <= 'F')) { return x-'A'+10;
  } else if (('a' <= x) && (x <= 'f')) { return x-'a'+10;
  } else {                               return 0;
    // ASSERT: INTERNAL ERROR!
  }
}
void indent(int depth, FILE *f)
{
  int i;
  for (i = 0; i < depth; i++) fprintf(f, "  ");
}

#define DEBUG_PARSER
#include "teeny.h" // GENERATED GRAMMAR FROM takeon.c and teeny.g

#ifndef FALSE
#define FALSE (0!=0)
#define TRUE (0==0)
#endif

char *progname;

int debug_parser = FALSE;

// Built-in phrase codes.  Must match with grammar file.
#define TYPE_EOF 0
#define TYPE_TAG 1
#define TYPE_STRING 2
#define TYPE_CHARCONST 3
#define TYPE_CHAR 4
#define TYPE_INT 5
#define TYPE_HEXINT 6
#define TYPE_KEYWORD 7

typedef struct sourceinfo { // ATOMS for processed input stream
  char *s; // string contents
  int l;   // lineno
  int col; // column
  int t;   // type - tag, "string", 'charconst', or char, so far
  char *f; // source or includefile  name
} sourceinfo;

int bestparse = -1; // for error reporting.
char *looking_for = "<UNKNOWN>";    // 'while looking for <PHRASENAME>' (or literal) ...

// C is the source character token stream
static sourceinfo *c = NULL;
int nextfree = 0, arraysize = 0;
char *onecharstr;

// A is the Analysis record.  Contents point to a C[] structure when the item is a BIP,
// otherwise the format is [phraseno] [altno] [no-of-subphrases] [subphrases and/or BIPs...]
// eg A[25] = 10 means that A[25] is C[10].
static int *A = NULL;
int next_free_a = 0, a_size = 0;


static void *makespace_(void *c, int nextfree, int *arraysize, int objsize) {
  if (nextfree >= *arraysize) {
    *arraysize = (*arraysize * 2) + 1;
    c = (void *)realloc(c, (*arraysize+1) * objsize); // 0:arraysize, inclusive. eg 0:15, 0:127 etc
    if (c == NULL) {fprintf(stderr, "tacctwo: %s\n", strerror(errno)); exit(errno);}
  }
  return c;
}
#define makespace(c, nextfree, arraysize) c = (typeof(c))makespace_(c, nextfree, &arraysize, sizeof(c[0]))

void stores(char *s, int lineno, int col, int type, char *fname) {
  makespace(c, nextfree, arraysize);
  c[nextfree].s = s; c[nextfree].l = lineno; c[nextfree].col = col;
  c[nextfree].f = fname; c[nextfree].t = type;
  nextfree++;
}

void storec(int ch, int lineno, int col, int type, char *fname) {
  onecharstr[ch*2] = ch; onecharstr[ch*2+1] = '\0';  // convert char to 1-char string before saving.
  stores(&onecharstr[ch*2], lineno, col, type, fname);
}

int iskeyword(char *s) {
  int i;
  for (i = 0; i < MAX_KEYWORD; i++) if (strcmp(s, keyword[i]) == 0) return TRUE;
  return FALSE;
}



//--------------------------------------------------------------------------

FILE *sourcefile;
char *curfile;
int startline = TRUE, whitespace = TRUE, lineno = 1, col = 0, ch, peek;

void line_reconstruction(void)  // PRE-PROCESS INPUT READY FOR PARSING
{
  for (;;) {
    ch = fgetc(sourcefile); if (ch == EOF) break;
    ch &= 255; // int, positive.

    peek = fgetc(sourcefile); ungetc(peek, sourcefile);

    if (isalpha(ch)) {
        int nextfree = 0, strsize = 0, startcol = col;
        char *token = NULL;

        whitespace = FALSE;
        for (;;) {
          makespace(token, nextfree, strsize);
          if (isalpha(ch) || isdigit(ch) || (ch == '_')) {
            // digits and '_' allowed after 1st char.
            col++;
            token[nextfree++] = ch;
          } else {
            token[nextfree] = '\0'; ungetc(ch, sourcefile);
            break;
          }
          ch = fgetc(sourcefile);
        }
        stores(strdup(token), lineno, startcol, iskeyword(token) ? TYPE_KEYWORD : TYPE_TAG, curfile);
        free(token);
    } else if (isdigit(ch)) {
        int nextfree = 0, numsize = 0;
        char *number = NULL;

        // Store as a string...
        whitespace = FALSE;
        for (;;) {
          makespace(number, nextfree, numsize);
          if (isdigit(ch)) {
            col++;
            number[nextfree++] = ch;
          } else {
            number[nextfree] = '\0'; ungetc(ch, sourcefile);
            break;
          }
          ch = fgetc(sourcefile);
        }
        stores(strdup(number), lineno, col, TYPE_INT, curfile);
        free(number);
    } else switch (ch) {

    case '$': // Hex constant \$[0-9a-fA-F]+
              // Q: store the '$' in the string or not?
      whitespace = FALSE;
      col++;
      if (ishex(peek)) {
        int nextfree = 0, numsize = 0;
        char *number = NULL;

        for (;;) {
          makespace(number, nextfree, numsize);
          ch = fgetc(sourcefile);
          if (ishex(ch)) {
            col++;
            number[nextfree++] = ch;
          } else {
            number[nextfree] = '\0'; ungetc(ch, sourcefile);
            break;
          }
        }
        stores(strdup(number), lineno, col, TYPE_HEXINT, curfile);
        free(number);
      } else {
        // Warn: probably an error... should not be any naked '$' symbols.
        // If the error to be prined would have been a generic syntax
        // error at the same location, then maybe give a more informative
        // error message such as "Unexpected character '$' near: ..."
        // On the other hand the generic mechanism probably reports this
        // almsost as accurately.
        storec(ch, lineno, col++, TYPE_CHAR, curfile);
      }
      break;

    case '\'': // Handle 'c' character constants
    case '"': // Handle "string"
      {
        int nextfree = 0, strsize = 0, quotech = ch;
        char *string = NULL;

        whitespace = FALSE;
        col++;
        for (;;) {
          ch = fgetc(sourcefile); // Newlines are allowed
          col++;
          makespace(string, nextfree, strsize);
          if (ch == '\\') {
            ch = fgetc(sourcefile); col++;
            if (ch == '\\') {           string[nextfree++] = ch;
            } else if (ch == '\'') {    string[nextfree++] = '\'';
            } else if (ch == '"') {     string[nextfree++] = '"';
            } else if (ch == 'n') {     string[nextfree++] = '\n';
            } else if (ch == 'r') {     string[nextfree++] = '\r';
            } else if (ch == 't') {     string[nextfree++] = '\t';
            } else if (ch == 'x') {
              int x, x1, x2;
              x1 = fgetc(sourcefile); col++;
              if (!ishex(x1)) {
                // WARN: Bad format
                continue;
              }
              x2 = fgetc(sourcefile); col++;
              if (!ishex(x2)) {
                // WARN: Bad format
                continue;
              }
              x = (hextobin(x1)<<4) | hextobin(x2);
              if (x == 0) {
                // WARN: embedded NUL in a string is asking for trouble...
              }
              string[nextfree++] = x;
            } else {
              // Warn of unknown (to me) \x escape.  Probably an error.
	      string[nextfree++] = '\\'; string[nextfree++] = ch;
            }
          } else if (ch != quotech) {   string[nextfree++] = ch;
          } else {
            string[nextfree] = '\0';
            break;
          }
        }

        if (quotech == '\'') {
          if (strlen(string) == 1) {
          } else if (strlen(string) <= 4) {
            // Warn that 'xx' as a 32-bit int is a non-standard extension
          } else {
            // Warn that this is probably a string with the wrong type of quote.
          }
        }
        stores(strdup(string), lineno, col, (quotech == '\'' ? TYPE_CHARCONST : TYPE_STRING), curfile);
        free(string);
      }
      break;

    case '/': // COMMENTS (or just a divide symbol)
      col++;
      whitespace = FALSE;
      if (peek == '/') {
        // Handle line comment
        do {ch = fgetc(sourcefile);} while (ch != '\n');
        lineno++; col = 0; whitespace = TRUE;
      } else if (peek == '*') {
        /* Handle potential multi-line comment */
        ch = fgetc(sourcefile); // Now we have read '/*'
        for (;;) {
          col++;
          ch = fgetc(sourcefile); peek = fgetc(sourcefile);
          if ((ch == '*') && (peek == '/')) break;
          ungetc(peek, sourcefile);
        }
        col += 2;
        (void)fgetc(sourcefile); // Remove '/'
        // QUESTION: How does this affect # directives? 
      } else {
        storec(ch, lineno, col, TYPE_CHAR, curfile);
      }
      break;

    // WHITESPACE
    case '\n':      lineno++;
    case '\r':      startline = TRUE; col = 0; whitespace = TRUE;
      break;

    case '\t':
    case ' ':       col++; // Does not affect whitespace
      break;

    // DIRECTIVES
    case '#':
      // If we interpret any #-directives while lexing, we don't want to
      // do an expensive test on every token, so what we can do is set
      // a countdown timer on the introductory token (either this '#'
      // or the actual keyword such as 'ifdef') and then test that the
      // *previous* tokens match when the timer hits 0, eg
      // C[cp-3] == '#' && C[cp-2] == 'include' ... etc

      if (!whitespace) {
        // WARN: probably an error... should not be any '#' symbols in the
        // middle of a line. (This language uses "!=" or "<>" for not-equal)
      }
      // Drop through

    default:
      whitespace = FALSE;
      storec(ch, lineno, col++, TYPE_CHAR, curfile);
    }
  }
  // set up a dummy at the end because we sometimes look ahead by 1
  // in the parsing code and don't want to hit uninitialised data.
  c[nextfree].t = TYPE_EOF;
  c[nextfree].s = "<EOF>";
  c[nextfree].l = lineno;
  c[nextfree].col = col;
}

//--------------------------------------------------------------------------

int cp = 0;  // code pointer.  Has to be global state.
int ap = 0;  // Analysis record pointer.

int parse(int pp, int depth) // depth is only for indentation in diags
{
  int saved_cp, saved_ap, i, gp, alts, match;

  gp = phrase_start[pp-512-MAX_BIP];
  alts = gram[gp];

  if (debug_parser) {
    fprintf(stdout, "\n");
    indent(depth, stdout);
    fprintf(stdout, "Phrase %s/%d (%d alternatives) = ", phrasename[pp-512], pp, alts);
    fflush(stdout);
  }
  gp++; // gp now points to first element (length) of first alt

  saved_cp = cp;
  saved_ap = ap;

  for (i = 0; i < alts; i++) {
    int each, phrases = gram[gp++], phrase_count, gap = 0;

    cp = saved_cp;
    ap = saved_ap;

    if (ap+3 > next_free_a) next_free_a = ap+3;
    makespace(A, next_free_a, a_size);
    A[ap++] = pp;   // record which phrase (could be done outside loop)
    A[ap++] = i;    // and which alt.


    // Count slots needed.  *Could* be precalculated and stored
    // in the grammar, either embedded (after the ALT) or as a
    // separate table

    for (each = 0; each < phrases; each++) if (gram[gp+each] >= 512) gap++;
    A[ap++] = gap;    // Count of alts (gap)
    // ap+gap now points to the slot after the space required, which
    // is where the first subphrase will be stored.
    ap = ap+gap; // recursive subphrases are stored after this phrase.
                 // ap is left updated if successful.


    // successfully parsed phrases are stored in A[saved_ap+3+n]

    if (saved_ap+3+gap > next_free_a) next_free_a = saved_ap+3+gap;
    makespace(A, next_free_a, a_size);

    // this loop is only for diagnostics
    if (debug_parser) {
      fprintf(stdout, "\n");
      indent(depth, stdout);
      fprintf(stdout, "Alternative %d: (%d phrases) ", i+1, phrases);
      for (each = 0; each < phrases; each++) {
        int phrase = gram[gp+each];
        if (phrase < 256) {
          fprintf(stdout, " '%c'", phrase);
        } else if (phrase < 512) {
          fprintf(stdout, " \"%s\"/%d", keyword[phrase-256], phrase-256);
        } else if (phrase < 512+MAX_BIP) {
          fprintf(stdout, " {%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]);
        } else {
          fprintf(stdout, " <%s/%d>", phrasename[phrase-512], phrase);
        }
      }
      fprintf(stdout, "\n");
      fflush(stdout);
    }

    match = TRUE; // stays true if all subphrases match
    phrase_count = 0; // only phrases which make it into the A record,
                      // i.e. excluding literals and keywords
    for (each = 0; each < phrases; each++) {
      int phrase = gram[gp+each];

      if (debug_parser) {
        indent(depth, stdout);
        fprintf(stdout, "Input token stream = '%s' '%s' '%s' ...\n",
          (cp < nextfree ? c[cp].s : "EOF"),
          (cp+1 < nextfree ? c[cp+1].s : "EOF"),
          (cp+2 < nextfree ? c[cp+2].s : "EOF"));
      }
      if (cp > bestparse) {
        static char s[128];
        if (phrase < 256) {
          sprintf(s, "'%c'", phrase);
        } else if (phrase < 512) {
          sprintf(s, "\"%s\"", keyword[phrase-256]);
        } else if (phrase < 512+MAX_BIP) {
          sprintf(s, "{%s}", phrasename[phrase-512]);
        } else {
          sprintf(s, "<%s>", phrasename[phrase-512]);
        }
        looking_for = s;
        bestparse = cp;
      }

      if (debug_parser) indent(depth, stdout);
      if (phrase < 256) {
        if (debug_parser) fprintf(stdout, "'%c'", phrase);
        if ((c[cp].t != TYPE_CHAR) || (c[cp].s[0] != phrase)) match = FALSE; else cp++;
        // Don't record literals
      } else if (phrase < 512) {
        if (debug_parser) fprintf(stdout, "\"%s\"/%d", keyword[phrase-256], phrase-256);
        if (strcmp(keyword[phrase-256], c[cp].s) != 0) match = FALSE; else cp++;
        // Don't record keywords
      } else if (phrase < 512+MAX_BIP) {
        int where = ap; // next phrase to be parsed will be stored at current 'ap'.
        if (debug_parser) fprintf(stdout, "{%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]);
        if (c[cp].t != BIP[phrase-512]) match = FALSE; else {
          A[ap++] = phrase;
          A[ap++] = 1;
          A[ap++] = 1;
          A[ap++] = cp++;
          A[saved_ap+3+phrase_count++] = where; // Record BIP
        }
      } else {
        int where = ap; // next phrase to be parsed will be stored at current 'ap'.
        if (debug_parser) fprintf(stdout, "<%s/%d>", phrasename[phrase-512], phrase);
        if (!parse(phrase, depth+1)) match = FALSE; else {
          A[saved_ap+3+phrase_count++] = where;
        }
      }
      if (debug_parser) {
        fprintf(stdout, "\n");
        indent(depth, stdout);
        fprintf(stdout, "Tried alternative %d: result was %s\n", each+1, (match ? "TRUE" : "FALSE"));
        fflush(stdout);
      }
      if (!match) break;
    }
    gp += phrases; // move over all phrases, to next alt
    if (match) break; else if (debug_parser) {
      indent(depth, stdout);
      fprintf(stdout, "** Alternative %d FAILED.\n", i+1);
    }
    // gp now points to first element (length) of next alt, or start of next phrase
  }
  return(match);
}

//--------------------------------------------------------------------------

typedef int TRIP;

//------------------------------------------------------------------------------------

/*
      In previous compilers, I had to write custom code for every tree-based
    optimisation, in order to walk down the tree to the right place to find
    the leaves to be optimised.  In this one, I have a generic tree-walking
    procedure which can walk the entire program, but it can be customised 
    so that it takes action only on specific phrases.  This is possible in
    this design only because each set of subphrases stores the count of
    subphrases befoe it - thus allowing a generic tree-walking procedure
    that doesn't have to know what each node consists of until it happens
    across a node of the type it is looking for.
 */

void walk(int ap, int depth, int wanted(int phraseno), void perform(int ap, int before, int depth))
{
  int i;

  if (wanted(A[ap])) perform(ap, TRUE, depth);
  for (i = 3; i < A[ap+2]+3; i++) {
    if (A[A[ap+i]] >= 512+MAX_BIP) walk(A[ap+i], depth+1, wanted, perform);
  }
  if (wanted(A[ap])) perform(ap, FALSE, depth);

}

int want_all(int phraseno) {
  return TRUE;
}

void print_all(int ap, int before, int depth) {
  int saved_ap = ap;
  int phrase = A[ap++];
  int alt = A[ap++];
  int phrases = A[ap++]; // defined subphrases
  int i;

  indent(depth, stderr);
  fprintf(stderr, "<%s%s/%d>  ", (before ? "" : "/"), phrasename[phrase-512], alt);
  for (i = 0; i < (3+phrases); i++) {
    fprintf(stderr, "A[%d] = %d, ", saved_ap+i, A[saved_ap+i]);
  }
  fprintf(stderr, "\n");

}

TRIP compile(int ap, int depth)
{
  int saved_ap = ap;
  int phrase = A[ap++];  // A[ap] is the phrase number. A[ap+1] is the alt.
  int alt = A[ap++]; // For consistency, in BIPs, the Alt should always be 1
                     // although that may not be the case at the moment :-(
  int phrases = A[ap++]; // defined subphrases
  int i;

  // The following ecce command executed on this file will generate teeny.g:
  // ecce -c "(v.//\\.s..(v/ /e)?m,k)0;%c" teeny.c teeny.g
  // May later tweak takeon.c to read from teeny.c rather than teeny.g
  // thus simplifying build process and becoming more like yacc.

  switch (phrase) {

//\\ # BIPS (Built-in Phrases)are linked to the type-code returned
//\\ # by the line-reconstruction code (aka lexer)
//\\
//\\ # These *must* come first.

// See TYPE_* in first page for the values to use.

//\\
//\\ B<IDENT>=1;
  case P_IDENT:
    fprintf(stdout, "%s", c[A[ap]].s);
    break;

//\\ B<NUM>=5;
  case P_NUM:
    fprintf(stdout, "%s", c[A[ap]].s);
    break;

//\\ B<HEXNUM>=6;
  case P_HEXNUM:
    fprintf(stdout, "0x%s", c[A[ap]].s);
    break;

//\\ B<CHARLIT>=3;
  case P_CHARLIT:
    fprintf(stdout, "'%s'", c[A[ap]].s); // ADD ESCAPES
    break;

//\\ B<STRING>=2;
  case P_STRING:
    fprintf(stdout, "\"%s\"", c[A[ap]].s); // ADD ESCAPES
    break;


//\\
//\\ # Phrase definitions.  PROGRAM is the main entry point.
//\\
//\\ # This is the first draft of a grammar, based on the one
//\\ # collectively determined at the wiki site.  At the moment
//\\ # this has not been debugged extensively.


  case P_PROGRAM:
//\\ P<PROGRAM> = <OPTGLOBAL> <OPTPROC> "program" <MAINBLOCK>;
//                 A[ap]       A[ap+1]             A[ap+2]    Note that literals are discarded.
    fprintf(stdout, "#include <stdio.h>\n");
    fprintf(stdout, "void put(int c) {\n");
    fprintf(stdout, "  fputc(c, stdout);\n");
    fprintf(stdout, "}\n");
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    fprintf(stdout, "int main(int argc, char **argv)\n");
    compile(A[ap+2], depth+1);
    // Would be nice if we could force an 'exit(0)' at the end of
    // the mail program block.
    break;

  case P_OPTGLOBAL:
//\\
//\\ P<OPTGLOBAL> = <GLOBAL> <OPTGLOBAL>,
//\\                ;
    if (alt == 1) break;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_OPTPROC:
//\\
//\\ P<OPTPROC> = <PROC> <OPTPROC>,
//\\              ;
    if (alt == 1) break;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_MAINBLOCK:
  case P_BLOCK:
//\\
//\\ P<MAINBLOCK> = "begin" <OPTLOCAL> <SSLIST> "end";
//\\ P<BLOCK> = "begin" <OPTLOCAL> <SSLIST> "end";
    fprintf(stdout, "{\n");
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    if (phrase == P_MAINBLOCK) fprintf(stdout, "  exit(0); return(0);\n");
    fprintf(stdout, "}\n");
    break;

  case P_SSLIST:
//\\
//\\ P<SSLIST> = <STATEMENT> <SSLIST>,
//\\             ;
    if (alt == 1) break;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_OPTLOCAL:
//\\
//\\ P<OPTLOCAL> = <LOCAL>,
//\\               ;
    if (alt == 0) compile(A[ap], depth+1);
    break;

  case P_RESTOFDECL:
//\\
//\\ P<RESTOFDECL> = ',' <DECL> <RESTOFDECL>,
//\\                 ;
    if (alt == 1) break;
    fprintf(stdout, ", ");
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_DIRECTIVE:
//\\
//\\ P<DIRECTIVE> = '#' <COMMAND>;
    compile(A[ap], depth+1);
    break;

  case P_COMMAND:
//\\
//\\ P<COMMAND> = "define" <IDENT> <DEFN>,
//\\              "ifdef" <IDENT>,
//\\              "endif";
    // See comment next to '#' in the line reconstruction for one way
    // to handle these directives.
    fprintf(stdout, "\n#%s", (alt == 0 ? "define" : (alt == 1 ? "ifdef" : "endif")));
    if (alt < 2) {
      fprintf(stdout, " ");
      compile(A[ap], depth+1);
    }
    if (alt == 0) {
      fprintf(stdout, " ");
      compile(A[ap+1], depth+1);
    }
    fprintf(stdout, "\n");
    break;

  case P_DEFN:
//\\
//\\ P<DEFN> = <IDENT>, <NUMBER>, <HEXNUM>, <CHARLIT>, <STRING>;
    compile(A[ap], depth+1);
    break;

  case P_GLOBAL:
//\\
//\\ P<GLOBAL> = '#' "include" <STRING>,
//\\             <LOCAL>;
    if (alt == 0) {
      fprintf(stdout, "\n#%s"); compile(A[ap], depth+1); fprintf(stdout, "\n");
    } else {
      compile(A[ap], depth+1);
    }
    break;

  case P_LOCAL:
//\\
//\\ P<LOCAL> = <DIRECTIVE>,
//\\            <TYPE> <DECL> <RESTOFDECL> <OPTSEMI>;
    if (alt == 0) {
      compile(A[ap], depth+1);
    } else {
      fprintf(stdout, "  ");
      compile(A[ap], depth+1); compile(A[ap+1], depth+1); compile(A[ap+2], depth+1);
      fprintf(stdout, ";\n");
    }
    break;

  case P_TYPE:
//\\
//\\ P<TYPE> = <OPTUNSIGNED> <SIZE>;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_OPTUNSIGNED:
//\\
//\\ P<OPTUNSIGNED> = "unsigned",
//\\                  ;
    if (alt == 0) fprintf(stdout, "unsigned ");
    break;

  case P_SIZE:
//\\
//\\ P<SIZE> = "char",
//\\           "int";
    fprintf(stdout, (alt == 0 ? "char " : "int "));
    break;

  case P_DECL:
//\\
//\\ P<DECL> = '*' <IDENT>,
//\\           <IDENT> <OPTINDEX>,
//\\           <IDENT>;
    if (alt==0) fprintf(stdout, "*");
    compile(A[ap], depth+1);
    if (alt==1) compile(A[ap+1], depth+1);
    break;

  case P_OPTINDEX:
//\\
//\\ P<OPTINDEX> = '[' <NUMBER> ']',
//\\               ;
    if (alt == 1) break;
    fprintf(stdout, "[");
    compile(A[ap], depth+1);
    fprintf(stdout, "]");
    break;

  case P_PROC:
//\\
//\\ P<PROC> = "procedure" <IDENT> '(' ')' <OPTBLOCK>,
//\\           "procedure" <IDENT> '(' <FORMALLIST> ')' <OPTBLOCK>;
    if (alt == 0) {
      fprintf(stdout, "void ");compile(A[ap],depth+1);fprintf(stdout, "(void)\n");
      compile(A[ap+1],depth+1);
    } else {
      fprintf(stdout, "void ");compile(A[ap],depth+1);fprintf(stdout, "(");compile(A[ap+1],depth+1);fprintf(stdout, ")\n");
      compile(A[ap+2],depth+1);
    }
    break;

  case P_OPTBLOCK:
//\\
//\\ P<OPTBLOCK> = <BLOCK>,
//\\               ;
    if (alt == 0) {
      compile(A[ap], depth+1);
    } else fprintf(stdout, ";");
    break;

  case P_FORMALLIST:
//\\
//\\ P<FORMALLIST> = <OPTFORMAL> <RESTOFFORMAL>,
//\\                 ;
    if (alt == 1) {
      fprintf(stdout, "void");
      break;
    }
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_RESTOFFORMAL:
//\\
//\\ P<RESTOFFORMAL> = ',' <FORMAL> <RESTOFFORMAL>,
//\\                   ;
    if (alt == 1) break;
    fprintf(stdout, ", ");
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_FORMAL:
// \\
// \\ P<FORMAL> = <TYPE> '*' <IDENT>,
// \\             <TYPE> '*',
// \\             <TYPE> <IDENT>,
// \\             <IDENT>;
    // <IDENT> on its own is prsumably for declarations like:
    // unsigned int M, N - in which case the type is that of
    // the preceding item.  But what if the IDENT comes first,
    // i.e. procedure fred(jim);  Would that default to an int
    // or is it simply an error?
//    compile(A[ap], depth+1);
//    if (alt < 2) fprintf(stdout, " *");
//    if ((alt&1) == 0) compile(A[ap+1], depth+1);
//    break;

// Trying a different grammar for FORMAL:
//\\
//\\ P<FORMAL> = <TYPE> '*' <IDENTLIST>,
//\\             <TYPE> '*',
//\\             <TYPE> <IDENTLIST>,
//\\             <TYPE>;
                 // *** I think we forgot naked type in the otiginal grammar
    if ((alt&1) == 1) {
      compile(A[ap], depth+1);
      if (alt == 1) fprintf(stdout, " *");
    } else {
      int namelist = A[ap+1];
      // Need to look down into structure for namelist in order to
      // explicitly type each variable, i.e. "int i, int j" rather
      // than "int i, j"

      // namelist points to a IDENTLIST
      for (;;) {
        int name = A[namelist+3];
        compile(A[ap], depth+1);
        if (alt == 0) fprintf(stdout, " *");
        compile(name, depth+1);
        namelist = A[namelist+4];
        // namelist now points to a RESTOFIDENTLIST
        if (A[namelist+1] == 0) {
          fprintf(stdout, ", ");
        } else {
          break;
        }
      }

      //
      // Can we do this elegantly with the 'walk' procedure?
    }
    break;

//\\ P<IDENTLIST> = <IDENT> <RESTOFIDENTLIST>;
//\\ P<RESTOFIDENTLIST> = ',' <IDENT> <RESTOFIDENTLIST>,
//\\                      ;

  case P_OPTFORMAL:
//\\
//\\ P<OPTFORMAL> = <FORMAL>,
//\\                ;
    if (alt == 1) return;
    compile(A[ap], depth+1);
    break;

  case P_STATEMENT:
//\\
//\\ P<STATEMENT> = <SS> <OPTSEMI>;
    compile(A[ap], depth+1);
    break;

  case P_SS:
    switch (alt) {
//\\
    case 0:
//\\ P<SS>        = <IF>,
      compile(A[ap], depth+1);
      break;
    case 1:
//\\                <WHILE>,
      compile(A[ap], depth+1);
      break;
    case 2:
//\\                <FOR>,
      compile(A[ap], depth+1);
      break;
    case 3:
//\\                '#' "inline" <STRING>,
      break;
    case 4:
//\\                <DIRECTIVE>,
      break;
    case 5:
//\\                <SIMPLE>;
      compile(A[ap], depth+1);
      break;
    }
    break;

  case P_IF:
//\\
//\\ P<IF> = "if" '(' <BOOLEXPR> ')' <SSLIST> <OPTELSE> "endif";
    fprintf(stdout, "  if (");
    compile(A[ap], depth+1);
    fprintf(stdout, ") {\n");
    compile(A[ap+1], depth+1);
    compile(A[ap+2], depth+1);
    fprintf(stdout, "  }\n");
    break;

  case P_OPTELSE:
//\\
//\\ P<OPTELSE> = "else" <SSLIST>,
//\\              ;
    if (alt == 1) break;
    fprintf(stdout, "  } else {\n");
    compile(A[ap], depth+1);
    break;

  case P_WHILE:
//\\
//\\ P<WHILE> = "while" '(' <BOOLEXPR> ')' <SSLIST> "endwhile";
    fprintf(stdout, "  while (");
    compile(A[ap], depth+1);
    fprintf(stdout, ") {\n");
    compile(A[ap+1], depth+1);
    fprintf(stdout, "  }\n");
    break;

  case P_FOR:
//\\
//\\ P<FOR> = "for" '(' <SIMPLE> <OPTSEMI> <BOOLEXPR> <OPTSEMI> <SIMPLE> <OPTSEMI> ')' <SSLIST> "endfor";
    fprintf(stdout, "  for (");
    compile(A[ap], depth+1);
    fprintf(stdout, "; ");
    compile(A[ap+2], depth+1);
    fprintf(stdout, "; ");
    compile(A[ap+4], depth+1);
    fprintf(stdout, ") {\n");
    compile(A[ap+6], depth+1);
    fprintf(stdout, "  }\n");
    break;

  case P_SIMPLE:
//\\
//\\ P<SIMPLE> = <IDENT> '(' <PARAMLIST> ')',
//\\             <IDENT> '(' ')',
//\\             <VARIABLE> '=' <BOOLEXPR>;
    fprintf(stdout, "  ");
    compile(A[ap], depth+1);
    if (alt < 2) {
      fprintf(stdout, "(");
      if (alt == 0) compile(A[ap+1], depth+1);
      fprintf(stdout, ")");
    } else {
      fprintf(stdout, " = "); compile(A[ap+1], depth+1);
    }
    fprintf(stdout, ";\n");
    break;

  case P_PARAMLIST:
//\\
//\\ P<PARAMLIST> = <PARAM> <RESTOFPARAMLIST>
//\\                ;
    if (alt == 1) break;
    compile(A[ap], depth+1); 
    compile(A[ap+1], depth+1); 
    break;

  case P_RESTOFPARAMLIST:
//\\ P<RESTOFPARAMLIST> = <OPTCOMMA> <PARAMLIST>,
//\\                      ;
    if (alt == 1) break;
    fprintf(stdout, ", ");
    compile(A[ap+1], depth+1); 
    break;

  case P_OPTCOMMA:
//\\
//\\ P<OPTCOMMA> = ',',
//\\               ;
    break;

  case P_VARIABLE:
//\\
//\\ P<VARIABLE> = '*' <ADDR>,
//\\               <IDENT> '[' <EXPR> ']',
//\\               <IDENT>;
    if (alt == 0) fprintf(stdout, "*");
    compile(A[ap], depth+1);
    if (alt == 1) {
      fprintf(stdout, "["); compile(A[ap+1], depth+1); fprintf(stdout, "]");
    }
    break;

  case P_ADDR:
//\\
//\\ P<ADDR> = <HEXNUM>,
//\\           <IDENT>;
    compile(A[ap], depth+1);
    break;

  case P_PARAM:
//\\
//\\ P<PARAM> = <STRING>,
//\\            <EXPR>;
    compile(A[ap], depth+1);
    break;

  case P_BOOLEXPR:
//\\
//\\ P<BOOLEXPR> = <BOOLTERM> <RESTOFBOOLTERM>;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_RESTOFBOOLTERM:
//\\
//\\ P<RESTOFBOOLTERM> = <OROP> <BOOLTERM> <RESTOFBOOLTERM>,
//\\                     ;
    if (alt == 1) break;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    compile(A[ap+2], depth+1);
    break;

  case P_BOOLTERM:
//\\
//\\ P<BOOLTERM> = <BOOLFACTOR> <RESTOFBOOLFACTOR>;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_RESTOFBOOLFACTOR:
//\\
//\\ P<RESTOFBOOLFACTOR> = '&' <BOOLFACTOR> <RESTOFBOOLFACTOR>,
//\\                       ;
    if (alt == 1) break;
    fprintf(stdout, " && ");
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_BOOLFACTOR:
//\\
//\\ P<BOOLFACTOR> = <OPTNOT> <RELATION>;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_OPTNOT:
//\\
//\\ P<OPTNOT> = '!',
//\\             ;
    if (alt == 0) fprintf(stdout, "!");
    break;

  case P_OPTSEMI:
//\\
//\\ P<OPTSEMI> = ';',
//\\              ;
    break;

  case P_RELATION:
//\\
//\\ P<RELATION> = <EXPR> <RESTOFRELATION>;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_RESTOFRELATION:
//\\
//\\ P<RESTOFRELATION> = <RELOP> <EXPR>,
//\\                     ;
    // implicitly "<expr> != 0", if no relop given.
    if (alt == 1) break;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_EXPR:
//\\
//\\ P<EXPR> = <SUM> <RESTOFEXPR>;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_RESTOFEXPR:
//\\
//\\ P<RESTOFEXPR> = <SHIFTOP> <SUM> <RESTOFEXPR>,
//\\                 ;
    if (alt == 1) break;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    compile(A[ap+2], depth+1);
    break;

  case P_SUM:
//\\
//\\ P<SUM> = <OPTADDOP> <TERM> <RESTOFSUM>;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    compile(A[ap+2], depth+1);
    break;

  case P_OPTADDOP:
//\\
//\\ P<OPTADDOP> = <ADDOP>,
//\\               ;
    if (alt == 1) break;
    compile(A[ap], depth+1);
    break;

  case P_RESTOFSUM:
//\\
//\\ P<RESTOFSUM> = <ADDOP> <TERM> <RESTOFSUM>,
//\\                ;
    if (alt == 1) break;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    compile(A[ap+2], depth+1);
    break;

  case P_TERM:
//\\
//\\ P<TERM> = <FACTOR> <RESTOFTERM>;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    break;

  case P_RESTOFTERM:
//\\
//\\ P<RESTOFTERM> = <MULOP> <FACTOR> <RESTOFTERM>,
//\\                 ;
    if (alt == 1) break;
    compile(A[ap], depth+1);
    compile(A[ap+1], depth+1);
    compile(A[ap+2], depth+1);
    break;

  case P_OROP:
//\\
//\\ P<OROP> = '|', '~';
    fprintf(stdout, (alt == 0 ? " | " : " ^ "));
    break;

  case P_RELOP:
//\\
//\\ P<RELOP> = '<>', '<=', '<', '>=', '>', '!=', '=';
    switch (alt) {
    case 0:
    case 5: fprintf(stdout, " != "); break;
    case 1: fprintf(stdout, " <= "); break;
    case 2: fprintf(stdout, " < "); break;
    case 3: fprintf(stdout, " >= "); break;
    case 4: fprintf(stdout, " > "); break;
    case 6: fprintf(stdout, " == "); break;
    }
    break;

  case P_SHIFTOP:
//\\
//\\ P<SHIFTOP> = '<<', '>>';
    fprintf(stdout, (alt == 0 ? " << " : " >> "));
    break;

  case P_ADDOP:
//\\
//\\ P<ADDOP> = '+', '-';
    fprintf(stdout, (alt == 0 ? " + " : " - "));
    break;

  case P_MULOP:
//\\
//\\ P<MULOP> = '*', '/';
    fprintf(stdout, (alt == 0 ? " * " : " / "));
    break;

  case P_FACTOR:
//\\
//\\ P<FACTOR> = '(' <BOOLEXPR> ')',
    if (alt == 0) { // when this is in the switch, it is not executed.  Compiler bug?
      fprintf(stdout, " (");
      compile(A[ap], depth+1);
      fprintf(stdout, ") ");
      break;
    } else switch (alt) {
    case 1:
//\\             '&' <IDENT> '[' <EXPR> ']',
    case 2:
//\\             '&' <IDENT>,
      fprintf(stdout, " &");
      compile(A[ap], depth+1);
      if (alt == 1) {
        fprintf(stdout, "[");
        compile(A[ap+1], depth+1);
        fprintf(stdout, "]");
      }
      break;
    case 3:
//\\             <VARIABLE>,
    case 4:
//\\             <NUMBER>,
    case 5:
//\\             <HEXNUM>;
      compile(A[ap], depth+1);
    }
    break;

//\\
//\\ # Shouldn't HEXNUM be here with NUM and CHARLIT?
//\\ # The apparent restriction is that a HEXNUM cannot
//\\ # be used as the size of an array declaration - but
//\\ # a character constant can???

  case P_NUMBER:
//\\
//\\ P<NUMBER> = <NUM>,
//\\             <CHARLIT>;
      compile(A[ap], depth+1);
    break;

//\\
//\\ E
//\\ # 'E' is end of grammar.  Everything after this is ignored.
  default:
    fprintf(stderr, "*** Internal error at line %d.  ap=%d  phrase=%d\n", __LINE__, ap, phrase); exit(2);
  }

  return(-1); // DUMMY TRIP, NOTHING TO RETURN

}

//--------------------------------------------------------------------------



int main(int argc, char **argv) {
char *s;

  // Get clean version of executable name.  Should work on most existing systems (2006)
  progname = argv[0];
  if ((s = strrchr(progname, '/')) != NULL) progname = s+1;  // Unix
  if ((s = strrchr(progname, '\\')) != NULL) progname = s+1; // M$
  if ((s = strrchr(progname, ']')) != NULL) progname = s+1;  // Dec
  if ((s = strrchr(progname, ';')) != NULL) *s = '\0';       // Version no's
  if (((s = strrchr(progname, '.')) != NULL) && (strcasecmp(s, ".exe") == 0)) *s = '\0';
  if (((s = strrchr(progname, '.')) != NULL) && (strcasecmp(s, ".com") == 0)) *s = '\0';

  if ((argc == 3) && strcmp(argv[1], "-d") == 0) {
    argv++; argc--; debug_parser = TRUE;
  }

  if (argc != 2) {
    fprintf(stderr, "syntax: %s [-d] filename\n", progname);
    exit(1);
  }

  sourcefile = fopen(argv[1], "r");
  if (sourcefile == NULL) {
    fprintf(stderr, "%s: %s - %s\n", progname, strerror(errno), argv[1]);
    exit(errno);
  }

  curfile = argv[1]; startline = TRUE; whitespace = TRUE;
  onecharstr = (char *)malloc(512);

  line_reconstruction();

  if (debug_parser) {
    int i; // DEBUG ONLY
    for (i = 0; i < nextfree; i++) {
      fprintf(stdout, "%s, line %d, col %d: [%0d] %s\n",
                       c[i].f, c[i].l, c[i].col, c[i].t, c[i].s);
    }
  }

  if (!parse(PHRASE_BASE, 0)) {
    if (bestparse == nextfree) {
      fprintf(stderr, "\"%s\", Line %d, Col %d: Premature end of file while looking for %s\n",
                       argv[1], c[bestparse].l, c[bestparse].col+1, looking_for);
    } else {
      int i;
      fprintf(stderr, "\"%s\", Line %d, Col %d: Syntax error while looking for %s near ",
                       argv[1], c[bestparse].l, c[bestparse].col+1, looking_for);
      for (i = bestparse; i < bestparse+3; i++) {
        if (i == nextfree) {
          fprintf(stderr, "<End of file>");
          break;
        }
        switch (c[i].t) {
        case TYPE_HEXINT:
          fprintf(stderr, "$"); // *OR* ... We could put the '$' back in front of the string
          /* drop through */    // and probably save much code whenever printing.  Use str+1
        case TYPE_TAG:
        case TYPE_CHAR:
        case TYPE_INT:
        case TYPE_KEYWORD:
          fprintf(stderr, "%s", c[i].s);
          break;
        case TYPE_STRING:
          fprintf(stderr, "\"%s\"", c[i].s);
          break;
        case TYPE_CHARCONST:
          fprintf(stderr, "'%s'", c[i].s);
          break;
        }
        fprintf(stderr, (i == (bestparse+2) ? " ..." : " "));
      }
      fprintf(stderr, "\n");
    }
    exit(1);
  }

  fprintf(stderr, "Parsed OK, now compiling.\n");

  if (debug_parser) walk(0, 0, want_all, print_all); // Diags: print final parse tree

  // The structure at A[] is a concrete syntax tree.  We could
  // generate code directly from this tree, however for the
  // sake of cleaner design, we will first convert the concrete
  // syntax tree into an Abstract Syntax Tree (AST)

  {int program = compile(0, 0);
    // Now generate the output code from the AST.
    // codegen(program, -1, -1, -1)
  }
  exit(0); return(1);
}