// don't pay any attention to this file for now, I'm just trying out some ideas...
void handle(char *s) { char *t; int i; static int N = 0; // ": = _ + R 2 POP_&" i = *s++; s += 1; // i is the opcode index t = opstr[i]; // "= _ + R 2 POP_&" for (;;) { toktype[*s] = TOK_OPERATOR; *t++ = *s++; if (*s == ' ') break; } // " _ + R 2 POP_&" *t = '\0'; s += 1; // "_ + R 2 POP_&" fixity[i] = *s++; s += 1; // "+ R 2 POP_&" if (*s == '+') N++; precedence[i] = N; s += 2; // "R 2 POP_&" associativity[i] = *s++; s += 1; // "2 POP_&" arity[i] = (*s++) - '0'; s += 1; t = mnemonic[i]; // "POP_&" for (;;) { *t++ = *s++; if (*s == ' ' || *s == '\0') break; } // "" if (*s == ' ') {*t = '\0'; s += 1;} rest[i] = s; /* if (i == 'A') fprintf(stderr, "opstr: '%s' fixity: %c precedence: %d assoc: %c arity: %d mnemonic: %s rest: %s\n", opstr[i], fixity[i], precedence[i], associativity[i], arity[i], mnemonic[i], rest[i]); */ } void init_tables(void) { int i = 0; char *s; // this whole procedure could be removed if we used initialized const arrays instead // but this makes the logic easier to follow... while ((s = initdata[i++]) != NULL) handle(s); // operator table toktype[0] = TOK_EOF; toktype['\n'] = TOK_NEWLINE; toktype[' '] = TOK_WHITESPACE; toktype['\t'] = TOK_WHITESPACE; for (i = 'A'; i <= 'Z'; i++) toktype[i] = TOK_ALPHA; for (i = 'a'; i <= 'z'; i++) toktype[i] = TOK_ALPHA; for (i = '0'; i <= '9'; i++) toktype[i] = TOK_NUMBER; } int lookupop(char *s) { int i = 255; strcpy(opstr[0], s); while (strcmp(s, opstr[i]) != 0) i -= 1; return i; } static int pend = -2; int inputchar(void) { int val = nextchar(); if (pend >= 0) pend = -2; return val; } int nextchar(void) { if (pend == -2) pend = getchar(); return pend; } int starts_any_op(char *prefix) { int i; char *full; /* This multiple-opcode tokenizer has a possible quirk: If we have an operator "<" and also "<->" then when we have just read the "<", if the next character is a "-" we go on to read the following character. So "if a <-3" will read a token "<-" which is an error and fail, rather than parsing the statement as "if", "a" "<" "-3", even though it should be unambiguous */ for (i = 0; i < 256; i++) if ((strlen(prefix) <= strlen(full = opstr[i])) && (strncmp(prefix, full, strlen(prefix)))) return 0==0; return 0!=0; } char *get_tok(int *toktypep) { static char buff[128]; // TODO: remember to add length check later char *s = buff; int c = inputchar(), i; if (c != EOF) { *toktypep = toktype[c]; switch (toktype[c]) { case TOK_NUMBER: case TOK_ALPHA: for (;;) { *s++ = c; c = nextchar(); if ((toktype[c] != TOK_NUMBER) && (toktype[c] != *toktypep)) break; c = inputchar(); } break; case TOK_OPERATOR: for (;;) { *s++ = c; *s = '\0'; c = nextchar(); if (toktype[c] != TOK_OPERATOR) break; *s = c; s[1] = '\0'; if (starts_any_op(buff)) { c = inputchar(); } else { *s = '\0'; // cases such as "+(" break; } } for (i = 0; i < 256; i++) if (strcmp(opstr[i], buff) == 0) return strdup(buff); fprintf(stderr, "Bad operator '%s'\n", buff); strcpy(buff, "+"); break; case TOK_NEWLINE: break; case TOK_WHITESPACE: break; default: fprintf(stderr, "Bad character in input: %c\n", c); } } else *toktypep = TOK_EOF; *s = '\0'; return strdup(buff); } int op[32]; // internal operator stack for precedence int top = -1; int nest = -1; // run-time nest depth (we don't know actual values but we // can keep track of the depth of the run-time stack) void operate(int op) { if (op == '\0') {fprintf(stderr, "Bad operator\n"); exit(1);} fprintf(stdout, " %s\n", mnemonic[op]); } void operand(char *s) { if ((nest+1 > 0) && (top != nest)) {fprintf(stderr, "Missing operator before %s [nest=%d top=%d]\n", s, nest, top); exit(1);} nest += 1; fprintf(stdout, " PUSH%s%s\n", (isdigit(*s) ? " #":" "), s); } void operator(int oper) { // CHECK ARITY! if (top+1 != nest) { if (top < 0) fprintf(stderr, "Missing initial operand before %s [nest=%d top=%d]\n", opstr[oper], nest, top); else fprintf(stderr, "Missing operand between %s and %s [nest=%d top=%d]\n", op[top], opstr[oper], nest, top); exit(1); } // RULE 1: a * b + c => (a*b) + c ... do it before we push the next operand // RULE 2: a + b + c => (a+b) + c ... do it before we push the next operand while ((top >= 0) && (precedence[op[top]] >= precedence[oper])) { // was > now >= // 1: stack is now just ... + a*b // 2: stack is now just a + b nest -= 1; /* pop 2 operands, push 1 */ operate(op[top--]); } op[++top] = oper; } void evaluate(void) { if (nest <0 && top < 0) return; /* nothing entered */ if (nest == top) {fprintf(stderr, "Missing operand after final %s\n", op[top]); exit(1);} while (top >= 0) operate(op[top--]); fprintf(stdout, " PRINT\n"); nest = top = -1; }

// don't pay any attention to this file for now, I'm just trying out some ideas...

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TOK_NUMBER 1
#define TOK_ALPHA 2
#define TOK_OPERATOR 3
#define TOK_WHITESPACE 4
#define TOK_NEWLINE 5
#define TOK_EOF 6

#define INFIX   '_'
#define PREFIX  '{'
#define POSTFIX '}'
#define OUTFIX  '('
#define MIXFIX  '?'

#define LEFT 1
#define RIGHT 2

// The string table below is just a shorthand way to initialise several tables:
// opstr fixity opcode precedence associativity arity rest

// items are space-delimited
// some thought needed when we get to various flavours of mixfix

static char *initdata[] = {
    ": = _ + R 2 POP_&",  /* assignment */
    "A && _ + L 2 BAND",  /* booleans */
    "O || _ + L 2 BOR",
    "+ + _ + L 2 ADD",    /* arithmetic */
    "- - _ = L 2 SUB",
    "* * _ + L 2 MUL",
    "/ / _ = L 2 DIV",
    "^ ^ _ + R 2 POW",    /* unary */
    "M - { + _ 1 NEG",
    "! ! } + _ 1 CALL_Factorial",
    NULL
};

static int opcode[256];
static char opstr[256][16];
static int fixity[256];
static int precedence[256];
static int associativity[256];
static int arity[256];
static char mnemonic[256][16];
static char *rest[256];

static int toktype[256];

void handle(char *s) {
    char *t;
    int i;
    static int N = 0;
                                                // ": = _ + R 2 POP_&"
    i = *s++; s += 1; // i is the opcode index
    t = opstr[i];                               // "= _ + R 2 POP_&"
    for (;;) {
        toktype[*s] = TOK_OPERATOR;
	*t++ = *s++;
        if (*s == ' ') break;
    }                                           // " _ + R 2 POP_&"
    *t = '\0'; s += 1;                          // "_ + R 2 POP_&"
    fixity[i] = *s++; s += 1;                   // "+ R 2 POP_&"
    if (*s == '+') N++;
    precedence[i] = N; s += 2;                  // "R 2 POP_&"
    associativity[i] = *s++; s += 1;            // "2 POP_&"
    arity[i] = (*s++) - '0'; s += 1;
    t = mnemonic[i];                            // "POP_&"
    for (;;) {
	*t++ = *s++;
        if (*s == ' ' || *s == '\0') break;
    }                                           // ""
    if (*s == ' ') {*t = '\0'; s += 1;}
    rest[i] = s;
    /* if (i == 'A') fprintf(stderr, "opstr: '%s' fixity: %c precedence: %d assoc: %c arity: %d mnemonic: %s rest: %s\n",
                                     opstr[i], fixity[i], precedence[i], associativity[i], arity[i], mnemonic[i], rest[i]); */
}

void init_tables(void) {
    int i = 0;
    char *s;
    // this whole procedure could be removed if we used initialized const arrays instead
    // but this makes the logic easier to follow...
    while ((s = initdata[i++]) != NULL)  handle(s); // operator table
    toktype[0] = TOK_EOF; toktype['\n'] = TOK_NEWLINE;
    toktype[' '] = TOK_WHITESPACE; toktype['\t'] = TOK_WHITESPACE; 
    for (i = 'A'; i <= 'Z'; i++) toktype[i] = TOK_ALPHA;
    for (i = 'a'; i <= 'z'; i++) toktype[i] = TOK_ALPHA;
    for (i = '0'; i <= '9'; i++) toktype[i] = TOK_NUMBER;
}

int lookupop(char *s) {
    int i = 255;
    strcpy(opstr[0], s);
    while (strcmp(s, opstr[i]) != 0) i -= 1;
    return i;
}

static int pend = -2;
int inputchar(void)
{
    int val = nextchar();
    if (pend >= 0) pend = -2;
    return val;
    
}
int nextchar(void)
{
    if (pend == -2) pend = getchar();
    return pend;
}

int starts_any_op(char *prefix)
{
    int i; char *full;
/*  This multiple-opcode tokenizer has a possible quirk:

    If we have an operator "<" and also "<->" then when we have just read the "<", if the next character
    is a "-" we go on to read the following character.  So "if a <-3" will read a token "<-" which is an error
    and fail, rather than parsing the statement as "if", "a" "<" "-3", even though it should be unambiguous
 */

    for (i = 0; i < 256; i++) if ((strlen(prefix) <= strlen(full = opstr[i])) && (strncmp(prefix, full, strlen(prefix)))) return 0==0;
    return 0!=0;
}

char *get_tok(int *toktypep)
{
    static char buff[128]; // TODO: remember to add length check later
    char *s = buff;
    int c = inputchar(), i;

    if (c != EOF) {
        *toktypep = toktype[c];
	switch (toktype[c]) {
	    case TOK_NUMBER:
	    case TOK_ALPHA:
                for (;;) {
		    *s++ = c;
                    c = nextchar();
                    if ((toktype[c] != TOK_NUMBER) && (toktype[c] != *toktypep)) break;
                    c = inputchar();
                }
                break;
	    case TOK_OPERATOR:
                for (;;) {
		    *s++ = c; *s = '\0';
                    c = nextchar();
                    if (toktype[c] != TOK_OPERATOR) break;
                    *s = c; s[1] = '\0';
                    if (starts_any_op(buff)) {
                        c = inputchar();
		    } else {
                        *s = '\0';   // cases such as "+("
                        break;
		    }
                }
                for (i = 0; i < 256; i++) if (strcmp(opstr[i], buff) == 0) return strdup(buff);
                fprintf(stderr, "Bad operator '%s'\n", buff);
                strcpy(buff, "+");
                break;
	    case TOK_NEWLINE:
                break;
	    case TOK_WHITESPACE:
                break;
            default:
                fprintf(stderr, "Bad character in input: %c\n", c);
	}
    } else *toktypep = TOK_EOF;
    *s = '\0';
    return strdup(buff);
}

int op[32]; // internal operator stack for precedence
int top = -1;

int nest = -1; // run-time nest depth (we don't know actual values but we
               //  can keep track of the depth of the run-time stack)

void operate(int op)
{
    if (op == '\0') {fprintf(stderr, "Bad operator\n"); exit(1);}
    fprintf(stdout, "        %s\n", mnemonic[op]);
}

void operand(char *s) {
    if ((nest+1 > 0) && (top != nest)) {fprintf(stderr, "Missing operator before %s [nest=%d top=%d]\n", s, nest, top); exit(1);}
    nest += 1;
    fprintf(stdout, "        PUSH%s%s\n", (isdigit(*s) ? " #":" "), s);
}

void operator(int oper) {
  // CHECK ARITY!
  if (top+1 != nest) {
      if (top < 0) fprintf(stderr, "Missing initial operand before %s [nest=%d top=%d]\n", opstr[oper], nest, top);
      else fprintf(stderr, "Missing operand between %s and %s [nest=%d top=%d]\n", op[top], opstr[oper], nest, top);
      exit(1);
  }
  // RULE 1:  a * b + c => (a*b) + c ... do it before we push the next operand
  // RULE 2:  a + b + c => (a+b) + c ... do it before we push the next operand
  while ((top >= 0) && (precedence[op[top]] >= precedence[oper])) {   // was > now >=
    // 1: stack is now just ... + a*b
    // 2: stack is now just a + b
    nest -= 1; /* pop 2 operands, push 1 */
    operate(op[top--]);
  }
  op[++top] = oper;
}

void evaluate(void) {
  if (nest <0 && top < 0) return; /* nothing entered */
  if (nest == top) {fprintf(stderr, "Missing operand after final %s\n", op[top]); exit(1);}
  while (top >= 0) operate(op[top--]);
  fprintf(stdout, "        PRINT\n");
  nest = top = -1;
}

int main(int argc, char **argv)
{
    char *tok;
    int toktype;

    init_tables();

    for (;;) {
	tok = get_tok(&toktype);
	switch (toktype) {
	    case TOK_NUMBER:      {operand(tok);		    continue;}
	    case TOK_ALPHA:       {operand(tok);		    continue;}
	    case TOK_OPERATOR:    {operator(lookupop(tok));         continue;}
	    case TOK_NEWLINE:     {evaluate();		            continue;}
	    case TOK_EOF:         {evaluate();                      exit(0);}
	    case TOK_WHITESPACE:	                            continue;
	    default:              init_tables(); /* lexer already complained */
        }
    }

    exit(0);
    return(0);
}