// don't pay any attention to this file for now, I'm just trying out some ideas...
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TOK_NUMBER 1
#define TOK_ALPHA 2
#define TOK_OPERATOR 3
#define TOK_WHITESPACE 4
#define TOK_NEWLINE 5
#define TOK_EOF 6
#define INFIX '_'
#define PREFIX '{'
#define POSTFIX '}'
#define OUTFIX '('
#define MIXFIX '?'
#define LEFT 1
#define RIGHT 2
// The string table below is just a shorthand way to initialise several tables:
// opstr fixity opcode precedence associativity arity rest
// items are space-delimited
// some thought needed when we get to various flavours of mixfix
static char *initdata[] = {
": = _ + R 2 POP_&", /* assignment */
"A && _ + L 2 BAND", /* booleans */
"O || _ + L 2 BOR",
"+ + _ + L 2 ADD", /* arithmetic */
"- - _ = L 2 SUB",
"* * _ + L 2 MUL",
"/ / _ = L 2 DIV",
"^ ^ _ + R 2 POW", /* unary */
"M - { + _ 1 NEG",
"! ! } + _ 1 CALL_Factorial",
NULL
};
static int opcode[256];
static char opstr[256][16];
static int fixity[256];
static int precedence[256];
static int associativity[256];
static int arity[256];
static char mnemonic[256][16];
static char *rest[256];
static int toktype[256];
void handle(char *s) {
char *t;
int i;
static int N = 0;
// ": = _ + R 2 POP_&"
i = *s++; s += 1; // i is the opcode index
t = opstr[i]; // "= _ + R 2 POP_&"
for (;;) {
toktype[*s] = TOK_OPERATOR;
*t++ = *s++;
if (*s == ' ') break;
} // " _ + R 2 POP_&"
*t = '\0'; s += 1; // "_ + R 2 POP_&"
fixity[i] = *s++; s += 1; // "+ R 2 POP_&"
if (*s == '+') N++;
precedence[i] = N; s += 2; // "R 2 POP_&"
associativity[i] = *s++; s += 1; // "2 POP_&"
arity[i] = (*s++) - '0'; s += 1;
t = mnemonic[i]; // "POP_&"
for (;;) {
*t++ = *s++;
if (*s == ' ' || *s == '\0') break;
} // ""
if (*s == ' ') {*t = '\0'; s += 1;}
rest[i] = s;
/* if (i == 'A') fprintf(stderr, "opstr: '%s' fixity: %c precedence: %d assoc: %c arity: %d mnemonic: %s rest: %s\n",
opstr[i], fixity[i], precedence[i], associativity[i], arity[i], mnemonic[i], rest[i]); */
}
void init_tables(void) {
int i = 0;
char *s;
// this whole procedure could be removed if we used initialized const arrays instead
// but this makes the logic easier to follow...
while ((s = initdata[i++]) != NULL) handle(s); // operator table
toktype[0] = TOK_EOF; toktype['\n'] = TOK_NEWLINE;
toktype[' '] = TOK_WHITESPACE; toktype['\t'] = TOK_WHITESPACE;
for (i = 'A'; i <= 'Z'; i++) toktype[i] = TOK_ALPHA;
for (i = 'a'; i <= 'z'; i++) toktype[i] = TOK_ALPHA;
for (i = '0'; i <= '9'; i++) toktype[i] = TOK_NUMBER;
}
int lookupop(char *s) {
int i = 255;
strcpy(opstr[0], s);
while (strcmp(s, opstr[i]) != 0) i -= 1;
return i;
}
static int pend = -2;
int inputchar(void)
{
int val = nextchar();
if (pend >= 0) pend = -2;
return val;
}
int nextchar(void)
{
if (pend == -2) pend = getchar();
return pend;
}
int starts_any_op(char *prefix)
{
int i; char *full;
/* This multiple-opcode tokenizer has a possible quirk:
If we have an operator "<" and also "<->" then when we have just read the "<", if the next character
is a "-" we go on to read the following character. So "if a <-3" will read a token "<-" which is an error
and fail, rather than parsing the statement as "if", "a" "<" "-3", even though it should be unambiguous
*/
for (i = 0; i < 256; i++) if ((strlen(prefix) <= strlen(full = opstr[i])) && (strncmp(prefix, full, strlen(prefix)))) return 0==0;
return 0!=0;
}
char *get_tok(int *toktypep)
{
static char buff[128]; // TODO: remember to add length check later
char *s = buff;
int c = inputchar(), i;
if (c != EOF) {
*toktypep = toktype[c];
switch (toktype[c]) {
case TOK_NUMBER:
case TOK_ALPHA:
for (;;) {
*s++ = c;
c = nextchar();
if ((toktype[c] != TOK_NUMBER) && (toktype[c] != *toktypep)) break;
c = inputchar();
}
break;
case TOK_OPERATOR:
for (;;) {
*s++ = c; *s = '\0';
c = nextchar();
if (toktype[c] != TOK_OPERATOR) break;
*s = c; s[1] = '\0';
if (starts_any_op(buff)) {
c = inputchar();
} else {
*s = '\0'; // cases such as "+("
break;
}
}
for (i = 0; i < 256; i++) if (strcmp(opstr[i], buff) == 0) return strdup(buff);
fprintf(stderr, "Bad operator '%s'\n", buff);
strcpy(buff, "+");
break;
case TOK_NEWLINE:
break;
case TOK_WHITESPACE:
break;
default:
fprintf(stderr, "Bad character in input: %c\n", c);
}
} else *toktypep = TOK_EOF;
*s = '\0';
return strdup(buff);
}
int op[32]; // internal operator stack for precedence
int top = -1;
int nest = -1; // run-time nest depth (we don't know actual values but we
// can keep track of the depth of the run-time stack)
void operate(int op)
{
if (op == '\0') {fprintf(stderr, "Bad operator\n"); exit(1);}
fprintf(stdout, " %s\n", mnemonic[op]);
}
void operand(char *s) {
if ((nest+1 > 0) && (top != nest)) {fprintf(stderr, "Missing operator before %s [nest=%d top=%d]\n", s, nest, top); exit(1);}
nest += 1;
fprintf(stdout, " PUSH%s%s\n", (isdigit(*s) ? " #":" "), s);
}
void operator(int oper) {
// CHECK ARITY!
if (top+1 != nest) {
if (top < 0) fprintf(stderr, "Missing initial operand before %s [nest=%d top=%d]\n", opstr[oper], nest, top);
else fprintf(stderr, "Missing operand between %s and %s [nest=%d top=%d]\n", op[top], opstr[oper], nest, top);
exit(1);
}
// RULE 1: a * b + c => (a*b) + c ... do it before we push the next operand
// RULE 2: a + b + c => (a+b) + c ... do it before we push the next operand
while ((top >= 0) && (precedence[op[top]] >= precedence[oper])) { // was > now >=
// 1: stack is now just ... + a*b
// 2: stack is now just a + b
nest -= 1; /* pop 2 operands, push 1 */
operate(op[top--]);
}
op[++top] = oper;
}
void evaluate(void) {
if (nest <0 && top < 0) return; /* nothing entered */
if (nest == top) {fprintf(stderr, "Missing operand after final %s\n", op[top]); exit(1);}
while (top >= 0) operate(op[top--]);
fprintf(stdout, " PRINT\n");
nest = top = -1;
}
int main(int argc, char **argv)
{
char *tok;
int toktype;
init_tables();
for (;;) {
tok = get_tok(&toktype);
switch (toktype) {
case TOK_NUMBER: {operand(tok); continue;}
case TOK_ALPHA: {operand(tok); continue;}
case TOK_OPERATOR: {operator(lookupop(tok)); continue;}
case TOK_NEWLINE: {evaluate(); continue;}
case TOK_EOF: {evaluate(); exit(0);}
case TOK_WHITESPACE: continue;
default: init_tables(); /* lexer already complained */
}
}
exit(0);
return(0);
}