//#define ORIG 1 #define STUPID_COMP_SHORTCUT // macro-process pseudo stack-based assembly language into actual x86 code, and compile with nasm /* http://en.wikipedia.org/wiki/X86_calling_conventions http://unixwiz.net/techtips/win32-callconv-asm.html http://www.codeproject.com/KB/cpp/calling_conventions_demystified.aspx http://www.agner.org/optimize/calling_conventions.pdf - see table 4, register usage eax, ecx and edx may be corrupted inside a function. edx is used for long long results, which I'm not returning, so don't care. I push left->right, not right->left, so incompatible with CDECL at the moment, unless you call in reverse from the program (actually, same as Imp77 on intel does) another pending optimisation: call putchar ;; CALL putchar , 1 , 0 add esp, 4 mov esp, ebp ;; RET 0 pop ebp ret i think any assignment to esp is redundant before a return. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> static char *Progname; FILE *infile, *outfile; #define TRUE (0==0) #define FALSE (0!=0) #define DEBUG_PARSER 1 #define DEBUG_LEXER 1 //#define DEBUG 1 int debug_parser = FALSE; int debug_lexer = FALSE; int opt_optimiser = FALSE; #define sourcefile infile #define curfile "infile" #define MAXLINE 1024 #include "assemble.h" /* C[] is the source character token stream */ typedef struct sourceinfo { // ATOMS for processed input stream char *s; // string contents int l; // lineno int col; // column int t; // type - tag, "string", 'charconst', or char, so far char *f; // source or includefile name } sourceinfo; static sourceinfo c[16*1024]; int nextfree, arraysize; char onecharstr[1024]; int A[16*1024]; int next_free_a, a_size; int cp; // code pointer. Has to be global state. int ap; // Analysis record pointer. // Built-in phrase codes. Must match with grammar file. #define TYPE_EOF 0 #define TYPE_TAG 1 #define TYPE_STRING 2 #define TYPE_CHARCONST 3 #define TYPE_CHAR 4 #define TYPE_INT 5 //#define TYPE_HEXINT 6 #define TYPE_KEYWORD 7 #define TYPE_COMMENT 8 #define TYPE_NEWLINE 9 int lineno = 1, col = 0; int startline = TRUE; #define MAXPOOL (1024*32) char stringpool[MAXPOOL]; int nextstring = 0; static char *itoaf(char *fmt, int i) // unsafe, can't call twice in one statement { static char s[128]; sprintf(s, fmt, i); return s; } static char *f(char *fmt, char *p) // unsafe, can't call twice in one statement { static char s[512]; sprintf(s, fmt, p); return s; } static int next_free_entry = 0; #define MAX_ENTRIES 10000 typedef struct line { char *psect; char *label; char *opcode; char *opd1; char *opd2; char *comment; } line; line program[MAX_ENTRIES]; static char xline[1024]; static int xi = 0, xcur_line = 1; static void xungetc(int c, FILE *f) { if (xi > 0) { xi -= 1; } else if (c == '\n') { xcur_line -= 1; xi = strlen(xline); } ungetc(c, f); } static int xfgetc(FILE *f) { int c, ch; c = fgetc(f); if (c == EOF) return EOF; ch = c&255; if (ch == '\n') { xline[xi] = '\0'; xi = 0; // (void)make_binary_tuple(LINE, xcur_line++, (int)strdup(xline)); } else xline[xi++] = ch; if (xi == 1023) xi = 1022; xline[xi] = '\0'; return c; } int str_to_pool(char *s) { int tag; for (tag = 0; tag <= nextstring; tag++) { if (strcmp(stringpool+tag, s) == 0) { return tag; /* found, one way or another */ } } } void stores(char *s, int lineno, int col, int type, char *fname) { int tag; if (nextstring + strlen(s) + 1 >= MAXPOOL) exit(1); // TODO: add message strcpy(stringpool+nextstring, s); /* Create a backstop for when not found */ tag = str_to_pool(s); if (tag == nextstring) nextstring += strlen(s)+1; /* Not found, add it */ // makespace(c, nextfree, arraysize); c[nextfree].s = stringpool+tag; c[nextfree].l = lineno; c[nextfree].col = col; c[nextfree].f = fname; c[nextfree].t = type; nextfree++; } void storec(int ch, int lineno, int col, int type, char *fname) { onecharstr[ch*2] = ch; onecharstr[ch*2+1] = '\0'; // convert char to 1-char string before saving. stores(&onecharstr[ch*2], lineno, col, type, fname); } /*>*/ int iskeyword(char *s) { int i; for (i = 0; i < MAX_KEYWORD; i++) if (strcmp(s, keyword[i]) == 0) return TRUE; return FALSE; } void line_reconstruction(void) { static int whitespace; static int peek; int ch; /* Pre-process input ready for parsing. Tokens are stored in array C[] */ whitespace = FALSE; for (;;) { ch = xfgetc(sourcefile); if (ch == EOF) break; ch &= 255; // int, positive. peek = xfgetc(sourcefile); xungetc(peek, sourcefile); if (isalpha(ch) || (ch == '_')) { /*< token or keyword */ int nextfree = 0, strsize = 0, startcol = col; char token[1024]; whitespace = FALSE; for (;;) { // makespace(token, nextfree, strsize); if (isalpha(ch) || isdigit(ch) || (ch == '_')) { // digits allowed after 1st char. col++; token[nextfree++] = ch; } else { token[nextfree] = '\0'; xungetc(ch, sourcefile); break; } ch = xfgetc(sourcefile); } stores(token, lineno, startcol, iskeyword(token) ? TYPE_KEYWORD : TYPE_TAG, curfile); //free(token); /*>*/ } else if (isdigit(ch)) { /*< Number */ int nextfree = 0, numsize = 0; char number[1024]; // Store as a string... whitespace = FALSE; for (;;) { // makespace(number, nextfree, numsize); if (isdigit(ch)) { col++; number[nextfree++] = ch; } else { number[nextfree] = '\0'; xungetc(ch, sourcefile); break; } ch = xfgetc(sourcefile); } stores(number, lineno, col, TYPE_INT, curfile); //free(number); /*>*/ } else switch (ch) { case '\'': // Handle 'c' character constants case '"': // Handle "string" /*< literals */ { int nextfree = 0, strsize = 0, quotech = ch; char string[1024]; whitespace = FALSE; col++; for (;;) { ch = xfgetc(sourcefile); // Newlines are allowed col++; // makespace(string, nextfree, strsize); if (ch == '\\') { ch = xfgetc(sourcefile); col++; if (ch == '\\') { string[nextfree++] = ch; } else if (ch == '\'') { string[nextfree++] = '\''; } else if (ch == '"') { string[nextfree++] = '"'; } else if (ch == 'n') { string[nextfree++] = '\n'; } else if (ch == 'r') { string[nextfree++] = '\r'; } else if (ch == 't') { string[nextfree++] = '\t'; } else { // Warn of unknown (to me) \x escape. Probably an error. string[nextfree++] = '\\'; string[nextfree++] = ch; } } else if (ch != quotech) { string[nextfree++] = ch; } else { string[nextfree] = '\0'; break; } } if (quotech == '\'') { if (strlen(string) == 1) { } else if (strlen(string) <= 4) { // Warn that 'xx' as a 32-bit int is a non-standard extension } else { // Warn that this is probably a string with the wrong type of quote. } } stores(string, lineno, col, (quotech == '\'' ? TYPE_CHARCONST : TYPE_STRING), curfile); //free(string); } break; /*>*/ // this parser is line-at-a-time. If it was whole-file, we wouldn't return here case '\n': lineno++; startline = TRUE; col = 0; whitespace = TRUE; storec(ch, lineno, col++, TYPE_NEWLINE, curfile); return; case '\t': case ' ': col++; // Does not affect whitespace break; default: whitespace = FALSE; storec(ch, lineno, col++, TYPE_CHAR, curfile); } } // set up a dummy at the end because we sometimes look ahead by 1 // in the parsing code and don't want to hit uninitialised data. c[nextfree].t = TYPE_EOF; c[nextfree].s = "<EOF>"; c[nextfree].l = lineno; c[nextfree].col = col; /*>*/ } // ============================== code generation section ============================ int pos(int i) { if (i < 0) return 0; return i; } static int access = TRUE; static char *hack = NULL; // source static char cur_psect[128] = { '\0' }; char *strdup_or_null(const char *s) { char *t; if (s == NULL) return(NULL); t = strdup(s); if (t == NULL) { fprintf(stderr, "Won't\n"); exit(2); } return t; } int eq(const char *s, const char *t) { if (s == t) return(TRUE); // includes if both NULL if ((s == NULL) || (t == NULL)) return(FALSE); return (strcmp(s, t) == 0); } int isnull(const char *s) { if (s == NULL) return(TRUE); if (*s == '\0') return(TRUE); return(FALSE); } // "CMPGE", "CMPLT", "CMPLE", "CMPGT", "CMPEQ", "CMPNE"; static char *cond[6] = { "ge", "l", "le", "g", "e", "ne" }; static char *jcond[6] = { "jge", "jl", "jle", "jg", "je", "jne" }; char *inverse_jump(char *jump) { int i; for (i = 0; i < 7; i++) { if (strcmp(jcond[i], jump) == 0) return(jcond[i^1]); } fprintf(stderr, "Coding error. Is there a new opcode beginning with 'j' ???\n"); } int iscondjump(char *jump) { int i; for (i = 0; i < 7; i++) { if (strcmp(jcond[i], jump) == 0) return TRUE; } return FALSE; } static char *spacestr = " "; static char *spaces = NULL; void plant(const char *psect, const char *label, const char *opcode, const char *opd1, const char *opd2, char *comment) { int elide = FALSE; // using strings, and especially multiple heap strings, is scandalously inefficient. if (next_free_entry == MAX_ENTRIES) { fprintf(stderr, "Won't\n"); exit(1); } if (spaces == NULL) spaces = spacestr + strlen(spacestr); // points to \0 if (hack != NULL) { static char c2[256]; if (comment == NULL) { sprintf(c2, "; %s", hack); } else { sprintf(c2, "; %s%s ; %s", hack, spaces-pos(11-strlen(hack)), comment); } comment = c2; hack = NULL; } if (eq(psect, "text")) { if (isnull(label)) { if (!access) elide = TRUE; } else { access = TRUE; } if (eq(opcode, "jmp") || eq(opcode, "ret")) access = FALSE; // for *following* code, not this line. } // other psects don't affect access if (!elide) { // previously we commented these out; now we just drop them silently program[next_free_entry].psect = strdup_or_null(psect); program[next_free_entry].label = strdup_or_null(label); program[next_free_entry].opcode = strdup_or_null(opcode); program[next_free_entry].opd1 = strdup_or_null(opd1); program[next_free_entry].opd2 = strdup_or_null(opd2); program[next_free_entry].comment = strdup_or_null(comment); next_free_entry += 1; } } void print_comment(char *t) { char *s; for (;;) { s = strstr(t, ";;"); if (s == NULL) break; *s = '\0'; s += 2; fprintf(outfile, "%s\n ;;", t); t = s; } fprintf(outfile, "%s", t); } void reap(const char *psect, const char *label, const char *opcode, const char *opd1, const char *opd2, char *comment) { char opc[128]; int i; *opc = '\0'; if (opcode != NULL) sprintf(opc, "%s", opcode); if (spaces == NULL) spaces = spacestr + strlen(spacestr); // points to \0 if (strcmp(psect, cur_psect) != 0) { strcpy(cur_psect, psect); fprintf(outfile, "%sSECTION .%s\n", spaces-12, psect); } if ((label != NULL) && (*label != '\0')) { i = 10-strlen(label); fprintf(outfile, "%s:%s ", label, spaces-pos(i)); } else { fprintf(outfile, "%s", spaces-12); } // if (elide) fprintf(outfile, ";"); if (isalpha(opc[0]) && isupper(opc[0])) opc[0] = tolower(opc[0]); i = 6-strlen(opc); fprintf(outfile, "%s %s", opc, spaces-pos(i)); fprintf(outfile, "%s", opd1); i = strlen(opd1); if (opd2 != NULL) { i += 2+strlen(opd2); fprintf(outfile, ", %s", opd2); } i = 30-i; fprintf(outfile, "%s ", spaces-pos(i+1)); if ((comment != NULL) && (*comment != '\0')) { if (*comment == ';') { fprintf(outfile, ";"); } else { fprintf(outfile, " ; "); } print_comment(comment); //fprintf(outfile, "%s", comment); } fprintf(outfile, "\n"); } void backcomment(int source, int target) { static char comment[1024]; if (program[target].comment == NULL) { program[target].comment = program[source].comment; return; } if (program[source].comment == NULL) return; sprintf(comment, "%s ;%s", program[target].comment, program[source].comment); program[target].comment = strdup(comment); program[source].comment = NULL; // for wipeout } void wipeout(int line, int count) { int i = line + count; for (i = line; i < line+count; i++) { if (!isnull(program[i].opcode)) { fprintf(stderr, "Won't: '%s'\n", program[i-count].opcode); exit(3); // trying to wipe out a non-empty line. } } while (i < next_free_entry) { program[i-count] = program[i]; // gcc, copy entire record contents i += 1; } next_free_entry -= count; } void optimise(void) { int i; static char tmp[128]; // some of these optimisations are overly precise in which patterns they match; it's safer // to start narrow and then widen an existing rule by adding more options (eg adding "and" // to a test for "add") than it is to assume from the start that any arithmetic op is valid // at that point... for example, you could include "add", and "sub", forgetting that sub // is not commutative. It doesn't matter if you omit a tiny number of optimisation // opportunities, but it matters a lot if you get even one wrong. // this style of optimising make several assumptions about the style of code being // generated, for instance that registers are loaded once and then used once, but not // remembered. Once we start doing register remembering, some of these optimisations // could break, so we need to structure our optimisations into sequential passes, with // known assumptions for each pass. i = 0; for (;;) { if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file) // LABELS NECESSARY: // sequences of 2 instructions + label following: if (eq(program[i].psect, "text") && eq(program[i+1].psect, "text") && isnull(program[i+1].label) && eq(program[i+2].psect, "text") && (!isnull(program[i+2].label)) ) { if ( (program[i].opcode[0] == 'j' ) && (!eq(program[i].opcode, "jmp")) && (strncmp(program[i].opd1, "F_", 2) == 0) && eq(program[i+1].opcode, "jmp") && eq(program[i].opd1, program[i+2].label) ) { // sequences of 4 instructions: /* je F_1118 ;; BF F_1118 jmp nq ;; B nq F_1118: ;; F_1118 : => jne nq */ program[i].opcode = inverse_jump(program[i].opcode); program[i].opd1 = program[i+1].opd1; program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+2].label = ""; // safe assumptions as F_ labels only ever used once backcomment(i+1, i); wipeout(i+1, 1); if (isnull(program[i+1].opcode) && isnull(program[i+1].opd1) && isnull(program[i+1].opd2)) { backcomment(i+1, i); wipeout(i+1, 1); } i -= 10; if (i < 0) i = 0; continue; } } // UNBROKEN BY LABELS: // sequences of 5 instructions: if (eq(program[i].psect, "text") && eq(program[i+1].psect, "text") && isnull(program[i+1].label) && eq(program[i+2].psect, "text") && isnull(program[i+2].label) && eq(program[i+3].psect, "text") && isnull(program[i+3].label) && eq(program[i+4].psect, "text") && isnull(program[i+4].label) ) { if ( eq(program[i].opcode, "push") && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") && eq(program[i+2].opcode, "add") && eq(program[i+2].opd1, "eax") && eq(program[i+3].opcode, "mov") && eq(program[i+3].opd1, "ecx") && eq(program[i+4].opcode, "pop") && eq(program[i+4].opd1, "edx") ) { /* push dword [ci] ;; PUSH ci ;; PUSH & c ; note param is rel to ebp in procedures mov eax, dword [text] ;; PUSH text add eax, dword 1 ;; PUSH # 1 ;; ADD ;; POPI 4 mov ecx, c pop edx => mov edx, dword [ci] ;; PUSH ci ;; PUSH & c ; note param is rel to ebp in procedures mov eax, dword [text] ;; PUSH text add eax, dword 1 ;; PUSH # 1 ;; ADD ;; POPI 4 mov ecx, c */ program[i].opcode = "mov"; // convert pop to a move program[i].opd2 = program[i].opd1; program[i].opd1 = "edx"; program[i+4].opcode = ""; // remove the push program[i+4].opd1 = ""; program[i+4].opd2 = ""; backcomment(i+4, i+3); wipeout(i+4, 1); i -= 10; if (i < 0) i = 0; continue; } } // sequences of 4 instructions: if (eq(program[i].psect, "text") && eq(program[i+1].psect, "text") && isnull(program[i+1].label) && eq(program[i+2].psect, "text") && isnull(program[i+2].label) && eq(program[i+3].psect, "text") && isnull(program[i+3].label) ) { if ((eq(program[i].opcode, "push") ) && (eq(program[i+1].opcode, "push") ) && (eq(program[i+2].opcode, "pop") ) && ((eq(program[i+2].opd1, "eax") ) || (eq(program[i+2].opd1, "ecx") )) && (eq(program[i+3].opcode, "pop") ) && ((eq(program[i+3].opd1, "eax") ) || (eq(program[i+3].opd1, "ecx") )) ) { /* push A push B pop eax pop ecx => mov eax, A mov ecx, B */ program[i].opcode = "mov"; program[i].opd2 = program[i].opd1; program[i].opd1 = program[i+3].opd1; program[i+1].opcode = "mov"; program[i+1].opd2 = program[i+1].opd1; program[i+1].opd1 = program[i+2].opd1; program[i+2].opcode = ""; program[i+2].opd1 = ""; backcomment(i+2, i+1); program[i+3].opcode = ""; program[i+3].opd1 = ""; backcomment(i+3, i+1); wipeout(i+2, 2); i -= 10; if (i < 0) i = 0; continue; } if ((eq(program[i].opcode, "push") ) && (eq(program[i+1].opcode, "mov") ) && (!eq(program[i+1].opd1, "ecx")) && (eq(program[i+2].opcode, "add") || eq(program[i+2].opcode, "sub") || eq(program[i+2].opcode, "and")) && (!eq(program[i+1].opd1, "ecx") ) && (eq(program[i+3].opcode, "pop") && (eq(program[i+3].opd1, "ecx"))) ) { /* push c ;; PUSH & c ; note param is rel to ebp in procedures mov eax, dword [ci] ;; PUSH ci add eax, dword 2 ;; PUSH # 2 ;; ADD ;; PUSHI 4 pop ecx => ;; PUSH & c ; note param is rel to ebp in procedures mov eax, dword [ci] ;; PUSH ci add eax, dword 2 ;; PUSH # 2 ;; ADD ;; PUSHI 4 mov ecx, c */ program[i+3].opcode = "mov"; // convert pop to a move program[i+3].opd2 = program[i].opd1; program[i].opcode = ""; // remove the push program[i].opd1 = ""; program[i].opd2 = ""; if (i > 0) backcomment(i, i-1); wipeout(i, 1); i -= 10; if (i < 0) i = 0; continue; } if ( eq(program[i].opcode, "push") && /*eq(program[i].opd1, "dword [ecx+eax*4]") &&*/ eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "ecx") && eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, "eax") && eq(program[i+3].opcode, "pop") && eq(program[i+3].opd1, "edx") ) { /* push dword [ecx+eax*4] mov ecx, a ;; PUSH & a ; note param is rel to ebp in procedures mov eax, dword [pp] ;; PUSH pp ;; POPI 4 pop edx => mov edx, dword [ecx+eax*4] mov ecx, a ;; PUSH & a ; note param is rel to ebp in procedures mov eax, dword [pp] ;; PUSH pp ;; POPI 4 similarly for push dword [v] ;; PUSH v mov ecx, c ;; PUSH & c ; note param is rel to ebp in procedures mov eax, dword [ci] ;; PUSH ci ;; POPI 4 pop edx */ program[i].opcode = "mov"; // convert pop to a move program[i].opd2 = program[i].opd1; program[i].opd1 = "edx"; program[i+3].opcode = ""; // remove the push program[i+3].opd1 = ""; program[i+3].opd2 = ""; backcomment(i+3, i+2); wipeout(i+3, 1); i -= 10; if (i < 0) i = 0; continue; } } // sequences of 3 instructions: if (eq(program[i].psect, "text") && eq(program[i+1].psect, "text") && isnull(program[i+1].label) && eq(program[i+2].psect, "text") && isnull(program[i+2].label) ) { if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (eq(program[i+1].opcode, "sub") || eq(program[i+1].opcode, "add")) && eq(program[i+1].opd1, "ecx") && eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, "eax") && eq(program[i+2].opd2, "ecx") ) { /* mov ecx, dword [ci] ;; PUSH ci sub ecx, dword 3 ;; PUSH # 3 ;; SUB mov eax, ecx ;; PUSHI 4 -> mov eax, dword [ci] ;; PUSH ci sub eax, dword 3 ;; PUSH # 3 ;; SUB ;; PUSHI 4 */ program[i].opd1 = "eax"; program[i+1].opd1 = "eax"; program[i+2].opcode = ""; program[i+2].opd1 = ""; program[i+2].opd2 = ""; backcomment(i+2, i+1); wipeout(i+2, 1); i -= 10; if (i < 0) i = 0; continue; } if ( eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (eq(program[i+1].opcode, "sub") || eq(program[i+1].opcode, "add")) && eq(program[i+1].opd1, "ecx") && (strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()? eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, program[i].opd2) && eq(program[i+2].opd2, "ecx") ) { /* mov ecx, dword [fp] ;; PUSH fp add ecx, dword 1 ;; PUSH # 1 (or any constant) ;; ADD mov dword [fp], ecx ;; POP fp => add dword [fp], dword 1 */ // *POSSIBLY* decide not to do this optimisation if program[i].opd2 is referred to anywhere // in the opd2 fields of the next 4 or 5 instructions, ie avoid a redundant memory access // if the value can stay in a register until needed again shortly. [TODO] // THIS BREAKS if a subsequent instruction has already been optimised to use the cached version of // ecx which this originally left behind. It's really the other optimisation that's wrong. int next; for (next = i+3; next < i+8; next++) { if (!isnull(program[next].label)) break; if (eq(program[i].opd2, program[next].opd2) || eq(program[i].opd2, program[next].opd1)) { goto skip; } } program[i+2].opcode = program[i+1].opcode; program[i+2].opd2 = program[i+1].opd2; program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = NULL; program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL; backcomment(i, i-1); backcomment(i+1, i-1); wipeout(i, 2); i -= 10; if (i < 0) i = 0; continue; skip:; // PROTECT: program[i].opcode = "Mov"; //program[i+2].opcode = "Mov"; } if (eq(program[i].opcode, "mov") && (strncmp(program[i].opd1, "dword [", 7) == 0) && eq(program[i].opd2, "ecx") && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "ecx") && (!eq(program[i+1].opd2, "eax")) && (!eq(program[i+1].opd2, "ecx")) && eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, "eax") && eq(program[i].opd1, program[i+2].opd2) ) { /* knock on effect of avoiding store-to-store increment when memory address reused soon after: mov dword [pp], ecx ;; POP pp mov ecx, a ;; PUSH & a mov eax, dword [pp] ;; PUSH pp => mov dword [pp], ecx ;; POP pp mov eax, ecx ;; PUSH & a mov ecx, a ;; PUSH pp */ char *temp = program[i+1].opd2; program[i].opcode = "Mov"; // stop this opcode being absorbed by the previous two opcodes // and becoming part of an: add dword[loc], dword 1 program[i+1].opd2 = program[i+1].opd1; program[i+1].opd1 = program[i+2].opd1; program[i+2].opd1 = program[i+1].opd2; program[i+2].opd2 = temp; // i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") && eq(program[i+2].opcode, "mov") && ((eq(program[i+2].opd1, "[ecx+eax*4]") && eq(program[i+2].opd2, "edx")) || (eq(program[i+2].opd1, "edx") && eq(program[i+2].opd2, "dword [ecx+eax*4]"))) ) { /* Start this optimisation with a simple reordering: mov ecx, stored ;; PUSH & stored mov eax, dword [i] ;; PUSH i ;; POPI 4 mov [ecx+eax*4], edx => mov eax, dword [i] ;; PUSH i ;; POPI 4 mov ecx, stored ;; PUSH & stored mov [ecx+eax*4], edx Also works for: mov ecx, a ;; PUSH & a mov eax, dword [fp] ;; PUSH fp ;; PUSHI 4 mov edx, dword [ecx+eax*4] */ char *temp; // swap opd1; temp = program[i].opd1; program[i].opd1 = program[i+1].opd1; program[i+1].opd1 = temp; // swap opd2: temp = program[i].opd2; program[i].opd2 = program[i+1].opd2; program[i+1].opd2 = temp; i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "edx") && (eq(program[i].opd2, "ecx") || (strncmp(program[i].opd2, "dword ", 6) == 0)) && (!eq(program[i].opd2 + strlen(program[i].opd2) - strlen("+eax*4]"), "+eax*4]")) && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") && (!eq(program[i+1].opd1, "edx")) && eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1 + strlen(program[i+2].opd1) - strlen("+eax*4]"), "+eax*4]") && eq(program[i+2].opd2, "edx") ) { /* Add a second simple reordering: mov edx, dword [nl] ;; PUSH nl mov eax, dword [fp] ;; PUSH & a ;; PUSH fp ;; POPI 4 mov [a+eax*4], edx => mov eax, dword [fp] ;; PUSH & a ;; PUSH fp ;; POPI 4 mov edx, dword [nl] ;; PUSH nl mov [a+eax*4], edx */ char *temp; // swap opd1; temp = program[i].opd1; program[i].opd1 = program[i+1].opd1; program[i+1].opd1 = temp; // swap opd2: temp = program[i].opd2; program[i].opd2 = program[i+1].opd2; program[i+1].opd2 = temp; i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "eax") && (eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub")) && eq(program[i+1].opd1, "eax") && (strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()? eq(program[i+2].opcode, "mov") && eq(program[i+2].opd2 + strlen(program[i+2].opd2) - strlen("+eax*4]"), "+eax*4]") ) { // quite a nice one, given this isn't being done at the AST level... /* mov eax, dword [text] ;; PUSH text add/sub eax, dword 1 ;; PUSH # 1 mov eax, dword [c+eax*4] => mov eax, dword [text] ;; PUSH text mov eax, dword [c+eax*4+1*4] */ char *temp; sprintf(tmp, "%s", program[i+2].opd2); temp = strrchr(tmp, ']'); sprintf(temp, "%c%s*4]", (eq(program[i+1].opcode, "add") ? '+' : '-'), program[i+1].opd2+6); // NO - REMOVE "dword " FIRST, AND GET "4" FACTOR FROM ORIG STRING. program[i+2].opd2 = strdup(tmp); program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = ""; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "eax") && (eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub")) && eq(program[i+1].opd1, "eax") && (strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()? eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1 + strlen(program[i+2].opd1) - strlen("+eax*4]"), "+eax*4]") ) { // same as above, but 3rd instr operands swapped /* mov eax, dword [text] ;; PUSH text add eax, dword 1 ;; PUSH # 1 ;; ADD ;; POPI 4 mov [c+eax*4], edx */ char *temp; sprintf(tmp, "%s", program[i+2].opd1); temp = strrchr(tmp, ']'); sprintf(temp, "%c%s*4]", (eq(program[i+1].opcode, "add") ? '+' : '-'), program[i+1].opd2+6); // NO - REMOVE "dword " FIRST, AND GET "4" FACTOR FROM ORIG STRING. program[i+2].opd1 = strdup(tmp); program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = ""; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "call") && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "[esp]") && eq(program[i+1].opd2, "eax") && eq(program[i+2].opcode, "pop") && eq(program[i+2].opd1, "eax") ) { /* call rfib1 ;; CALL rfib1 , 1 , 1 mov [esp], eax pop eax ;; ADD => call rfib1 ;; CALL rfib1 , 1 , 1 add esp, 4 */ program[i+1].opcode = "add"; program[i+1].opd1 = "esp"; program[i+1].opd2 = "4"; program[i+2].opcode = ""; program[i+2].opd1 = ""; program[i+2].opd2 = ""; backcomment(i+2, i+1); wipeout(i+2, 1); i -= 10; if (i < 0) i = 0; continue; } } // sequences of 2 instructions: if ((eq(program[i].psect, "text") ) && (eq(program[i+1].psect, "text") ) && isnull(program[i+1].label) ) { if ((eq(program[i].opcode, "push") ) && ((eq(program[i].opd1, "eax") ) || (eq(program[i].opd1, "ecx") )) && (eq(program[i+1].opcode, "pop") ) ) { // sequences of 2 instructions: /* push eax; pop fred => mov eax, fred */ program[i].opcode = "mov"; program[i].opd2 = program[i].opd1; program[i].opd1 = program[i+1].opd1; program[i+1].opcode = ""; program[i+1].opd1 = ""; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if ( (eq(program[i].opcode, "push") && (strncmp(program[i].opd1, "dword [", 7) == 0)) && (eq(program[i+1].opcode, "pop") && (strncmp(program[i+1].opd1, "dword [", 7) == 0)) ) { // stack-based design of code generator ensures that push followed by pop is never // done anywhere that a register holds any significant content program[i].opcode = "mov"; program[i].opd2 = program[i].opd1; program[i].opd1 = "eax"; // ANY REGISTER WILL DO! program[i+1].opcode = "mov"; program[i+1].opd2 = program[i].opd1; i -= 10; if (i < 0) i = 0; continue; } if ( eq(program[i].opcode, "push") && (eq(program[i].opd1, "eax") || eq(program[i].opd1, "ecx") || eq(program[i].opd1, "edx")) && eq(program[i+1].opcode, "pop") && (strncmp(program[i].opd1, "dword [", 7) == 0) ) { /* push edx pop dword [xrem] ;; POP xrem => mov dword [xrem], edx */ program[i].opcode = "mov"; program[i].opd2 = program[i].opd1; program[i].opd1 = program[i+1].opd1; program[i+1].opcode = ""; program[i+1].opd2 = ""; program[i+1].opd2 = ""; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if ( eq(program[i].opcode, "push") && (strncmp(program[i].opd1, "dword ", 6) == 0) && (program[i].opd1[6] != '[') && eq(program[i+1].opcode, "pop") ) { // push dword 0 ;; PUSH # 0 // pop eax ;; RET 1 ; pass result back in register eax // stack-based design of code generator ensures that push followed by pop is never // done anywhere that a register holds any significant content program[i].opcode = "mov"; program[i].opd2 = program[i].opd1; program[i].opd1 = program[i+1].opd1; #ifdef NEVER {// use this snippet when testing changes static char x[1024]; if (program[i].comment == NULL) { sprintf(x, "CHECK HERE"); } else { sprintf(x, "%s ;; CHECK HERE", program[i].comment); } program[i].comment = strdup(x); } #endif program[i+1].opcode = ""; program[i+1].opd1 = ""; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if ( eq(program[i].opcode, "push") && eq(program[i+1].opcode, "pop") && (eq(program[i+1].opd1, "eax") || eq(program[i+1].opd1, "ecx") || eq(program[i+1].opd1, "edx")) ) { // push dword 0 ;; PUSH # 0 // pop eax ;; RET 1 ; pass result back in register eax /* push dword [fp] ;; PUSH fp pop eax ;; RET 1 ; pass result back in register eax */ // stack-based design of code generator ensures that push followed by pop is never // done anywhere that a register holds any significant content program[i].opcode = "mov"; program[i].opd2 = program[i].opd1; program[i].opd1 = program[i+1].opd1; program[i+1].opcode = ""; program[i+1].opd1 = ""; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if ( eq(program[i].opcode, "add") && eq(program[i].opd1, "esp") && eq(program[i].opd2, "4") && eq(program[i+1].opcode, "push") && (strncmp(program[i+1].opd1, "dword [", 7) != 0) ) { // successive procedure calls ( > 1 arg is a logical extension ...) /* NOTE: due to addressing limitations, the followingt *cannot* be optimised: push dword [code] ;; PUSH code call stack ;; CALL stack , 1 , 0 add esp, 4 push dword [text] ;; PUSH text call stack ;; CALL stack , 1 , 0 add esp, 4 push dword [num] ;; PUSH num call stack ;; CALL stack , 1 , 0 add esp, 4 mov ecx, dword [ci] ;; PUSH ci */ program[i].opcode = "mov"; program[i].opd1 = "[esp]"; program[i].opd2 = program[i+1].opd1; program[i+1].opcode = ""; program[i+1].opd1 = ""; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "add") && eq(program[i].opd1, "ecx") && eq(program[i].opd2, "eax") && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "ecx") ) { // sequences of 2 instructions: /* add ecx, eax mov eax, ecx -> add ecx, eax */ char *swap = program[i].opd2; program[i].opd2 = program[i].opd1; program[i].opd1 = swap; program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "eax") && (eq(program[i+1].opcode, "cmp") || eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub")) && eq(program[i+1].opd1, "ecx") && eq(program[i+1].opd2, "eax") ) { /* mov eax, dword 'z' ;; PUSH # 'z' ;; CMPGT cmp ecx, eax -> cmp ecx, dword 'z' */ /* mov ecx, eax ; transfer fn result to the stack cmp ecx, dword 0 ;; PUSH # 0 ;; CMPLT */ program[i].opcode = program[i+1].opcode; program[i].opd1 = program[i+1].opd1; program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (eq(program[i+1].opcode, "and") || eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub")) && eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "ecx") ) { /* mov ecx, dword 95 ;; PUSH # 95 ;; BAND and eax, ecx -> and eax, dword 95 */ program[i].opcode = program[i+1].opcode; program[i].opd1 = program[i+1].opd1; program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && eq(program[i].opd2, "eax") && eq(program[i+1].opcode, "cmp") && eq(program[i+1].opd1, "ecx") ) { /* mov ecx, eax ; transfer fn result to the stack cmp ecx, dword 0 ;; PUSH # 0 ;; CMPLT -> cmp eax, dword 0 */ program[i].opcode = program[i+1].opcode; program[i].opd1 = program[i].opd2; program[i].opd2 = program[i+1].opd2; program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd2, "eax") && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "ecx") && eq(program[i].opd1, program[i+1].opd2) ) { /* mov dword [i], eax ;; POP i mov ecx, dword [i] ;; PUSH i => mov dword [i], eax ;; POP i mov ecx, eax if i.opd1 == i+1.opd2 - trivial case of register remembering */ program[i].opcode = "Mov"; // PROTECT IT FROM BEING MERGED BY AN "inc" program[i+1].opd2 = program[i].opd2; // trivial case of register remembering if (eq(program[i+1].opd1, program[i+1].opd2)) { // although this may be worth its own entry below! program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = ""; backcomment(i+1, i); wipeout(i+1, 1); } i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && (eq(program[i].opd2, "eax") || eq(program[i].opd2, "ecx")) && eq(program[i+1].opcode, "mov") && (eq(program[i+1].opd1, "eax") || eq(program[i+1].opd1, "ecx")) && eq(program[i].opd2, program[i+1].opd1) && eq(program[i].opd1, program[i+1].opd2) ) { /* mov dword [i], eax ;; POP i mov eax, dword [i] ;; PUSH i => mov dword [i], eax if i.opd1 == i+1.opd2 - trivial case of register remembering */ program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = ""; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if ( eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) == 0) && (program[i].opd2[6] != '[') && (program[i].opd2[6] != '-') && (eq(program[i+1].opcode, "shr") || eq(program[i+1].opcode, "shl")) && eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "cl") ) { /* mov ecx, dword 7 ;; PUSH # 7 ;; RSH shr eax, cl => shr eax, dword 7 */ program[i].opcode = program[i+1].opcode; program[i].opd1 = "eax"; sprintf(tmp, "byte %s", program[i].opd2+6); program[i].opd2 = strdup(tmp); program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL; backcomment(i+1, i); wipeout(i+1, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "[ecx+eax*4]") && eq(program[i+1].opd2, "edx") ) { /* mov ecx, c ;; PUSH ci ;; POPI 4 mov [ecx+eax*4], edx ;; PUSH ci => mov [c+eax*4], edx */ sprintf(tmp, "[%s+eax*4]", program[i].opd2); program[i+1].opd1 = strdup(tmp); program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = ""; backcomment(i, i-1); wipeout(i, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) && eq(program[i+1].opcode, "mov") && eq(program[i+1].opd2, "dword [ecx+eax*4]") ) { // now the reverse version /* mov ecx, a mov eax, dword [ecx+eax*4] => mov eax, [a+eax*4] */ sprintf(tmp, "dword [%s+eax*4]", program[i].opd2); program[i+1].opd2 = strdup(tmp); program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = ""; backcomment(i, i-1); wipeout(i, 1); i -= 10; if (i < 0) i = 0; continue; } if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "edx") && (strncmp(program[i].opd2, "dword [", 7) != 0) && // try it, and see what combinations are rejected? eq(program[i+1].opcode, "mov") && (eq(program[i+1].opd1 + strlen(program[i+1].opd1) - strlen("+eax*4]"), "+eax*4]")) && eq(program[i+1].opd2, "edx") ) { /* mov edx, ecx ;; PUSH & a ;; PUSH fp ;; POPI 4 mov [a+eax*4], edx => mov [a+eax*4], ecx */ program[i+1].opd2 = program[i].opd2; program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = ""; backcomment(i, i-1); wipeout(i, 1); i -= 10; if (i < 0) i = 0; continue; } } // single instructions: if (eq(program[i].opcode, "mov") && eq(program[i].opd1, program[i].opd2)) { program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = NULL; backcomment(i, i-1); wipeout(i, 1); i -= 10; if (i < 0) i = 0; continue; } i += 1; } // now we do optimisations that break the default assumptions relied on above // When these are done, you have to be very careful what you peephole afterwards i = 0; for (;;) { if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file) if (eq(program[i].opcode, "mov") && eq(program[i+1].opcode, "cmp") && isnull(program[i+1].label) && iscondjump(program[i+2].opcode) && isnull(program[i+2].label) && eq(program[i].opd1, program[i+1].opd1) ) { int next = i+3, optimised = FALSE; while (eq(program[next].opcode, "mov") && isnull(program[next].label) && eq(program[next].psect, "text") && eq(program[next+1].opcode, "cmp") && isnull(program[next+1].label) && iscondjump(program[next+2].opcode) && isnull(program[next+2].label) && eq(program[i].opd1, program[next].opd1) && eq(program[next].opd1, program[next+1].opd1) && eq(program[i].opd2, program[next].opd2) ) { program[next].opcode = ""; program[next].opd1 = ""; program[next].opd2 = NULL; backcomment(next, next-1); wipeout(next, 1); next = next + 2; optimised = TRUE; } if (optimised) { program[i].opcode = "Mov"; // i -= 10; if (i < 0) i = 0; continue; } } i += 1; } i = 0; for (;;) { if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file) if ( eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && eq(program[i+1].opcode, "cmp") && eq(program[i+1].opd1, "ecx") && (strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && (program[i+1].opd2[6] != '-') ) { /* mov ecx, dword [code] ;; PUSH code cmp ecx, dword 'F' ;; PUSH # 'F' => cmp dword [code], dword 'F' This optimisation should be done *after* the skip chains */ program[i+1].opd1 = program[i].opd2; program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = NULL; backcomment(i, i-1); wipeout(i, 1); i -= 10; if (i < 0) i = 0; continue; } i += 1; } } void dump(void) { int i; plant("text", "__end_of_program", "", "", NULL, "End of program"); if (opt_optimiser) { plant("text", "", "", "", NULL, "(a few dummies for the optimizer's fenceposting)"); plant("text", "", "", "", NULL, "..."); plant("text", "", "", "", NULL, "..."); plant("text", "", "", "", NULL, "..."); plant("text", "", "", "", NULL, "..."); optimise(); } // Should print all .bss and .data entries first, leaving only .text // - this will allow optimising over instruction sequences which were broken // by inline data. Remember to wipeout the deleted entries. for (i = 0; i < next_free_entry; i++) { if (eq(program[i].label, "__end_of_program")) break; if (eq(program[i].psect, "data")) reap( program[i].psect, program[i].label, program[i].opcode, program[i].opd1, program[i].opd2, program[i].comment ); } for (i = 0; i < next_free_entry; i++) { if (eq(program[i].label, "__end_of_program")) break; if (eq(program[i].psect, "bss")) reap( program[i].psect, program[i].label, program[i].opcode, program[i].opd1, program[i].opd2, program[i].comment ); } for (i = 0; i < next_free_entry; i++) { if (eq(program[i].label, "__end_of_program")) break; if (eq(program[i].psect, "text")) reap( program[i].psect, program[i].label, program[i].opcode, program[i].opd1, program[i].opd2, program[i].comment ); } } // =================================================================================== int bestparse; // for error reporting. char *looking_for; // 'while looking for <PHRASENAME>' (or literal) ... void indent(int depth, FILE *f) { int i; for (i = 0; i < depth; i++) fprintf(f, " "); } int startparse(int pp) // depth is only for indentation in diags { bestparse = -1; // for error reporting. looking_for = "<UNKNOWN>"; // 'while looking for <PHRASENAME>' (or literal) ... parse(pp, 0); } static int nexttmp = 1; int parse(int pp, int depth) // depth is only for indentation in diags { /* Main parsing procedure. This is a table-driven parser, with the tables being generated from the grammar rules embedded in the 'compile' procedure below. The result of the parse is a tree structure, and the values of the nodes in the tree structure are used to drive a large 'case' statement which selects the actions to be performed after a successful parse. There is no grammatical structure embedded in this procedure. If you're looking for the grammar definition, see the procedure called 'compile' instead. */ int saved_cp, saved_ap, i, gp, alts, match; char saved_desc[256]; /* Initialisation */ gp = phrase_start[pp-512-MAX_BIP]; alts = gram[gp]; /* Debugging */ #ifdef DEBUG_PARSER if (debug_parser) { fprintf(outfile, "\n"); indent(depth, outfile); fprintf(outfile, "Phrase %s/%d (%d alternatives) = ", phrasename[pp-512], pp, alts); fflush(outfile); } #endif gp++; // gp now points to first element (length) of first alt saved_cp = cp; saved_ap = ap; for (i = 0; i < alts; i++) { /* Starting with the root phrase, recursively examine each alternative */ int each, phrases = gram[gp++], phrase_count, gap = 0; cp = saved_cp; ap = saved_ap; if (ap+3 > next_free_a) next_free_a = ap+3; // makespace(A, next_free_a, a_size); A[ap++] = pp; // record which phrase (could be done outside loop) A[ap++] = i; // and which alt. // Count slots needed. *Could* be precalculated and stored // in the grammar, either embedded (after the ALT) or as a // separate table for (each = 0; each < phrases; each++) if (gram[gp+each] >= 512) gap++; A[ap++] = gap; // Count of alts (gap) // ap+gap now points to the slot after the space required, which // is where the first subphrase will be stored. ap = ap+gap; // recursive subphrases are stored after this phrase. // ap is left updated if successful. // successfully parsed phrases are stored in A[saved_ap+3+n] if (saved_ap+3+gap > next_free_a) next_free_a = saved_ap+3+gap; // makespace(A, next_free_a, a_size); /* Debug */ // this loop is only for diagnostics #ifdef DEBUG_PARSER if (debug_parser) { char *saved_descp; fprintf(outfile, "\n"); indent(depth, outfile); fprintf(outfile, "Alternative %d: (%d phrases) ", i+1, phrases); saved_descp = saved_desc; *saved_descp = '\0'; for (each = 0; each < phrases; each++) { int phrase = gram[gp+each]; if (phrase < 256) { saved_descp += sprintf(saved_descp, " '%c'", phrase); } else if (phrase < 512) { saved_descp += sprintf(saved_descp, " \"%s\"/%d", keyword[phrase-256], phrase-256); } else if (phrase < 512+MAX_BIP) { saved_descp += sprintf(saved_descp, " {%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]); } else { saved_descp += sprintf(saved_descp, " <%s/%d>", phrasename[phrase-512], phrase); } } fprintf(outfile, "%s\n", saved_desc); fflush(outfile); } #endif match = TRUE; // stays true if all subphrases match phrase_count = 0; // only phrases which make it into the A record, // i.e. excluding literals and keywords for (each = 0; each < phrases; each++) { /* Within a single grammar rule (alternative), ensure that each subphrase is present */ int phrase = gram[gp+each]; /* Debug */ #ifdef DEBUG_PARSER if (debug_parser) { indent(depth, outfile); fprintf(outfile, "Input token stream = '%s' '%s' '%s' ...\n", (cp < nextfree ? c[cp].s : "EOF"), (cp+1 < nextfree ? c[cp+1].s : "EOF"), (cp+2 < nextfree ? c[cp+2].s : "EOF")); } #endif if (cp > bestparse) { static char s[128]; #ifdef DEBUG_PARSER if (phrase < 256) { sprintf(s, "'%c'", phrase); } else if (phrase < 512) { sprintf(s, "\"%s\"", keyword[phrase-256]); } else if (phrase < 512+MAX_BIP) { sprintf(s, "{%s}", phrasename[phrase-512]); } else { sprintf(s, "<%s>", phrasename[phrase-512]); } #endif looking_for = s; bestparse = cp; } #ifdef DEBUG_PARSER if (debug_parser) indent(depth, outfile); #endif if (phrase < 256) { /* Literal */ #ifdef DEBUG_PARSER if (debug_parser) fprintf(outfile, "'%c'", phrase); #endif if ((c[cp].t != TYPE_CHAR) || (c[cp].s[0] != phrase)) match = FALSE; else cp++; // Don't record literals } else if (phrase < 512) { /* Keyword */ #ifdef DEBUG_PARSER if (debug_parser) fprintf(outfile, "\"%s\"/%d", keyword[phrase-256], phrase-256); #endif if (strcmp(keyword[phrase-256], c[cp].s) != 0) match = FALSE; else cp++; // Don't record keywords } else if (phrase < 512+MAX_BIP) { /* Built-in phrase */ int where = ap; // next phrase to be parsed will be stored at current 'ap'. #ifdef DEBUG_PARSER if (debug_parser) fprintf(outfile, "{%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]); #endif if (c[cp].t != BIP[phrase-512]) match = FALSE; else { A[ap++] = phrase; A[ap++] = 1; A[ap++] = 1; A[ap++] = cp++; A[saved_ap+3+phrase_count++] = where; // Record BIP } } else { /* Recursive call to parser for a subphrase */ int where = ap; // next phrase to be parsed will be stored at current 'ap'. #ifdef DEBUG_PARSER if (debug_parser) fprintf(outfile, "<%s/%d>", phrasename[phrase-512], phrase); #endif if (!parse(phrase, depth+1)) match = FALSE; else { A[saved_ap+3+phrase_count++] = where; } } /* debug */ #ifdef DEBUG_PARSER if (debug_parser) { fprintf(outfile, "\n"); indent(depth, outfile); fprintf(outfile, "Tried alternative %d: %s - result was %s\n", each+1, saved_desc, (match ? "TRUE" : "FALSE")); fflush(outfile); } #endif if (!match) break; } gp += phrases; // move over all phrases, to next alt if (match) break; #ifdef DEBUG_PARSER else if (debug_parser) { indent(depth, outfile); fprintf(outfile, "** Alternative %d FAILED.\n", i+1); } #endif // gp now points to first element (length) of next alt, or start of next phrase } return(match); } static char debugstr[1024]; static char *debugs; static int paramoffset = 1; static int next_string_const = 1000; char *compile(int ap) { static char arg[128]; static int comparison_alt_cache; int saved_ap; int phrase; // A[ap] is the phrase number. A[ap+1] is the alt. int alt; // For consistency, in BIPs, the Alt should always be 1 // although that may not be the case at the moment :-( int phrases; // defined subphrases int i; int condno; // The following ecce command executed on this file will generate varcalc.g: // ecce -c "(v.//\\.s..(v/ /e)?m,k)0;%c" varcalc.c varcalc.g // May later tweak takeon.c to read from varcalc.c rather than varcalc.g // thus simplifying build process and becoming more like yacc. saved_ap = ap; phrase = A[ap++]; alt = A[ap++]; phrases = A[ap++]; #ifdef DEBUG fprintf(outfile, "compile(A[%d]) phrase=%s\n", saved_ap, phrasename[phrase-512]); #endif switch (phrase) { //\\ B<EOF> = 0; case P_EOF: fprintf(stderr, "EOF!\n"); dump(); exit(0); //\\ B<TAG> = 1; case P_TAG: return NULL; //\\ B<STRING> = 2; case P_STRING: //\\ B<CHARCONST> = 3; case P_CHARCONST: //\\ B<INT> = 5; case P_INT: //\\ B<COMMENT> = 8; case P_COMMENT: break; //\\ B<NEWLINE> = 9; case P_NEWLINE: //\\ P<SS> = <EOF>, <LABEL> <NEWLINE>, <INSTR> <NEWLINE>, <dotPROC> <NEWLINE>, <dotEND> <NEWLINE>, <dotWORD> <NEWLINE>, <dotCONST> <NEWLINE>, <dotARRAY> <NEWLINE>, <OPTCOMMENT> <NEWLINE>, <SYNTAXERROR>; case P_SS: if (alt == 0) { dump(); exit(0); // or would 'break' be cleaner here? } if (alt < 8) { int cx; #ifdef NEVER debugs = debugstr; debugs += sprintf(debugs, " SECTION .data\n"); debugs += sprintf(debugs, "line%d: db \"", lineno); hack = debugs; for (cx = 0; cx < cp-1; cx++) { if (c[cx].t == TYPE_CHARCONST) { if (*c[cx].s == '"') { debugs += sprintf(debugs, "'\", %d, \"", *c[cx].s); } else { debugs += sprintf(debugs, "'%s' ", c[cx].s); } } else { debugs += sprintf(debugs, "%s ", c[cx].s); } } fprintf(outfile, " ;; %s\n", hack); debugs += sprintf(debugs, "\", 10, 0\n"); debugs += sprintf(debugs, " SECTION .text\n"); debugs += sprintf(debugs, " pop eax\n"); debugs += sprintf(debugs, " push eax\n"); debugs += sprintf(debugs, " push eax\n"); debugs += sprintf(debugs, " push dword fmt_tos ; address of %%s format string\n"); debugs += sprintf(debugs, " call printf ; Call C function\n"); debugs += sprintf(debugs, " add esp, 8 ; pop 2 params\n"); debugs += sprintf(debugs, " push dword line%d ; address of string\n", lineno); debugs += sprintf(debugs, " push dword fmt_str ; address of %%s format string\n"); debugs += sprintf(debugs, " call printf ; Call C function\n"); debugs += sprintf(debugs, " add esp, 8 ; pop 2 params\n"); #else hack = debugs = debugstr; for (cx = 0; cx < cp-1; cx++) { if (c[cx].t == TYPE_CHARCONST) { if (*c[cx].s == '"') { debugs += sprintf(debugs, "'\", %d, \"", *c[cx].s); } else { debugs += sprintf(debugs, "'%s' ", c[cx].s); } } else { debugs += sprintf(debugs, "%s ", c[cx].s); } } #endif compile(A[ap]); } return NULL; //\\ P<SYNTAXERROR> = ; case P_SYNTAXERROR: fprintf(stderr, "Parse failed looking for %s\n", looking_for); return NULL; //\\ P<CALL> = "CALL" <TAG> ',' <INT> ',' <INT>; case P_CALL: if (strcmp(c[A[A[ap]+3]].s, "psym") == 0) { // special for 'print "xxx"' plant("text", "", "push", "dword fmt_chr", NULL, "address of format string"); plant("text", "", "call", "printf", NULL, "Call C function"); plant("text", "", "add", "esp", "8", "pop 2 params"); } else { plant("text", NULL, "call", c[A[A[ap]+3]].s, NULL, NULL); if (atoi(c[A[A[ap+1]+3]].s) != 0) { plant("text", "", "add", "esp", itoaf("%d", atoi(c[A[A[ap+1]+3]].s)*4), NULL); } if (atoi(c[A[A[ap+2]+3]].s) != 0) { plant("text", "", "push", "eax", NULL, NULL); } } return NULL; //\\ P<dotPROC> = <LABEL> '.' "PROC"; case P_dotPROC: paramoffset = 1; // somewhat hacky, used with .PARAM plant("text", c[A[A[A[ap]]+3]].s, "push", "ebp", NULL, NULL); plant("text", "", "mov", "ebp", "esp", NULL); return NULL; //\\ P<dotEND> = '.' "END"; case P_dotEND: plant("text", "", "mov", "esp", "ebp", NULL); plant("text", "", "pop", "ebp", NULL, NULL); // fprintf(outfile, " mov eax,0 ; normal, no error, return value\n"); // drop-through case in void procedure. // ... if it was a function with a result, this is wrong anyway... plant("text", "", "ret", "", NULL, NULL); dump(); exit(0); // might be nicer to find a cleaner way to break out of main parse loop... //\\ P<dotWORD> = <LABEL> '.' "WORD" <INT>; case P_dotWORD: #ifdef ORIG plant("bss",c[A[A[A[ap]]+3]].s, "resd", c[A[A[ap+1]+3]].s, NULL, NULL); #else if (atoi(c[A[A[ap+1]+3]].s) == 1) { plant("data",c[A[A[A[ap]]+3]].s, "dd", "$80808080", NULL, NULL); } else { plant("data",c[A[A[A[ap]]+3]].s, itoaf("times %d dd", atoi(c[A[A[ap+1]+3]].s)), "$80808080", NULL, NULL); } #endif return NULL; //\\ P<dotARRAY> = <LABEL> '.' "ARRAY"; case P_dotARRAY: plant("data", c[A[A[A[ap]]+3]].s, "", "", NULL, NULL); return NULL; //\\ P<dotCONST> = '.' "CONST" <INT>; case P_dotCONST: plant("data", "", "dd", c[A[A[ap]+3]].s, NULL, NULL); return NULL; //\\ P<EXIT> = "EXIT"; case P_EXIT: // does this require a RET first? End of program? //\\ P<RET> = "RET" <INT>; case P_RET: if (atoi(c[A[A[ap]+3]].s) != 0) { plant("text", "", "pop", "eax", NULL, NULL); } plant("text", "", "mov", "esp", "ebp", NULL); plant("text", "", "pop", "ebp", NULL, NULL); if ((phrase == P_EXIT) /* || (atoi(c[A[A[ap]+3]].s) == 0) */ ) { // Do we really need to return 0 status code from void function? Let's omit it and see... // OK... can omit for internal functions, but MAIN is called as an int function plant("text", "", "mov", "eax", "0", NULL); } plant("text", "", "ret", "", NULL, NULL); if (phrase == P_EXIT) { dump(); exit(0); } return NULL; //\\ P<INSTR> = <OPTLABEL> <ASSEM> <OPTCOMMENT>; case P_INSTR: compile(A[ap]); compile(A[ap+1]); return NULL; //\\ P<ASSEM> = <ADD>, <SUB>, <MUL>, <DIV>, <MOD>, <NEG>, <NOT>, <BAND>, <BOR>, <LSH>, <RSH>, //\\ <CALL>, <RET>, <PUSHSTR>, <PUSHLIT>, <PUSHIND>, <PUSHADDR>, <PUSHVAR>, <POPIND>, <POPVAR>, //\\ <PARAM>, <CMP>, <BT>, <BF>, <B>, <PRINT>, <EXIT>, <UNKNOWN>; case P_ASSEM: compile(A[ap]); return NULL; //\\ P<CMP> = <COND>; case P_CMP: condno = A[A[ap]+1]; plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "cmp", "ecx", "eax", NULL); #ifdef STUPID_COMP_SHORTCUT comparison_alt_cache = condno; #else // See http://faydoc.tripod.com/cpu/setge.htm for SETGE instruction as an alternative? #ifdef branching fprintf(outfile, " j"); compile(A[ap]); fprintf(outfile, " T_%d\n", nexttmp); fprintf(outfile, " push dword 1 ; false\n"); fprintf(outfile, " jmp T_%d\n", nexttmp+1); fprintf(outfile, "T_%d: push dword 0 ; true\n", nexttmp); fprintf(outfile, "T_%d:", nexttmp+1); nexttmp += 2; #else // fprintf(outfile, " set"); compile(A[ap]); fprintf(outfile, " al\n"); // fprintf(outfile, " xor ah,ah\n"); // fprintf(outfile, " push word 0\n"); // fprintf(outfile, " push ax"); plant("text", "", "mov", "eax", "0", NULL); // fprintf(outfile, " mov ah,0\n"); // fprintf(outfile, " cwde\n"); sprintf(arg, "set%s", cond[condno]); // <COND> plant("text", "", arg, "al", NULL, NULL); //fprintf(outfile, " set"); compile(A[ap]); fprintf(outfile, " al\n"); plant("text", "", "push", "eax", NULL, NULL); /* Rainer suggests the much improved: cmp <whatever> setl al movzx eax,al push eax */ #endif #endif return NULL; //\\ P<COND> = "CMPGE", "CMPLT", "CMPLE", "CMPGT", "CMPEQ", "CMPNE"; case P_COND: // fprintf(outfile, "%s", cond[alt]); handle a level up now return NULL; //\\ P<BF> = "BF" <TAG>; case P_BF: #ifdef STUPID_COMP_SHORTCUT plant("text", "", jcond[comparison_alt_cache ^ 1], c[A[A[ap]+3]].s, NULL, NULL); //fprintf(outfile, " j%s %s", cond[comparison_alt_cache ^ 1], c[A[A[ap]+3]].s); #else fprintf(outfile, " pop eax\n"); // fprintf(outfile, " cmp eax, 0\n"); fprintf(outfile, " or eax, eax\n"); fprintf(outfile, " je %s", c[A[A[ap]+3]].s); #endif return NULL; //\\ P<BT> = "BT" <TAG>; case P_BT: #ifdef STUPID_COMP_SHORTCUT plant("text", "", jcond[comparison_alt_cache], c[A[A[ap]+3]].s, NULL, NULL); // fprintf(outfile, " j%s %s", cond[comparison_alt_cache], c[A[A[ap]+3]].s); #else fprintf(outfile, " pop eax\n"); // fprintf(outfile, " cmp eax, 0\n"); fprintf(outfile, " or eax, eax\n"); fprintf(outfile, " jne %s", c[A[A[ap]+3]].s); #endif return NULL; //\\ P<B> = "B" <TAG>; case P_B: plant("text", "", "jmp", c[A[A[ap]+3]].s, NULL, NULL); return NULL; //\\ P<MUL> = "MUL"; case P_MUL: plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "imul", "ecx", NULL, "(edx:eax = eax*ecx)"); plant("text", "", "push", "eax", NULL, NULL); return NULL; //\\ P<DIV> = "DIV"; case P_DIV: plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "mov", "edx", "0", NULL); plant("text", "", "idiv", "ecx", NULL, "(div -> eax, remainder -> edx)"); plant("text", "", "push", "eax", NULL, NULL); return NULL; //\\ P<MOD> = "MOD"; case P_MOD: plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "mov", "edx", "0", NULL); plant("text", "", "idiv", "ecx", NULL, "(div -> eax, remainder -> edx)"); plant("text", "", "push", "edx", NULL, NULL); return NULL; //\\ P<ADD> = "ADD"; case P_ADD: plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "add", "ecx", "eax", NULL); plant("text", "", "push", "ecx", NULL, NULL); //fprintf(outfile, " pop eax\n"); //fprintf(outfile, " add [esp],eax\n"); return NULL; //\\ P<SUB> = "SUB"; case P_SUB: plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "sub", "ecx", "eax", NULL); plant("text", "", "push", "ecx", NULL, NULL); //fprintf(outfile, " pop eax\n"); //fprintf(outfile, " sub [esp],eax\n"); return NULL; // <NEG>, <NOT>, <LSH>, <RSH>, //\\ P<NEG> = "NEG"; case P_NEG: // fprintf(outfile, " pop eax\n"); // fprintf(outfile, " neg eax\n"); // fprintf(outfile, " push eax"); plant("text", "", "neg", "dword [esp]", NULL, NULL); return NULL; //\\ P<NOT> = "NOT"; case P_NOT: // fprintf(outfile, " pop eax\n"); // fprintf(outfile, " not eax\n"); // fprintf(outfile, " push eax"); plant("text", "", "not", "dword [esp]", NULL, NULL); return NULL; //\\ P<LSH> = "LSH"; case P_LSH: plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "shl", "eax", "cl", NULL); plant("text", "", "push", "eax", NULL, NULL); return NULL; //\\ P<RSH> = "RSH"; case P_RSH: plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "shr", "eax", "cl", NULL); plant("text", "", "push", "eax", NULL, NULL); return NULL; //\\ P<BAND> = "BAND", "LAND"; case P_BAND: // cheap hack for now plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "and", "eax", "ecx", NULL); plant("text", "", "push", "eax", NULL, NULL); return NULL; //\\ P<BOR> = "BOR", "LOR"; case P_BOR: // ditto plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "or", "eax", "ecx", NULL); plant("text", "", "push", "eax", NULL, NULL); return NULL; //\\ P<PRINT> = "PRINT"; case P_PRINT: plant("text", "", "push", "dword fmt_str", NULL, "address of %s format string"); plant("text", "", "call", "printf", NULL, "external call to C"); plant("text", "", "add", "esp", "8", NULL); return NULL; //\\ P<PUSHLIT> = "PUSH" '#' <NUM>; case P_PUSHLIT: // changed from OPD to NUM for the moment sprintf(arg, "dword %s", compile(A[ap])); plant("text", "", "push", arg, NULL, NULL); //fprintf(outfile, " push dword "); compile(A[ap]); return NULL; //\\ P<PUSHSTR> = "PUSHS" <STRING>; case P_PUSHSTR: //fprintf(outfile, " SECTION .data\n"); sprintf(arg, "\"%s\"", c[A[A[ap]+3]].s); // TO DO! String const needs escaping? plant("data", itoaf("string%d", ++next_string_const), "db", arg, "0", NULL); //fprintf(outfile, "string%d: db \"%s\", 0\n", ++next_string_const, c[A[A[ap]+3]].s); //fprintf(outfile, " SECTION .text\n"); plant("text", "", "push", itoaf("dword string%d", next_string_const), NULL, NULL); return NULL; //\\ P<PUSHIND> = "PUSHI" <NUM>; case P_PUSHIND: plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "push", "dword [ecx+eax*4]", NULL, NULL); return NULL; //\\ P<PUSHVAR> = "PUSH" <TAG>; case P_PUSHVAR: sprintf(arg, "dword [%s]", c[A[A[ap]+3]].s); plant("text", "", "push", arg, NULL, NULL); return NULL; //\\ P<PUSHADDR> = "PUSH" '&' <TAG>; case P_PUSHADDR: plant("text", "", "push", c[A[A[ap]+3]].s, NULL, NULL);// if this is a procedure param it is different from a static return NULL; //\\ P<POPIND> = "POPI" <NUM>; case P_POPIND: // fprintf(outfile, " pop ecx\n"); // fprintf(outfile, " pop dword [ecx]"); plant("text", "", "pop", "eax", NULL, NULL); plant("text", "", "pop", "ecx", NULL, NULL); plant("text", "", "pop", "edx", NULL, NULL); plant("text", "", "mov", "[ecx+eax*4]", "edx", NULL); return NULL; //\\ P<POPVAR> = "POP" <TAG>; case P_POPVAR: sprintf(arg, "dword [%s]", c[A[A[ap]+3]].s); plant("text", "", "pop", arg, NULL, NULL); return NULL; //\\ P<PARAM> = '.' "PARAM" <TAG>; case P_PARAM: sprintf(arg, "%s ebp+%0d", c[A[A[ap]+3]].s, 4*++paramoffset); plant("text", "", "%define", arg, NULL, NULL); //fprintf(outfile, "%%define %s ebp+%0d\n", c[A[A[ap]+3]].s, 4*++paramoffset); plant("text", "", "sub", "esp", "4", NULL); return NULL; //\\ P<UNKNOWN> = <TAG> <OPTOPD>; case P_UNKNOWN: fprintf(outfile, " %s; UNKNOWN OPCODE", c[A[A[ap]+3]].s); compile(A[ap+1]); return NULL; //\\ P<OPD> = <NUM>, <TAG>, ; case P_OPD: if (alt == 0) fprintf(outfile, "%s", compile(A[ap])); else if (alt == 1) fprintf(outfile, "%s", c[A[A[ap]+3]].s); return NULL; //\\ P<NUM> = <CHARCONST>, <INT>; case P_NUM: {static char last_num[128], *p; p = last_num; if (alt == 0) { p += sprintf(p, "'"); if (*c[A[A[ap]+3]].s == '\n') { p += sprintf(p, "\\n"); } else { p += sprintf(p, "%s", c[A[A[ap]+3]].s); } if (*c[A[A[ap]+3]].s == '\'') p += sprintf(p, "'"); p += sprintf(p, "'"); } else { p += sprintf(p, "%s", c[A[A[ap]+3]].s); } //fprintf(outfile, "%s", last_num); return last_num; } //\\ P<LABEL> = <TAG> ':'; case P_LABEL: plant("text", c[A[A[A[ap]]+3]].s, "", "", NULL, NULL); // now handled up a level except for OPTLABELs in front of regular code (ie jump dests) if (strcmp(c[A[A[A[ap]]+3]].s, "__start") == 0) { plant("text", "main", "push", "ebp", NULL, "the program label for the entry point"); plant("text", "", "mov", "ebp", "esp", "set up incoming call stack frame"); /* To make ecce work, we need some sort of initialisation like this: if (argc != 3) { fprintf (stderr, "syntax: %s infile outfile\n", argv[0]); exit (1); } if (strcmp (argv[1], argv[2]) == 0) { fprintf (stderr, "%s: output file cannot overwrite input file\n", argv[0]); exit (1); } infile = fopen (argv[1], "r"); if (infile == NULL) { fprintf (stderr, "%s: cannot read file '%s'\n", argv[0], argv[1]); exit (1); } outfile = fopen (argv[2], "w"); if (outfile == NULL) { fprintf (stderr, "%s: cannot write file '%s'\n", argv[0], argv[2]); exit (1); } Note that the unix calling convention has argc at [esp] and the args following. */ } return NULL; //\\ P<OPTLABEL> = <LABEL>, ; case P_OPTLABEL: if (alt == 0) compile(A[ap]); return NULL; //\\ P<OPTOPD> = <OPD>, ; case P_OPTOPD: if (alt == 0) compile(A[ap]); return NULL; //\\ P<OPTCOMMENT> = <COMMENT>, ; case P_OPTCOMMENT: //\\ E break; } } int main(int argc, char **argv) { char outname[256], *s; Progname = argv[0]; if ((argc >= 2) && (strcmp(argv[1], "-d") == 0)) { debug_parser = TRUE; argc -= 1; argv += 1; } if ((argc >= 2) && (strcmp(argv[1], "-O") == 0)) { opt_optimiser = TRUE; argc -= 1; argv += 1; } if (argc != 2) { fprintf(stderr, "syntax: %s [-d] file.asm\n", Progname); exit(1); } sprintf(outname, "%s", argv[1]); s = strrchr(outname, '.'); if (s == NULL) s = outname+strlen(outname); sprintf(s, "%s", ".S"); infile = fopen(argv[1], "r"); if (infile == NULL) { fprintf(stderr, "%s: cannot open input %s - %s\n", Progname, argv[1], strerror(errno)); exit(2); } outfile = fopen(outname, "w"); if (outfile == NULL) { fprintf(stderr, "%s: cannot open output %s - %s\n", Progname, outname, strerror(errno)); exit(3); } fprintf(outfile, "; Compile with: nasm -O1 -f elf %s\n", outname); plant("text", "", "global", "main", NULL, "the standard gcc entry point"); plant("text", "", "extern", "printf", NULL, NULL); plant("text", "", "extern", "getchar", NULL, NULL); plant("text", "", "extern", "putchar", NULL, NULL); plant("text", "", "extern", "fgetc", NULL, NULL); plant("text", "", "extern", "fputc", NULL, NULL); plant("text", "", "extern", "fflush", NULL, NULL); plant("text", "", "extern", "exit", NULL, NULL); plant("text", "", "extern", "stdin", NULL, NULL); plant("text", "", "extern", "stdout", NULL, NULL); plant("data", "fmt_str", "db", "\"%s\"", "0", NULL); plant("data", "fmt_chr", "db", "\"%c\"", "0", NULL); plant("data", "fmt_tos", "db", "\"TOS: %08x\"", "10, 0", NULL); for (;;) { nextfree = 0; arraysize = 0; next_free_a = 0; a_size = 0; cp = 0; // code pointer. Has to be global state. ap = 0; // Analysis record pointer. // parse on a line-by-line basis line_reconstruction(); #ifdef DEBUG_LEXER if (debug_lexer) { int i; // DEBUG ONLY fprintf(stderr, "\nLexical token stream:\n\n"); for (i = 0; i < nextfree; i++) { fprintf(stderr, "C[%d] => %s, line %d, col %d: [%0d] %s\n", i, c[i].f, c[i].l, c[i].col, c[i].t, c[i].s); } } #endif if (startparse(PHRASE_BASE)) { compile(0); } else { fprintf(stderr, "Can't\n"); exit(1); } } dump(); exit(0); return(0); }