// This is a 'toy' compiler for use in teaching an intro compilers class // However although it was originally intended to be a very cut-down language, // it ended up being quite a significant subset of C - certainly at least as // powerful as the original K&R C in the early days of Unix. // It currently lacks typedefs, enums, procedure variables and procedures as // parameters, and a few minor details such as hex and octal constants // The compiler generates an AST, which is a framework for the student to // use to add some optimisations, and from the AST it generates assembly code // for an imaginary stack machine. This simple stack code can be expanded // by a macro assembler into real instructions for a real machine such as // the x86 range. Adding a native code generator will be a student exercise. // The compiler is written in the subset of the language which it accepts. #include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <assert.h> #include <ctype.h> #ifndef FALSE #define FALSE (0!=0) #define TRUE (0==0) #endif #define DROP_SPECIAL_BITS ((1 << 14) - 1) #define OPTIONAL_PHRASE (1<<14) #define NEGATED_PHRASE (1<<15) static char prognames[256]; static char *progname; static char outname[256]; FILE *outfile; int debug = FALSE; /* Parser support */ #define EXTENDED_ERROR_REPORTING TRUE #define MAX_GRAMMAR 783 #define PHRASE_BASE 521 int gram[MAX_GRAMMAR] = { 2, 2, 587, 521, 1, 512, 8, 5, 256, 40, 542, 538, 41, 8, 535, 530, 585, 40, 527, 41, 525, 524, 6, 530, 585, 40, 527, 41, 525, 3, 585, 525, 524, 1, 518, 1, 519, 1, 520, 3, 40, 582, 41, 3, 8, 535, 530, 585, 40, 527, 41, 525, 524, 6, 530, 585, 40, 527, 41, 525, 3, 586, 525, 524, 3, 1, 514, 1, 515, 0, 4, 4, 91, 582, 93, 525, 3, 46, 585, 525, 4, 45, 62, 585, 525, 0, 2, 4, 256, 40, 542, 41, 6, 536, 530, 585, 40, 527, 41, 2, 2, 578, 528, 0, 2, 3, 44, 578, 528, 0, 2, 3, 535, 530, 522, 1, 523, 2, 1, 534, 0, 2, 2, 532, 523, 2, 533, 522, 5, 1, 514, 1, 515, 2, 536, 534, 1, 38, 1, 256, 11, 1, 514, 1, 515, 1, 43, 1, 45, 1, 33, 1, 126, 2, 536, 534, 1, 535, 1, 38, 1, 256, 0, 1, 4, 40, 542, 538, 41, 1, 2, 42, 536, 2, 1, 535, 0, 1, 2, 42, 538, 2, 1, 537, 0, 3, 1, 257, 1, 258, 0, 5, 1, 259, 1, 260, 1, 261, 1, 262, 0, 1, 1, 41, 12, 1, 263, 2, 264, 625, 4, 265, 40, 529, 41, 4, 543, 266, 266, 267, 3, 543, 266, 266, 3, 543, 266, 267, 2, 543, 266, 2, 543, 267, 3, 543, 268, 267, 2, 543, 268, 2, 543, 269, 1, 270, 3, 1, 271, 1, 272, 0, 1, 2, 531, 555, 1, 2, 544, 556, 1, 2, 545, 557, 1, 2, 546, 558, 1, 2, 547, 559, 1, 2, 548, 560, 1, 2, 549, 561, 1, 2, 550, 562, 1, 2, 551, 563, 1, 2, 552, 564, 1, 2, 553, 577, 2, 3, 565, 531, 555, 0, 2, 3, 566, 544, 556, 0, 2, 3, 567, 545, 557, 0, 2, 3, 568, 546, 558, 0, 2, 3, 569, 547, 559, 0, 2, 3, 570, 548, 560, 0, 2, 3, 572, 549, 561, 0, 2, 3, 573, 550, 562, 0, 2, 3, 575, 551, 563, 0, 2, 3, 576, 552, 564, 0, 3, 1, 42, 1, 47, 1, 37, 2, 1, 43, 1, 45, 2, 2, 60, 60, 2, 62, 62, 4, 2, 60, 61, 1, 60, 2, 62, 61, 1, 62, 2, 2, 61, 61, 2, 33, 61, 1, 2, 33339, 38, 1, 2, 38, 38, 1, 1, 94, 1, 2, 33342, 124, 1, 2, 124, 124, 1, 2, 38, 38, 1, 2, 124, 124, 2, 4, 63, 553, 58, 554, 0, 1, 2, 580, 554, 1, 2, 61, 61, 2, 4, 529, 33347, 581, 580, 0, 11, 1, 61, 2, 43, 61, 2, 45, 61, 2, 42, 61, 2, 47, 61, 2, 37, 61, 2, 38, 61, 2, 94, 61, 2, 124, 61, 3, 60, 60, 61, 3, 62, 62, 61, 1, 2, 578, 583, 2, 3, 44, 578, 583, 0, 1, 1, 582, 1, 1, 513, 1, 1, 513, 8, 3, 123, 593, 125, 1, 589, 1, 596, 1, 591, 1, 620, 1, 622, 1, 623, 1, 588, 1, 2, 63, 587, 3, 3, 585, 58, 587, 4, 273, 590, 58, 587, 3, 274, 58, 587, 1, 1, 582, 1, 2, 592, 59, 2, 1, 582, 0, 1, 2, 594, 619, 2, 1, 595, 0, 1, 2, 596, 594, 4, 1, 599, 1, 602, 1, 603, 1, 604, 3, 1, 599, 1, 602, 1, 604, 2, 2, 597, 598, 0, 1, 5, 264, 624, 123, 598, 125, 2, 5, 44, 538, 585, 612, 600, 0, 1, 5, 538, 585, 612, 600, 59, 1, 4, 540, 539, 542, 601, 1, 9, 540, 539, 542, 538, 585, 40, 613, 41, 605, 1, 8, 540, 539, 542, 538, 585, 606, 608, 59, 2, 1, 59, 3, 123, 593, 125, 1, 4, 91, 617, 93, 607, 2, 4, 91, 617, 93, 607, 0, 2, 4, 61, 123, 609, 125, 0, 1, 2, 611, 610, 3, 3, 44, 611, 610, 1, 44, 0, 2, 1, 590, 3, 123, 609, 125, 2, 2, 61, 578, 0, 2, 2, 263, 16925, 2, 615, 614, 2, 3, 44, 615, 614, 0, 1, 4, 542, 538, 585, 616, 2, 4, 91, 617, 93, 616, 0, 2, 1, 590, 0, 1, 2, 587, 619, 2, 2, 587, 619, 0, 2, 6, 275, 40, 582, 41, 587, 621, 7, 276, 40, 582, 41, 123, 593, 125, 2, 2, 277, 587, 0, 3, 5, 278, 40, 582, 41, 587, 7, 279, 587, 278, 40, 582, 41, 59, 9, 280, 40, 592, 59, 592, 59, 592, 41, 587, 4, 3, 281, 585, 59, 2, 282, 59, 2, 283, 59, 3, 284, 592, 59, 1, 1, 513, 1, 1, 513, }; #define MAX_KEYWORD 29 char *keyword[MAX_KEYWORD] = { // Keywords are based at 256 "sizeof", "const", "volatile", "auto", "register", "static", "extern", "void", "struct", "typeof", "long", "int", "short", "char", "FILE", "unsigned", "signed", "case", "default", "if", "switch", "else", "while", "do", "for", "goto", "continue", "break", "return", }; #define MAX_BIP 9 int BIP[MAX_BIP] = { // BIPs precede PHRASEs at 512 upwards 0, 1, 2, 3, 4, 5, 6, 7, 8, }; #define MAX_PHRASE 114 #if defined(DEBUG_PARSER) || defined(EXTENDED_ERROR_REPORTING) // FOR DEBUGGING ONLY char *phrasename[MAX_PHRASE] = { // Based at 512 upwards "EOD", "TAG", "PPP", "MMM", "CHAR", "KEYWORD", "integer-constant", "character-constant", "string-constant", "SS", "level14", "level14-lv", "opt-post-ppp-or-mmm", "opt-postfix", "function-call", "opt-argument-list", "comma-opt-argument-list", "lvalue", "opt-cast", "level13", "prefix-operator-lv", "opt-prefix-operator", "cast", "indirectthrough-seq", "opt-indirectthrough-seq", "pointerto-seq", "opt-pointerto-seq", "opt-qualifier", "opt-scope", "closer", "type", "signed", "level12", "level11", "level10", "level9", "level8", "level7", "level6", "level5", "level4", "level3", "level2", "opt-level12", "opt-level11", "opt-level10", "opt-level9", "opt-level8", "opt-level7", "opt-level6", "opt-level5", "opt-level4", "opt-level3", "level12-op", "level11-op", "level10-op", "level9-op", "level8-op", "level7-op", "ANDAND", "level6-op", "level5-op", "OROR", "level4-op", "level3-op", "opt-conditional", "level1", "equiv", "opt-assign-list", "assign-op", "expression", "opt-comma-level0-list", "rvalue", "identifier", "identifier-lv", "statement", "debug-statement", "labeled-statement", "constant-expression", "expression-statement", "opt-expression", "compound-statement", "opt-declaration-list", "declaration-list", "declaration", "struct-member-declaration", "struct-member-list", "struct-decl", "opt-rest-of-scalar-decl", "rest-of-scalar-decl", "scalar-decl", "proc-fn-decl", "array-decl", "forward-decl-or-actual-body", "array-bounds", "opt-array-bounds", "opt-array-init", "constant-initializer-list", "rest-of-constant-initializer-list", "constant-initializer", "opt-scalar-init", "opt-param-list", "opt-rest-of-param-list", "param", "opt-empty-array-bounds", "opt-constant-expression", "statement-list", "opt-statement-list", "selection-statement", "opt-else-statement", "iteration-statement", "jump-statement", "new-structname", "existing-structname", }; #endif /* DEBUG_PARSER */ int phrase_start[MAX_PHRASE-MAX_BIP] = { 0, 6, 43, 64, 70, 86, 99, 104, 110, 117, 121, 128, 140, 163, 169, 173, 177, 181, 185, 191, 201, 204, 246, 252, 256, 260, 264, 268, 272, 276, 280, 284, 288, 292, 296, 302, 308, 314, 320, 326, 332, 338, 344, 350, 356, 363, 368, 375, 386, 393, 397, 401, 404, 408, 412, 416, 420, 427, 431, 435, 442, 477, 481, 487, 490, 493, 496, 515, 519, 533, 536, 540, 544, 548, 552, 556, 565, 572, 577, 584, 592, 599, 605, 616, 626, 633, 639, 646, 653, 657, 665, 672, 677, 684, 690, 696, 703, 707, 711, 716, 732, 737, 762, 777, 780, }; #define P_EOD 512 #define B_EOD 0 #define P_TAG 513 #define B_TAG 1 #define P_PPP 514 #define B_PPP 2 #define P_MMM 515 #define B_MMM 3 #define P_CHAR 516 #define B_CHAR 4 #define P_KEYWORD 517 #define B_KEYWORD 5 #define P_integer_constant 518 #define B_integer_constant 6 #define P_character_constant 519 #define B_character_constant 7 #define P_string_constant 520 #define B_string_constant 8 #define P_SS 521 #define P_level14 522 #define P_level14_lv 523 #define P_opt_post_ppp_or_mmm 524 #define P_opt_postfix 525 #define P_function_call 526 #define P_opt_argument_list 527 #define P_comma_opt_argument_list 528 #define P_lvalue 529 #define P_opt_cast 530 #define P_level13 531 #define P_prefix_operator_lv 532 #define P_opt_prefix_operator 533 #define P_cast 534 #define P_indirectthrough_seq 535 #define P_opt_indirectthrough_seq 536 #define P_pointerto_seq 537 #define P_opt_pointerto_seq 538 #define P_opt_qualifier 539 #define P_opt_scope 540 #define P_closer 541 #define P_type 542 #define P_signed 543 #define P_level12 544 #define P_level11 545 #define P_level10 546 #define P_level9 547 #define P_level8 548 #define P_level7 549 #define P_level6 550 #define P_level5 551 #define P_level4 552 #define P_level3 553 #define P_level2 554 #define P_opt_level12 555 #define P_opt_level11 556 #define P_opt_level10 557 #define P_opt_level9 558 #define P_opt_level8 559 #define P_opt_level7 560 #define P_opt_level6 561 #define P_opt_level5 562 #define P_opt_level4 563 #define P_opt_level3 564 #define P_level12_op 565 #define P_level11_op 566 #define P_level10_op 567 #define P_level9_op 568 #define P_level8_op 569 #define P_level7_op 570 #define P_ANDAND 571 #define P_level6_op 572 #define P_level5_op 573 #define P_OROR 574 #define P_level4_op 575 #define P_level3_op 576 #define P_opt_conditional 577 #define P_level1 578 #define P_equiv 579 #define P_opt_assign_list 580 #define P_assign_op 581 #define P_expression 582 #define P_opt_comma_level0_list 583 #define P_rvalue 584 #define P_identifier 585 #define P_identifier_lv 586 #define P_statement 587 #define P_debug_statement 588 #define P_labeled_statement 589 #define P_constant_expression 590 #define P_expression_statement 591 #define P_opt_expression 592 #define P_compound_statement 593 #define P_opt_declaration_list 594 #define P_declaration_list 595 #define P_declaration 596 #define P_struct_member_declaration 597 #define P_struct_member_list 598 #define P_struct_decl 599 #define P_opt_rest_of_scalar_decl 600 #define P_rest_of_scalar_decl 601 #define P_scalar_decl 602 #define P_proc_fn_decl 603 #define P_array_decl 604 #define P_forward_decl_or_actual_body 605 #define P_array_bounds 606 #define P_opt_array_bounds 607 #define P_opt_array_init 608 #define P_constant_initializer_list 609 #define P_rest_of_constant_initializer_list 610 #define P_constant_initializer 611 #define P_opt_scalar_init 612 #define P_opt_param_list 613 #define P_opt_rest_of_param_list 614 #define P_param 615 #define P_opt_empty_array_bounds 616 #define P_opt_constant_expression 617 #define P_statement_list 618 #define P_opt_statement_list 619 #define P_selection_statement 620 #define P_opt_else_statement 621 #define P_iteration_statement 622 #define P_jump_statement 623 #define P_new_structname 624 #define P_existing_structname 625 // GENERATED GRAMMAR FROM takeon.c and c.g // Built-in phrase codes. // B_TAG etc generated by takeon.c // These are implicit in the parser: #define TYPE_WHITESPACE (-1) // skipped internally by parser #define TYPE_EOF 0 #define TYPE_PPP 2 #define TYPE_MMM 3 #define TYPE_CHAR 4 // any naked untyped single character #define TYPE_KEYWORD 5 // implicit in the keyword model of the language, represented by "keywords" in the grammar int bestparse = -1; // for error reporting. char *looking_for = "<UNKNOWN>"; // 'while looking for <PHRASENAME>' (or literal) ... /* Data structures */ // We use a stringpool, and strings are indexes into this pool. This // is useful for the same reasons that the AST is an array indexed // by integers rather than a struct with pointers. It may also // save space by reusing common strings. And we get a free tag // to describe strings. (Also we can compare strings just with // an integer tag comparison, if we ever want to - which is fine // if the table is implemented by a hash table; it's somewhat dubious // when just a scanned linear list, though with modern CPUs it's // barely worth the programmer effort to do anything smarter) // not everything uses this yet. search for strdup etc to find potential problems #define MAXPOOL (16*1024) char stringpool[MAXPOOL]; int nextstring = 0; int str_to_pool(char *s) { int tag; for (tag = 0; tag <= nextstring; tag++) { if (strcmp(stringpool+tag, s) == 0) { return tag; /* found, one way or another */ } } return 0; /* NOT REACHED */ } /* c[] is the source character token stream */ // this struct can be replaced by parallel arrays for the purpose of // simplifying the language in which this compiler is written, in order // to bootstrap itself. struct sourceinfo { // ATOMS for processed input stream char *s; // string contents int l; // lineno int col; // column int t; // type - tag, "string", 'charconst', or char, so far char *f; // source or includefile name }; static struct sourceinfo c[128*1024]; int nextfree = 0, arraysize = 128*1024; char onecharstr[2*256]; /* A[] is the Analysis record. Contents point to a C[] structure when the item is a BIP, otherwise the format is [phraseno] [altno] [no-of-subphrases] [subphrases and/or BIPs...] for example, if A[25] contained 10, that would mean that the token for A[25] was stored in C[10]. (And the C string would be at &stringpool[c[A[25]]]?) */ static int A[2*1024*1024]; int next_free_a = 0, a_size = 2*1024*1024; /* variables used by line-reconstruction (lexer) */ FILE *sourcefile; char *curfile; int startline = TRUE, whitespace = TRUE, lineno = 1, col = 0, ch, peek; // TO DO: support for cpp should go here (#include, #define, #ifdef/#if etc) // - currently using gcc's cpp. (#line as passed through by cpp would be helpful) // "string1" "string2" - concatenate in the lexer? // To support source reconstruction, add an extra field to the c[] structs, // which is the exact white space that was skipped before that token. // hex constants (0x80). octal constants (077). long and long long literals (1L, 1LL) // Need to add token for "&&" char *escape(char *s, int t) { static char result[1024]; char *rslt; int quote; rslt = result; if ((t != B_character_constant) && (t != B_string_constant)) { rslt += sprintf(rslt, "%s", s); return result; } if (t == B_character_constant) quote = 39 /* '\'' */ ; if (t == B_string_constant) quote = '"'; rslt += sprintf(rslt, "%c", quote); for (;;) { int c = *s++; if (c == '\n') rslt += sprintf(rslt, "\\n"); else if (c == '\r') rslt += sprintf(rslt, "\\r"); else if (c == '\t') rslt += sprintf(rslt, "\\t"); else if ((c == 0) && (quote == 39 /* '\'' */ )) rslt += sprintf(rslt, "\\0"); else if (c == '\\') rslt += sprintf(rslt, "\\\\"); else if ((c == '"') && (quote == '"')) rslt += sprintf(rslt, "\\\""); else if ((c == 39) && (quote == 39)) rslt += sprintf(rslt, "\\'"); else rslt += sprintf(rslt, "%c", c); if ((c == 0) || (quote == 39 /* '\'' */ )) break; } sprintf(result+strlen(result), "%c", quote); return result; } void dump_source(void) { int l; int line = -1; for (l = 0; l < nextfree; l++) { if (c[l].t == TYPE_EOF) break; if (c[l].l != line) { fprintf(stdout, "\n"); line = c[l].l; } fprintf(stdout, "%s", escape(c[l].s, c[l].t)); } fprintf(stdout, "\n"); } void error_line(int target) { int l; fprintf(stderr, "Line %d: ", target); for (l = 0; l < nextfree; l++) { if (c[l].t == TYPE_EOF) break; if (c[l].l == target) { fprintf(stderr, "%s", escape(c[l].s, c[l].t)); } } fprintf(stderr, "\n"); } /* Support procs for storing lexical units */ void stores(char *s, int lineno, int col, int type, char *fname) { int tag; if (nextstring + strlen(s) + 1 >= MAXPOOL) exit(1); // TO DO: add message strcpy(stringpool+nextstring, s); /* Create a backstop for when not found */ tag = str_to_pool(s); if (tag == nextstring) nextstring += strlen(s)+1; /* Not found, add it */ c[nextfree].s = stringpool+tag; c[nextfree].l = lineno; c[nextfree].col = col; c[nextfree].f = fname; c[nextfree].t = type; //fprintf(stdout, "stores('%s')\n", c[nextfree].s); nextfree++; } void storec(int ch, int lineno, int col, int type, char *fname) { onecharstr[ch*2] = ch; onecharstr[ch*2+1] = '\0'; // convert char to 1-char string before saving. stores(&onecharstr[ch*2], lineno, col, type, fname); } int mkliteral(char *s) { // use in code generation but belongs in line reconstruction really - derived from 'stores' int tag; if (nextstring + strlen(s) + 1 >= MAXPOOL) exit(1); // TO DO: add message strcpy(stringpool+nextstring, s); // Create a backstop for when not found tag = str_to_pool(s); if (tag == nextstring) nextstring += strlen(s)+1; // Not found, add it c[nextfree].s = stringpool+tag; c[nextfree].l = -1; c[nextfree].col = -1; c[nextfree].f = ""; c[nextfree].t = B_integer_constant; // OOPS: 'mktag' comes through here too... //fprintf(stderr, "c[%d] = %s\n", nextfree, c[nextfree].s); return nextfree++; } /* simple proc to recognise if a token is a keyword. Could use a hash if runtimes warrant it. */ /* (takeon.c could generate a perfect hash; or we could use a trie as in my spelling utils) */ int iskeyword(char *s) { int i; for (i = 0; i < MAX_KEYWORD; i++) if (strcmp(s, keyword[i]) == 0) return TRUE; return FALSE; } /* Main procedures - the parser and the code generator (which embodies the grammar) */ /* Line reconstruction, which for this language equates to lexing */ /* A C compiler always collects characters into the longest possible tokens, even if the result is not valid C. White space always divides tokens. White space can be used to prevent misinterpretation of C source code (for example, x--y would be tokenized as the illegal x -- y [combining the two hyphens into a single token], while x - -y would be toeknized as the valid x - - y). White space must be used to separate an identifier, reserved word, integer constant, floating point constant from a following identifier, reserved word, integer constant, or floating point constant. Although ANSI C requires that comments be replaced by white space, many compilers don.t, which can lead to unwanted token merging. */ // -- from http://www.osdata.com/topic/language/c.htm // ... so there's my answer: john/**/smith is NOT the variable johnsmith, and // my line reconstruction below has to take account of that. // no word yet on whether a---b is legal (and equivalent to a-- - b) but // I'm pretty sure it is. static int xfgetc(FILE *f); static void xungetc(int c, FILE *f); char *wp; int wpstartcol; void whitedump(char *fname, int col) { int tag; *wp = '\0'; tag = str_to_pool(stringpool+nextstring); if (tag == nextstring) nextstring += strlen(stringpool+nextstring)+1; /* Not found, add it */ c[nextfree].s = stringpool+tag; c[nextfree].l = lineno; c[nextfree].col = wpstartcol; c[nextfree].f = fname; c[nextfree].t = TYPE_WHITESPACE; //fprintf(stdout, "wd('%s')\n", c[nextfree].s); nextfree++; wp = stringpool+nextstring; wpstartcol = col; } void line_reconstruction(void) { /* Pre-process input ready for parsing. Tokens are stored in array C[] */ wp = stringpool+nextstring; // add whitespace by *wp++ = ... wpstartcol = col; for (;;) { ch = xfgetc(sourcefile); if (ch == EOF) break; ch &= 255; // int, positive. peek = xfgetc(sourcefile); xungetc(peek, sourcefile); if (isalpha(ch) || (ch == '_')) { /* token or keyword */ int nextfree = 0, startcol = col; char token[4*1024]; whitespace = FALSE; for (;;) { // better to use freespace at end of // stringpool than 'token'??? if (isalpha(ch) || isdigit(ch) || (ch == '_')) { // digits and '_' allowed col++; token[nextfree++] = ch; } else { token[nextfree] = '\0'; xungetc(ch, sourcefile); break; } ch = xfgetc(sourcefile); } stores(token, lineno, startcol, iskeyword(token) ? TYPE_KEYWORD : B_TAG, curfile); //free(token); } else if (isdigit(ch)) { /* Number */ int nextfree = 0; char number[4*1024]; // again, use same trick as whitespace // Store as a string... whitespace = FALSE; for (;;) { if (isdigit(ch)) { col++; number[nextfree++] = ch; } else { number[nextfree] = '\0'; xungetc(ch, sourcefile); break; } ch = xfgetc(sourcefile); } stores(number, lineno, col, B_integer_constant, curfile); //free(number); } else switch (ch) { case '"': // Handle "cc" string constants case 39: // 39 /* '\'' */ : pcc has a bug and won't accept this literal! // Handle 'c' character constants /* literals */ { int nextfree = 0, quotech = ch; char string[4*1024]; // again, whitespace trick would be better whitespace = FALSE; col++; for (;;) { ch = xfgetc(sourcefile); // Newlines are allowed col++; if (ch == quotech) { string[nextfree] = '\0'; break; } else if (ch == '\\') { ch = xfgetc(sourcefile); col++; if (ch == '\\') { string[nextfree++] = ch; } else if (ch == 39 /* '\'' */ ) { string[nextfree++] = 39 /* '\'' */ ; } else if (ch == '"') { string[nextfree++] = '"'; } else if (ch == 'n') { string[nextfree++] = '\n'; } else if (ch == 'r') { string[nextfree++] = '\r'; } else if (ch == 't') { string[nextfree++] = '\t'; } else if (ch == '0') { string[nextfree++] = '\0'; } else { // Warn of unknown (to me) \x escape. Probably an error. string[nextfree++] = '\\'; string[nextfree++] = ch; fprintf(stderr, "? Warning: unknown escape \\%c at line %d\n", ch, lineno); } } else { string[nextfree++] = ch; } } stores(string, lineno, col, (quotech == 39 /* '\'' */ ? B_character_constant : B_string_constant), curfile); //free(string); } break; case '/': /* COMMENTS (or just a divide symbol) Note the ambiguity here: int a; int b; int c; int *d; a=b/c; *d=b/c; c=b/*d; */ wpstartcol = col; col++; whitespace = FALSE; if (peek == '/') { // Handle line comment TO DO: STORE WHITESPACE wp = stringpool+nextstring; // add whitespace by *wp++ = ... *wp++ = peek; do {*wp++ = (ch = xfgetc(sourcefile));} while (ch != '\n'); wp -= 1; // drop \n col = 0; whitespace = TRUE; whitedump(curfile, col); // dump at end of whitespace, *and* on end-of-line during whitespace lineno++; } else if (peek == '*') { /* Handle potential multi-line comment */ wp = stringpool+nextstring; // add whitespace by *wp++ = ... *wp++ = '/'; *wp++ = (ch = xfgetc(sourcefile)); // Now we have read '/*' for (;;) { // TO DO: STORE WHITESPACE col++; *wp++ = (ch = xfgetc(sourcefile)); peek = xfgetc(sourcefile); if (ch == '\n') { col = 0; lineno++; whitedump(curfile, col); // part of comment from next line goes into following token } if ((ch == '*') && (peek == '/')) break; xungetc(peek, sourcefile); } xungetc(peek, sourcefile); col += 2; *wp++ = '/'; (void)xfgetc(sourcefile); // Remove '/' peek = xfgetc(sourcefile); xungetc(peek, sourcefile); // QUESTION: How does this affect # directives? whitedump(curfile, col); // dump at end of whitespace, *and* on end-of-line during whitespace } else { storec(ch, lineno, col, TYPE_CHAR, curfile); } break; case '+': case '-': // ++ and -- are the only symbols that require disambiguation // at the lexer level in order to work with a grammar where // inter-token spaces have already been removed. // TO DO: Add '&&'. Currently hacked in the grammar. whitespace = FALSE; if (peek == ch) { // get second ch ch = xfgetc(sourcefile); peek = xfgetc(sourcefile); xungetc(peek, sourcefile); if (ch == '+') stores("++", lineno, col, TYPE_PPP, curfile); else stores("--", lineno, col, TYPE_MMM, curfile); col++; } else storec(ch, lineno, col, TYPE_CHAR, curfile); col++; break; // WHITESPACE TO DO: STORE WHITESPACE case '\n': lineno++; case '\r': startline = TRUE; col = 0; whitespace = TRUE; wp = stringpool+nextstring; // add whitespace by *wp++ = ... wpstartcol = col; // *wp++ = ch; drop \n whitedump(curfile, col); break; case '\t': case ' ': col++; // Does not affect whitespace (why not?) wp = stringpool+nextstring; // add whitespace by *wp++ = ... wpstartcol = col-1; *wp++ = ch; whitedump(curfile, col); break; default: whitespace = FALSE; storec(ch, lineno, col++, TYPE_CHAR, curfile); } } // set up a dummy at the end because we sometimes look ahead by 1 // in the parsing code and don't want to hit uninitialised data. stores("<EOF>", lineno, col, TYPE_EOF, curfile); nextfree--; } static char xline[1024]; // make off heap instead, shorten binary static int xi = 0, xcur_line = 1; static void xungetc(int c, FILE *f) { if (xi > 0) { xi -= 1; } else if (c == '\n') { xcur_line -= 1; xi = strlen(xline); } ungetc(c, f); } static int xfgetc(FILE *f) { static int last_line = -1; int c; int ch; c = fgetc(f); if (c == EOF) return EOF; ch = c&255; if (ch == '\n') { xline[xi] = '\0'; xi = 0; if (last_line != xcur_line) { // so long sibce I wrote this I've forgotten how it worked (if it did) and // am a bit dubious about a clash here from the new code I'm now adding to // handle whitespace... //strcpy(stringpool+nextstring, xline); //(void)make_binary_tuple(LINE, xcur_line, str_to_pool(xline)); last_line = xcur_line; } xcur_line += 1; } else xline[xi++] = ch; if (xi == 1023) xi = 1022; xline[xi] = '\0'; return c; } /* Parsing. (Table driven top-down recursive-descent parser) */ /* The parser used here is based on a design by Tony Brooker which was originally used in Atlas Autocode and the "Compiler Compiler". It generates a concrete syntax tree rather than the abstract syntax tree more popular in modern compilers. A later phase converts from concrete to abstract. (The concrete syntax tree is a reasonable tool to use for source to source manipulations, prettyprinting, etc etc; you *can* use it to generate code from directly, but it's far more awkward.) Note that the parsing procedure here is just a piece of code to walk a pre-built table. There is nothing in this section which reflects the grammar, if that is what you are looking for. You'll find the grammar embedded in the 'compile()' procedure in the following section. */ // It might be possible to have the parser transparently skip any whitespace c[] tokens // while parsing. Then with cooperation from the lexer we can actually store the white space // on a line-by-line basis, thus allowing full source reconstruction for error reporting, // code listings etc. Maybe just: while (c[cp].t == TYPE_WHITESPACE) cp += 1; int cp = 0; // code pointer. Has to be global state. int ap = 0; // Analysis record pointer. int parse(int pp, int depth) // depth is only for indentation in diags { /* Main parsing procedure. This is a table-driven parser, with the tables being generated from the grammar rules embedded in the 'compile' procedure below. The result of the parse is a tree structure, and the values of the nodes in the tree structure are used to drive a large 'case' statement which selects the actions to be performed after a successful parse. There is no grammatical structure embedded in this procedure. If you're looking for the grammar definition, see the procedure called 'compile' instead. */ int saved_cp; int saved_ap; int i; int gp; int alts; int match; // char saved_desc[256]; /* Initialisation */ gp = phrase_start[pp-512-MAX_BIP]; alts = gram[gp]; gp++; // gp now points to first element (length) of first alt saved_cp = cp; saved_ap = ap; for (i = 0; i < alts; i++) { /* Starting with the root phrase, recursively examine each alternative */ int each; int phrases = gram[gp++]; int phrase_count; int gap = 0; cp = saved_cp; ap = saved_ap; if (ap+3 > next_free_a) next_free_a = ap+3; A[ap++] = pp; // record which phrase (could be done outside loop) A[ap++] = i; // and which alt. // Count slots needed. *Could* be precalculated and stored // in the grammar, either embedded (after the ALT) or as a // separate table for (each = 0; each < phrases; each++) if (gram[gp+each] >= 512) gap++; A[ap++] = gap; // Count of alts (gap) // ap+gap now points to the slot after the space required, which // is where the first subphrase will be stored. ap = ap+gap; // recursive subphrases are stored after this phrase. // ap is left updated if successful. // successfully parsed phrases are stored in A[saved_ap+3+n] if (saved_ap+3+gap > next_free_a) next_free_a = saved_ap+3+gap; match = TRUE; // stays true if all subphrases match phrase_count = 0; // only phrases which make it into the A record, // i.e. excluding literals and keywords for (each = 0; each < phrases; each++) { /* Within a single grammar rule (alternative), ensure that each subphrase is present */ int phrase = gram[gp+each] & DROP_SPECIAL_BITS; int optional_phrase = gram[gp+each] & OPTIONAL_PHRASE; int negated_phrase = gram[gp+each] & NEGATED_PHRASE; while (c[cp].t == TYPE_WHITESPACE) cp += 1; // skip whitespace before each alt if (cp > bestparse) { static char s[128]; // off heap instead? - binary size // this is used in reporting syntax errors. // we get better diags if DEBUG_PARSER is true if (phrase < 256) { sprintf(s, "'%c'", phrase); } else if (phrase < 512) { sprintf(s, "\"%s\"", keyword[phrase-256]); } else if (phrase < 512+MAX_BIP) { //#ifdef EXTENDED_ERROR_REPORTING sprintf(s, "{%s}", phrasename[phrase-512]); //#else //sprintf(s, "{BIP %d}", phrase-512); //#endif } else { //#ifdef EXTENDED_ERROR_REPORTING sprintf(s, "<%s>", phrasename[phrase-512]); //#else //sprintf(s, "<Phrase %d>", phrase-512); //#endif } looking_for = s; bestparse = cp; } if (phrase < 256) { /* Literal */ if ((c[cp].t != TYPE_CHAR) || (c[cp].s[0] != phrase)) match = FALSE; else cp++; // Don't record literals } else if (phrase < 512) { /* Keyword */ if ((c[cp].t != TYPE_KEYWORD) || (strcmp(keyword[phrase-256], c[cp].s) != 0)) match = FALSE; else cp++; // Don't record keywords } else if (phrase < 512+MAX_BIP) { /* Built-in phrase */ int where = ap; // next phrase to be parsed will be stored at current 'ap'. // BUG: BIPS should also allow <?...> and <!...> as users won't distinguish BIPs from phrases if (c[cp].t != BIP[phrase-512]) { match = FALSE; } else { A[ap++] = phrase; A[ap++] = 1; A[ap++] = 1; A[ap++] = cp++; A[saved_ap+3+phrase_count++] = where; // Record BIP } } else { /* Recursive call to parser for a subphrase */ int where = ap; // next phrase to be parsed will be stored at current 'ap'. if (negated_phrase) { int here = cp; if (!parse(phrase, depth+1)) { A[saved_ap+3+phrase_count++] = -1; // was 'where' } else { match = FALSE; } cp = here; // negated phrase never consumes any input } else if (optional_phrase) { int input_stream_pos = cp; if (parse(phrase, depth+1)) { A[saved_ap+3+phrase_count++] = where; cp = input_stream_pos; // now re-parse the input stream with the following alts } else { match = FALSE; } } else { if (parse(phrase, depth+1)) { A[saved_ap+3+phrase_count++] = where; } else { match = FALSE; } } } if (!match) break; } gp += phrases; // move over all phrases, to next alt if (match) break; // gp now points to first element (length) of next alt, or start of next phrase } return(match); } /* The AST is an n-ary tree whose nodes and leaves are a polymorphic set of records. However rather than a complex data structure involving structs and unions, the actual storage of the AST is done in a single flat array of ints. It is up to the programmer to correlate virtual record fields with offsets into the subarrays which denote each AST cell. There were originally two fixed fields in every AST cell: [0] was the type code (eg AST_Mul) and [1] is a count of the number of integers that follow. Much later during development, when the data structure was already well established, I had to add a new field to almost all records, so I did this by having the AST cell creation functions add a new field *before* the [0]th item. So TYPEINFO(x) is held in AST[(x)-1]. At some later point I may correct this, but it was expedient and avoided the need to renumber every reference to an object in the AST array. */ /* I had a BFO this afternoon... sometimes in the ast building code I want to pass a lot of info back up to a parent node, but it's not structured info that readily fills an ast object. BFO: create a generic ast object of variable length just for passing data around. (Not done yet but I may need this when I get to more complex declarations) */ /* There is a table below (op[]) which holds information about each of the AST types. Each row is indexed by the AST type itself, so it has to be kept in synch with the chain of #defines immediately preceding it. The table could be split up into multiple simple arrays of atoms, rather than one array of structs, which would have made bootstrapping much easier, however from previous experience, keeping all of these parallel arrays in synch was so laborious that the saved effort was worth the cost of having to implement structs in the first iteration of the bootstrapped compiler. */ struct operation { int AST_Code; // switch code. Note, >= AST_BASE which is currently 1000. int Children; // Expected number of children. AST[ap+1] is the actual number, if they // differ. (For instance if we decided to code a*b*c as (AST_Times, 3, a, b, c) // rather than as two binary nodes) char *Diag_Name; // The AST_ name as a string for diags char *C_Name; // for use as label when drawing AST as a tree char *Stack_Name; // This is a mild hack for table-driven code generation. Use this opcode // (with no args, for now) as the generated code if present. // Otherwise go through the code-generation switch as normal. int Display_Children; // used in two places - primary use is now to determine what is a safe // branch to walk with a generic tree-walker (I.e. if allowed, the chilod // is a proper AST_ item; if not it may be a string or something else // secondary use is in automatic display of the AST as a tree - do not // display the child unless it is enabled here. // Least significant bit corresponds to rightmost child, so be careful. }; // would be an enum if we supported it. Maybe after bootstrapping. #define AST_BASE 1000 #define AST_Add 1000 #define AST_Sub 1001 #define AST_Mul 1002 #define AST_Div 1003 #define AST_Mod 1004 #define AST_AddAss 1005 #define AST_SubAss 1006 #define AST_MulAss 1007 #define AST_DivAss 1008 #define AST_ModAss 1009 #define AST_Post_Inc 1010 #define AST_Pre_Inc 1011 #define AST_Post_Dec 1012 #define AST_Pre_Dec 1013 #define AST_Idx 1014 #define AST_Member 1015 #define AST_Ptr 1016 #define AST_Call 1017 #define AST_Cond 1018 #define AST_Type 1019 #define AST_Return 1020 #define AST_ReturnResult 1021 #define AST_LE 1022 #define AST_GT 1023 #define AST_LT 1024 #define AST_GE 1025 #define AST_EQ 1026 #define AST_NE 1027 #define AST_IFTHENELSE 1028 #define AST_IFTHEN 1029 #define AST_UNeg 1030 #define AST_UPos 1031 #define AST_UBitNot 1032 #define AST_UBoolNot 1033 #define AST_BoolAnd 1034 #define AST_BoolOr 1035 #define AST_ShortcutBoolAnd 1036 #define AST_ShortcutBoolOr 1037 #define AST_BitAnd 1038 #define AST_BitOr 1039 #define AST_BitAndAss 1040 #define AST_BitOrAss 1041 #define AST_BitXor 1042 #define AST_BitXorAss 1043 #define AST_BitLsh 1044 #define AST_BitRsh 1045 #define AST_BitLshAss 1046 #define AST_BitRshAss 1047 #define AST_Const 1048 #define AST_Var 1049 #define AST_AssignTo 1050 #define AST_Declare 1051 #define AST_DefProc 1052 #define AST_DefParam 1053 #define AST_ReceiveParam 1054 #define AST_UseParam 1055 #define AST_AddressOf 1056 #define AST_IndirectThrough 1057 #define AST_SizeOf 1058 #define AST_TypeOf 1059 #define AST_C_ForLoop 1060 #define AST_C_While 1061 #define AST_C_DoWhile 1062 #define AST_C_Break 1063 #define AST_C_Continue 1064 #define AST_Cast 1065 #define AST_Label 1066 #define AST_Case 1067 #define AST_DefaultCase 1068 #define AST_Goto 1069 #define AST_Switch 1070 #define AST_Sourceline 1071 #define AST_LINENO 1072 #define AST_REDIRECT 1073 #define AST_SEQ 1074 #define AST_CommaSEQ 1075 #define AST_LINEAR_BLOB 1076 #define AST_TYPE_ArrayOf 1077 // child index lower upper #define AST_TYPE_Struct 1078 // child structtypename childname #define AST_TYPE_PointerTo 1079 // child #define AST_TYPE_Atom 1080 // NAME bytes signed #define AST_TYPE_StructMember 1081 // next_member this_element #define AST_SHOWTREE 1082 #define AST_TAG 1083 // only operand is integer index into 'c' array #define AST_Scope 1084 // <pointer-to-following-compound-statement> <previous-AST_Scope> <type> // Scope is not part of the AST tree. Scopes are created and deleted // on the fly, as you walk the AST, generating code etc. // A scope is merely a pointer to the current enclosing block, // and locating a declaration within that block is a simple // linear walk of the code within that block - stopping at the // 'current position' so as not to pick up declarations that // have not yet taken effect. // When walking parent blocks, do not enter sub-compound statements. #define AST_Dummy 1085 #define AST_TOP AST_Dummy // if you added any above, remember to also add to the table below // CURRENTLY THESE *MUST* BE IN THE SAME ORDER AS THE DEFINES ABOVE! // Unless noted otherwise, assume each cell is a simple 2-operand node. /* There is a *small* attempt at table-driving a code generator here, in the cases where an AST node maps directly to an operation, and its arguments (child nodes) can be recursively evaluated in left to right order. This should give the student a taste of what can be done using a table-driven code generator, however it is not taken any further as that is not the primary method to be used in this exercise. Including the stack-based opcodes here does slightly mix the separation between levels that we have been trying to acheive, but that's a fair lesson in itself as a fully clean separation is not always the best strategy. (For example, where do you convert array indexing operations into multiplies and adds? Do it too early and your front-end has to have knowlege of all the target architectures; do it too late and your back-end cannot take advantage of the generic arithmetic optimisations performed on the AST...) */ struct operation op[AST_TOP-AST_BASE+2] = { {AST_Add, 2, "AST_Add", "+", "ADD", 3}, {AST_Sub, 2, "AST_Sub", "-", "SUB", 3}, {AST_Mul, 2, "AST_Mul", "*", "MUL", 3}, {AST_Div, 2, "AST_Div", "/", "DIV", 3}, // Do we need a separate IDIV or will the type mechanism sort it out? {AST_Mod, 2, "AST_Mod", "%", "MOD", 3}, {AST_AddAss, 2, "AST_AddAss", "+=", "ADDI", 3}, {AST_SubAss, 2, "AST_SubAss", "-=", "SUBI", 3}, {AST_MulAss, 2, "AST_MulAss", "*=", "MULI", 3}, {AST_DivAss, 2, "AST_DivAss", "/=", "DIVI", 3}, {AST_ModAss, 2, "AST_ModAss", "%=", "MODI", 3}, {AST_Post_Inc, 1, "AST_Post_Inc", "()++", "", 1}, // TO DO {AST_Pre_Inc, 1, "AST_Pre_Inc", "++()", "", 1}, // TO DO {AST_Post_Dec, 1, "AST_Post_Dec", "()--", "", 1}, // TO DO {AST_Pre_Dec, 1 ,"AST_Pre_Dec", "--()", "", 1}, // TO DO {AST_Idx, 2, "AST_Idx", "[]", "INDEX 4", 3}, // May be subsumed in arithmetic ops before reaching code generator {AST_Member, 2, "AST_Member", ".", "ADDI", 3}, // May be subsumed in arithmetic ops before reaching code generator {AST_Ptr, 2, "AST_Ptr", "->", "ADD", 3}, // May be subsumed in arithmetic ops before reaching code generator {AST_Call, 3, "AST_Call", "()", "", 3<<1}, // "name", UseParam chain, ptr to proc definition {AST_Cond, 3, "AST_Cond", "( ? : )", "", 7}, {AST_Type, 1, "AST_Type", "simple type", "", 1}, // integer representing "int" etc {AST_Return, 1, "AST_Return", "return", "RET 0", 0}, {AST_ReturnResult, 2, "AST_ReturnResult", "return ...", "RET 1", 2}, {AST_LE, 2, "AST_LE", "<=", "CMPLE", 3}, {AST_GT, 2, "AST_GT", ">", "CMPGT", 3}, {AST_LT, 2, "AST_LT", "<", "CMPLT", 3}, {AST_GE, 2, "AST_GE", ">=", "CMPGE", 3}, {AST_EQ, 2, "AST_EQ", "==", "CMPEQ", 3}, {AST_NE, 2, "AST_NE", "!=", "CMPNE", 3}, {AST_IFTHENELSE, 3, "AST_IFTHENELSE", "if () ... else ...", "", 7}, {AST_IFTHEN, 2, "AST_IFTHEN", "if () ...", "", 3}, {AST_UNeg, 1, "AST_UNeg", "-()", "NEG", 1}, {AST_UPos, 1, "AST_UPos", "+()", "NOOP", 1}, {AST_UBitNot, 1, "AST_UBitNot", "~()", "NOT", 1}, {AST_UBoolNot, 1, "AST_UBoolNot", "!()", "BNOT", 1}, {AST_BoolAnd, 2, "AST_BoolAnd", "&&", "BAND", 3}, {AST_BoolOr, 2, "AST_BoolOr", "||", "BOR", 3}, {AST_ShortcutBoolAnd, 2, "AST_ShortcutBoolAnd", "&&", "", 3}, {AST_ShortcutBoolOr, 2, "AST_ShortcutBoolOr", "||", "", 3}, {AST_BitAnd, 2, "AST_BitAnd", "&", "AND", 3}, {AST_BitOr, 2, "AST_BitOr", "|", "OR", 3}, {AST_BitAndAss, 2, "AST_BitAndAss", "&=", "ANDI", 3}, {AST_BitOrAss, 2, "AST_BitOrAss", "|=", "ORI", 3}, {AST_BitXor, 2, "AST_BitXor", "^", "XOR", 3}, {AST_BitXorAss, 2, "AST_BitXorAss", "^=", "XORI", 3}, {AST_BitLsh, 2, "AST_BitLsh", "<<", "LSH", 3}, {AST_BitRsh, 2, "AST_BitRsh", ">>", "RSH", 3}, {AST_BitLshAss, 2, "AST_BitLshAss", "<<=", "LSHI", 3}, {AST_BitRshAss, 2, "AST_BitRshAss", ">>=", "RSHI", 3}, {AST_Const, 2, "AST_Const", "Const", "", 0},// 1st points to C[] item (whence can be found type), 2nd holds bin val for ints // rightchild so far undefined for strings. // - less than useful in summary diags. // proper print finds the argument. {AST_Var, 2, "AST_Var", "Var", "", 0}, // Var was from an earlier design before adding the current // type mechanism. It may soon be dropped. // 1st param is tag, // 2nd param is 'value of' (0) vs 'address of' (1). // Will need to also add a type param later {AST_AssignTo, 2, "AST_AssignTo", "=", "POPI", 3}, {AST_Declare, 3, "AST_Declare", "Decl", "", 7}, // name, type, init assignment {AST_DefProc, 4, "AST_DefProc", "DefProc", "", 7}, // DefProc: scope/qual/type/ptr-seq, id, params, body // recently changed defparam and receiveparam from mask=1 to mask=3 - watch for consequences... // and now from 3 to 7... {AST_DefParam, 3, "AST_DefParam", "DefParam", "", 7}, // param name (ident), param type, link to next DefParam {AST_ReceiveParam, 3, "AST_ReceiveParam", "ReceiveParam", "", 7}, // param name (ident), param type, link to next ReceiveParam {AST_UseParam, 2, "AST_UseParam", "Param", "", 3}, {AST_AddressOf, 1, "AST_AddressOf", "&", "", 1}, // may later modify this to add a const offset {AST_IndirectThrough, 1, "AST_IndirectThrough", "*", "PUSHI", 1}, {AST_SizeOf, 2, "AST_SizeOf", "sizeof", "", 0}, // object, class of object (lvalue, expr, type) {AST_TypeOf, 1, "AST_TypeOf", "typeof", "", 1}, // object (assumed always lvalue?) // 1st three extra fields in for loop are label IDs: exit_label, continue_label, branch_back_label // the continue label may not be the same as the branch back label, for do {} while loops which test // at the end. {AST_C_ForLoop, 4+4, "AST_C_ForLoop", "for (;;) ...", "", 15<<4}, {AST_C_While, 2+3, "AST_C_While", "while () ...", "", 3<<3}, {AST_C_DoWhile, 2+3, "AST_C_DoWhile", "do ... while ();", "", 3<<3}, {AST_C_Break, 1, "AST_C_Break", "break", "", 0}, {AST_C_Continue, 1, "AST_C_Continue", "continue", "", 0}, {AST_Cast, 2, "AST_Cast", "()...", "", 2}, {AST_Label, 2, "AST_Label", "lab:", "", 2}, {AST_Case, 3, "AST_Case", "case <n>:", "", 2<<1}, // leftchild = <n>, // rightchild = link to next case // last = internal number for label {AST_DefaultCase, 2, "AST_DefaultCase", "default:", "", 0}, // leftchild = link to next case // rightchild = internal number for label {AST_Goto, 1, "AST_Goto", "goto ...", "", 1}, {AST_Switch, 5, "AST_Switch", "switch (...)", "", 3<<3}, // leftchild = switch value, rightchild = compound statement // 1st extra field is for target of "break" statement, // partly TO DO: 2nd extra field is link to chain of case statements, // partly TO DO: 3rd extra field is default case {AST_Sourceline, 1, "AST_Sourceline", "src", "", 1}, {AST_LINENO, 1, "AST_LINENO", "Line #", "", 1}, {AST_REDIRECT, 1, "AST_REDIRECT", "REDIR", "", 1}, {AST_SEQ, 2, "AST_SEQ", ";", "", 3}, {AST_CommaSEQ, 2, "AST_CommaSEQ", ",", "", 3}, {AST_LINEAR_BLOB, 1, "AST_LINEAR_BLOB", "BLOB", "", 0}, {AST_TYPE_ArrayOf, 5, "AST_TYPE_ArrayOf", "[]", "", 3<<3}, // child index lower upper elsize {AST_TYPE_Struct, 3, "AST_TYPE_Struct", "{}", "", 2}, // StructName StructMemberList structsize {AST_TYPE_PointerTo, 1, "AST_TYPE_PointerTo", "->", "", 1}, // child {AST_TYPE_Atom, 2, "AST_TYPE_Atom", "atom", "", 0}, // NAME bytes signed {AST_TYPE_StructMember, 4, "AST_TYPE_StructMember", "stmemb", ".", 7<<1}, // next_member tag this_element_type offset #ifdef NEVER {AST_Typed_Object, 3, "AST_Typed_Object", "Obj", "", 5}, // This was intended for intermediate objects with the AST - // but it may go the way of the AST_Var type as well... // 1st param is tag, // 2nd param is 'value of' (0) vs 'address of' (1). // 3rd param is tree of AST_TYPE_* #endif {AST_SHOWTREE, 1, "AST_SHOWTREE", "tree", "tree", 1}, // Internal debugging hack {AST_TAG, 1, "AST_TAG", "tag", "tag", 0}, // All variables are treated as generic tags while building the AST. // They turn into proper variables once the AST is decorated with type information {AST_Scope, 3, "AST_Scope", "scope", "{ { } }", 2<<1}, // following code, parent AST_Scope, type (as in BLOCK_PROCFN) // can't recurse on parent block - infinite recursion // Scope is effectively a '{' ... '}' wrapper. we only care // to distinguish global scope, procdural scope, and syntactically // nested blocks, for the purpose of code generation. Other info // is present but probably won't be used. (eg if/while/do blocks) {AST_Dummy, 10, "AST_Dummy", "dummy", "dummy", 0}, // only used within grammar.c to pass partial results back up the tree {-1, 0, "ERROR IN op[] ARRAY!"} }; // http://citeseer.ist.psu.edu/cache/papers/cs/16951/ftp:zSzzSzftp.cso.uiuc.eduzSzpubzSzlangzSzsmalltalkzSzpaperszSzcompilerszSztree-based-opt.prelim.pdf/mcconnell92treebased.pdf static int nexttrip = 0; static int AST[256*1024]; #define leftchild(t) AST[(t)+2] #define rightchild(t) AST[(t)+3] #define nthchild(t,n) AST[(t)+2+(n)] #define RESERVE_FOR_TYPE_INFO 1 #define TYPEINFO(p) AST[(p)-1] #define astop(p) AST[p] #define children(p) AST[p+1] static int latest_line = -1; void semantic_error(char *s, char *p) { fprintf(stderr, s, p); fprintf(stderr, "Text cells used = %d, Code cells used = %d\n\n", nextfree, nexttrip); fflush(stderr); exit(2); } int mk(int AST_op, int count, int *T) { int trip; int parm; trip = nexttrip+RESERVE_FOR_TYPE_INFO; AST[trip] = AST_op; AST[trip+1] = count; if ((AST_op != AST_Dummy) && (count != op[AST_op-AST_BASE].Children)) fprintf(stderr, "? Internal warning: size mismatch in mk(%d, %d)\n", AST_op, count); for (parm = 0; parm < count; parm++) { AST[trip+parm+2] = T[parm+1]; } nexttrip += 2+count+RESERVE_FOR_TYPE_INFO; return trip; } // the next 3 wrappers are just for convenience. All could be replaced by calls to the above int mkmonop(int AST_op, int child) { int trip; trip = nexttrip+RESERVE_FOR_TYPE_INFO; AST[trip] = AST_op; AST[trip+1] = 1; if (1 != op[AST_op-AST_BASE].Children) fprintf(stderr, "? Internal warning: size mismatch in mkmonop(%d)\n", AST_op); AST[trip+2] = child; nexttrip += 3+RESERVE_FOR_TYPE_INFO; return trip; } int mkbinop(int AST_op, int leftchild, int rightchild) { int trip; trip = nexttrip+RESERVE_FOR_TYPE_INFO; AST[trip] = AST_op; AST[trip+1] = 2; if (2 != op[AST_op-AST_BASE].Children) fprintf(stderr, "? Internal warning: size mismatch in mkbinop(%d)\n", AST_op); AST[trip+2] = leftchild; AST[trip+3] = rightchild; nexttrip += 4+RESERVE_FOR_TYPE_INFO; return trip; } int mkterop(int AST_op, int leftchild, int rightchild, int extrachild) { int trip; trip = nexttrip+RESERVE_FOR_TYPE_INFO; AST[trip] = AST_op; AST[trip+1] = 3; if (3 != op[AST_op-AST_BASE].Children) fprintf(stderr, "? Internal warning: size mismatch in mkterop(%d)\n", AST_op); AST[trip+2] = leftchild; AST[trip+3] = rightchild; AST[trip+4] = extrachild; nexttrip += 5+RESERVE_FOR_TYPE_INFO; return trip; } int mkconst(int n) { char s[128]; int trip = nexttrip+RESERVE_FOR_TYPE_INFO; sprintf(s, "%d", n); AST[trip] = AST_Const; AST[trip+1] = 2; if (2 != op[AST_Const-AST_BASE].Children) fprintf(stderr, "? Internal warning: size mismatch in mkconst(%d)\n", n); AST[trip+2] = mkliteral(s); AST[trip+3] = n; nexttrip += 4+RESERVE_FOR_TYPE_INFO; return trip; } void replace_with_int_const(int p, int val) // AHA BUG FOUND. SOME replace's are not with constants! { AST[p] = AST_REDIRECT; // REDIRECT is a monop and therefore must always fit in the old slot. // preserving AST[p+1] allows us to skip these nodes safely by counting AST[p+2] = mkconst(val); // derived constant, literal is generated and stored in c[].s TYPEINFO(p) = -1; } void replace_with_trip(int p, int trip) // AHA BUG FOUND. SOME replace's are not with constants! { fprintf(stderr, "replace_with_trip: references to %d should be replaced by references to %d\n", p, trip); AST[p] = AST_REDIRECT; // REDIRECT is a monop and therefore must always fit in the old slot. // preserving AST[p+1] allows us to skip these nodes safely by counting AST[p+2] = trip; TYPEINFO(p) = -1; } int clone(int p) { int i = mk(AST[p], AST[p+1], AST+p+1); TYPEINFO(i) = TYPEINFO(p); return i; } int DefToReceive(int p) { int T[4]; // *copy* a chain of DefParams to a chain of ReceiveParams (don't just tweak the types in situ) if (p == -1) return -1; T[1] = leftchild(p);//name T[2] = rightchild(p);//type T[3] = DefToReceive(nthchild(p, 2));//link T[0] = mk(AST_ReceiveParam, 3, T); TYPEINFO(T[0]) = TYPEINFO(p); return T[0]; } int character_constant(int ap) { return mkbinop(AST_Const, A[ap+4], c[A[ap+4]].s[0]); } int integer_constant(int ap) { return mkbinop(AST_Const, A[ap+4], atoi(c[A[ap+4]].s)); } int string_constant(int ap) { return mkbinop(AST_Const, A[ap+4], A[ap+4]); } #ifdef DEBUG_DRAWTREE int subchild(int trip, int k) { // k = 1..displayable children int kids = op[AST[trip]-AST_BASE].Children; // eg 2 int mask = op[AST[trip]-AST_BASE].Display_Children; // eg 3 int bit = 1 << (kids-1); // 2,1,0 int index = 0; // 0..1 int child = 0;// 0..display children-1 0..1 k = k-1; // eg 3..0 0, 1 from external loop for (;;) { if (bit & mask) { // this is child #child if (child == k) return nthchild(trip, index); child += 1; } bit = bit >> 1; index = index + 1; } } int countbits(int n) { int tot = 0; while (n != 0) { if (n&1) tot += 1; n = n>>1; } return tot; } //#define nameof(p) op[AST[p]-AST_BASE].C_Name char *nameof(int t) { int opc,poolptr; char *result = stringpool+nextstring; /* Extract the name of a variable or the value of a const. */ if (t == -1) { sprintf(result, "(null)"); } else { opc = astop(t); if (opc == AST_Const) { sprintf(result, "%d", rightchild(t)); } else if (opc == AST_TAG) { // for simpler diagrams, skip a level sprintf(result, "%s", c[leftchild(t)].s); } else { if ((opc < AST_BASE) || (opc > AST_TOP)) { sprintf(result, "%s", "Bad!"); } else { sprintf(result, "%s", op[opc-AST_BASE].C_Name); } } } poolptr = str_to_pool(result); if (poolptr == nextstring) nextstring += strlen(result)+1; /* Not found, add it */ return(stringpool+poolptr); } #define display_leftchild(p) ((op[(p)-AST_BASE].Children >= 2) && \ (((2 << (op[(p)-AST_BASE].Children-2)) & op[(p)-AST_BASE].Display_Children) != 0)) #define display_rightchild(p) ((op[(p)-AST_BASE].Children >= 2) && \ (((1 << (op[(p)-AST_BASE].Children-2)) & op[(p)-AST_BASE].Display_Children) != 0)) #define display_children(p) countbits(op[(p)-AST_BASE].Display_Children) /* Ascii-art debugging procedure for drawing trees */ int del = 4/*was 1*/; /* distance of graph columns */ int eps = 3; /* distance of graph lines */ /* interface for drawing (can be replaced by "real" graphic using GD or other) */ void graphInit (void); void graphFinish(); void graphBox (char *s, int *w, int *h); void graphDrawBox (char *s, int c, int l); void graphDrawArrow (int c1, int l1, int c2, int l2); /* recursive drawing of the syntax tree */ void exNode (int trip, int c, int l, int *ce, int *cm, int depth, int *needed); /*****************************************************************************/ /* main entry point of the manipulation of the syntax tree */ /* draw_tree is taken from the yacc/lex demo by Thomas Niemann at http://epaperpress.com/lexandyacc/ I wrote an ascii tree-drawing package of my own, but it does not handle nodes with more than 2 children. My package uses diagonal links and Niemann's uses a rectilinear grid; when possible I use my own package as the diagonal tree looks more natural, falling back to the rectilinear layout only when necessary. They cannot be mixed within the one display. I'm in the process of adding a third option, which is to use the graphical layout language of DOT and generate images rather than ascii art, so that the images can be embedded in HTML output. */ void draw_tree_orig(int root); void dottree(int root) { char *operator; int leftkid, rightkid; int linkno = 0; operator = nameof(root); leftkid = leftchild(root); rightkid = rightchild(root); fprintf(stdout, "\"node%d\" [\n label = \"<f0> %d: %s ", root, root, operator); if (display_leftchild(astop(root))) fprintf(stdout, "| <f%0d> %d: ", ++linkno, root+1); if (display_rightchild(astop(root))) fprintf(stdout, "| <f%0d> %d: ", ++linkno, root+2); linkno = 0; fprintf(stdout, "\"\n shape = \"record\"\n];\n\n"); if (display_leftchild(astop(root))) dottree(leftkid); if (display_rightchild(astop(root))) dottree(rightkid); if (display_leftchild(astop(root))) fprintf(stdout, "\"node%0d\":f1 -> \"node%0d\":f0 [\n id = %d\n];\n\n", root, leftchild(root), 888); if (display_rightchild(astop(root))) fprintf(stdout, "\"node%0d\":f2 -> \"node%0d\":f0 [\n id = %d\n];\n\n", root, rightchild(root), 999); } void draw_tree(int trip) { int rte, rtm, needed; fprintf(stdout, "\nTree for AST[%d]:\n", trip); graphInit (); needed = FALSE; exNode (trip, 0, 0, &rte, &rtm, 0, &needed); if (needed) { graphFinish(); } else { draw_tree_orig(trip); } #ifdef DOT_SUPPORT fprintf(stdout, "digraph g {\n"); fprintf(stdout, "graph [\n"); fprintf(stdout, " rankdir = \"LR\"\n"); fprintf(stdout, "];\n\n"); fprintf(stdout, "node [\n"); fprintf(stdout, " fontsize = \"16\"\n"); fprintf(stdout, " shape = \"ellipse\"\n"); fprintf(stdout, "];\n\n"); fprintf(stdout, "edge [\n"); fprintf(stdout, "];\n\n"); dottree(trip); fprintf(stdout, "}\n"); #endif } /*c----cm---ce----> drawing of leaf-nodes l leaf-info */ /*c---------------cm--------------ce----> drawing of non-leaf-nodes l node-info * | * ------------- ...---- * | | | * v v v * child1 child2 ... child-n * che che che *cs cs cs cs * */ void indentsp(int d) { int i; for (i = 0; i < d*4; i++) { putchar(' '); } } void exNode ( int trip, int c, int l, /* start column and line of node */ int *ce, int *cm, /* resulting end column and mid of node */ int depth, int *needed ) { int opc; int w, h; /* node width and height */ char *s; /* node text */ int cbar; /* "real" start column of node (centred above subnodes) */ int k; /* child number */ int che, chm; /* end column and mid of children */ int cs; /* start column of children */ char word[40]; //indentsp(depth);fprintf(stdout, "start: TRIP=%d startcol=%d startline=%d\n", trip, c, l); if (trip == -1) return; opc = astop(trip); if (display_children(opc) >= 3) *needed = TRUE; s = nameof(trip); sprintf(word, "%s", s); s = word; //indentsp(depth);fprintf(stdout, "graphbox: s = %s\n", s); /* construct node text box */ graphBox (s, &w, &h); cbar = c; //indentsp(depth);fprintf(stdout, "assign: c=%d\n", c); *ce = c + w; *cm = c + w / 2; /* node is leaf */ if ( (opc == AST_Const || opc == AST_TAG || display_children(opc) == 0) ) { //indentsp(depth);fprintf(stdout, "drawbox: s = %s cbar=%d\n", s, cbar); graphDrawBox (s, cbar, l); return; } /* node has children */ cs = c; //indentsp(depth);fprintf(stdout, "node has %d children: cs=c=%d\n", display_children(opc), c); for (k = 1; k <= display_children(opc); k++) { //indentsp(depth);fprintf(stdout, "%d: exnode1 %d cs=%d\n", k, subchild(trip, k), cs); exNode (subchild(trip, k), cs, l+h+eps, &che, &chm, depth+1, needed); cs = che; } /* total node width */ if (w < che - c) { cbar += (che - c - w) / 2; *ce = che; *cm = (c + che) / 2; } /* draw node */ //indentsp(depth);fprintf(stdout, "cbar=%d\n", cbar); graphDrawBox (s, cbar, l); /* draw arrows (not optimal: children are drawn a second time) */ cs = c; for (k = 1; k <= display_children(opc); k++) { //indentsp(depth);fprintf(stdout, "%d: exnode2 %d cs=%d\n", k, subchild(trip, k), cs); exNode (subchild(trip, k), cs, l+h+eps, &che, &chm, depth+1, needed); graphDrawArrow (*cm, l+h, chm, l+h+eps-1); cs = che; } } /* interface for drawing */ #define lmax 2000 #define cmax 2000 char graph[lmax][cmax]; /* array for ASCII-Graphic */ void graphTest (int l, int c) { int ok; ok = 1; if (l < 0) ok = 0; if (l >= lmax) ok = 0; if (c < 0) ok = 0; if (c >= cmax) ok = 0; if (ok) return; printf ("\n+++error: l=%d, c=%d not in drawing rectangle 0, 0 ... %d, %d", l, c, lmax, cmax); // fprintf (stderr, "\n+++error: l=%d, c=%d not in drawing rectangle 0, 0 ... %d, %d", // l, c, lmax, cmax); { int i, j; int lmx=20, cmx=60; for (i = 0; i < lmx; i++) { for (j = cmx-1; j > 0 && graph[i][j] == ' '; j--); graph[i][cmx-1] = 0; if (j < cmx-1) graph[i][j+1] = 0; if (graph[i][j] == ' ') graph[i][j] = 0; } for (i = lmx-1; i > 0 && graph[i][0] == 0; i--); printf ("\n"); for (j = 0; j <= i; j++) printf ("\n // %s", graph[j]); printf("\n"); }; exit (1); } void graphInit (void) { int i, j; for (i = 0; i < lmax; i++) { for (j = 0; j < cmax; j++) { graph[i][j] = ' '; } } } void graphFinish() { int i, j; char *s; for (i = 0; i < lmax; i++) { for (j = cmax-1; j > 0 && graph[i][j] == ' '; j--); graph[i][cmax-1] = 0; if (j < cmax-1) graph[i][j+1] = 0; if (graph[i][j] == ' ') graph[i][j] = 0; } for (i = lmax-1; i > 0 && graph[i][0] == 0; i--); printf ("\n"); for (j = 0; j <= i; j++) { char *p; s = graph[j]; // hacks to slightly improve formatting if (j == 0) s += 2; else if (j == 1) s += 1; else if ((p=strchr(s, '|')) == NULL || p[1]=='|') s += 2; printf ("\n // %s", s); } printf("\n\n"); } void graphBox (char *s, int *w, int *h) { *w = strlen (s) + del; *h = 1; } void graphDrawBox (char *s, int c, int l) { int i; //fprintf(stdout, "c=%d strlen=%d del=%d\n", c, strlen(s), del); graphTest (l, c+strlen(s)-1+del); for (i = 0; i < strlen (s); i++) { graph[l][c+i+del] = s[i]; } } void graphDrawArrow (int c1, int l1, int c2, int l2) { int m; graphTest (l1, c1); graphTest (l2, c2); m = (l1 + l2) / 2; while (l1 != m) { graph[l1][c1] = '|'; if (l1 < l2) l1++; else l1--; } while (c1 != c2) { graph[l1][c1] = '-'; if (c1 < c2) c1++; else c1--; } while (l1 != l2) { graph[l1][c1] = '|'; if (l1 < l2) l1++; else l1--; } graph[l1][c1] = '|'; } /*-----------------------------------------------------------------------*/ /*-----------------------------------------------------------------------*/ /* See drawtree.c for test harness. Small mods made for this interface. */ /* NOTE!!! Now we have n-ary trees, this will not work. ifthenelse breaks */ static int tree_debug = (0!=0); static int vertical = (0==0); static int horizontal = (0==0); static int wide = (0==0); static int trim = (0==0); static int testone = (0==0); // row col long pic[256][256]; // 0..255 is char, >= 256 is ptr to string (poolptr index?) int oldtextblit(int row, int col, char *src) { // post-processing string expansion int l = 0; for (;;) { if (*src == '\0') break; if (tree_debug) fprintf(stderr, "1: Planting '%c' at [%d][%d]\n", *src, row, col); pic[row][col] = *src; col += 1; src += 1; l += 1; } return l; } int textblit(int row, int col, char *src) { // store pointer to string, unpack later on output int l = strlen(src), poolptr; strcpy(stringpool+nextstring, src); poolptr = str_to_pool(src); if (poolptr == nextstring) nextstring += strlen(src)+1; /* Not found, add it */ if (tree_debug) fprintf(stderr, "?: Planting string '%s' at [%d][%d]\n", src, row, col); pic[row][col] = 256+poolptr; return (l+(wide?3:1))>>(wide?2:1); // half size because on diagonal } void layout(int id, int idx, int rowoff, int coloff, int *depth, int *width) { char *operator; int opc; int leftkid, rightkid; int leftkiddepth = 0, leftkidwidth = 0; int rightkiddepth = 0, rightkidwidth = 0; int deltadepth = 0, deltawidth = 0; int i; if (tree_debug) fprintf(stderr, ">> %d:layout(%d, rowcol[%d][%d], depth %d, width %d);\n", id, idx, rowoff, coloff, *depth, *width); if (idx == -1) return; // was NOOP, now (null) operator = nameof(idx); leftkid = leftchild(idx); rightkid = rightchild(idx); // Anchor the corner node. { static char temp[128]; sprintf(temp, "%s", operator); (void)textblit(rowoff, coloff, temp); /* not strcpy - don't copy NUL */ } deltawidth = 1; if (display_rightchild(astop(idx))) { int len = ((strlen(nameof(leftkid))+(wide?3:1))>>(wide?2:1))+1; // text on the diagonal while (len-- > 1) { deltawidth += 1; pic[rowoff][coloff-1+deltawidth] = (vertical ? '\\' : '-'); if (tree_debug) fprintf(stderr, "2: Planting '%c' at [%d][%d]\n", pic[rowoff][coloff-1+deltawidth], rowoff, coloff-1+deltawidth); } // attach the RHS tree if (tree_debug) fprintf(stderr, "Recursing to right node %d\n", rightkid); layout(2*id, rightkid, rowoff, coloff+deltawidth, &rightkiddepth, &rightkidwidth); deltadepth = rightkiddepth; } else { deltadepth = 1; /* The op itself */ } // testing: correcting a typo if (((strlen(operator)+(wide?3:1))>>(wide?2:1)) >= deltawidth) deltawidth = ((strlen(operator)+(wide?3:1))>>(wide?2:1))+2; if (display_leftchild(astop(idx))) { // draw the down link // calculate extra height if ((((strlen(nameof(leftkid))+(wide?3:1))>>(wide?2:1))) > deltadepth) { deltadepth = ((strlen(nameof(leftkid))+(wide?3:1))>>(wide?2:1)); } for (i = 1; i < deltadepth+1 /* +1 for spacer row */; i++) { pic[rowoff+i][coloff] = (horizontal ? '/' : '|'); if (tree_debug) fprintf(stderr, "2: Planting '%c' at [%d][%d]\n", pic[rowoff+i][coloff], rowoff+i, coloff); } // recurse on the LHS tree if (tree_debug) fprintf(stderr, "Recursing to left node %d\n", leftkid); layout(2*id+1, leftkid, rowoff+deltadepth+1, coloff, &leftkiddepth, &leftkidwidth); *depth = (*depth) + leftkiddepth + deltadepth + 1; } else *depth = (*depth) + deltadepth; if (rightkidwidth+deltawidth > leftkidwidth) { *width = (rightkidwidth+deltawidth); } else { *width = leftkidwidth; } if (tree_debug) fprintf(stderr, "<< %d:layout(%d, rowcol[%d][%d], depth %d, width %d);\n", id, idx, rowoff, coloff, *depth, *width); } void draw_tree_orig(int root) { int depth = 0, width = 0, row, col, offset, trimmable; fprintf(stdout, "\n"); // Init. for (col = 0; col < 256; col++) { for (row = 0; row < 256; row++) { pic[row][col] = ' '; } } /* Generate layout */ layout(1, root, 128, 0, &depth, &width); if (tree_debug) fprintf(stderr, "Dump layout: rows = %d cols = %d\n", depth, width); if (tree_debug) fflush(stderr); if (vertical) { /* apply vertical shear first */ offset = 1; for (col = 1; col < 256; col++) { // move this column down by 'offset' for (row = 255; row > offset; row--) { pic[row][col] = pic[row-offset][col]; pic[row-offset][col] = ' '; } offset += 1; } } if (horizontal) { /* apply horizontal shear next */ row = 255; // start at bottom of drawing offset = 0; for (;;) { static long temp[1024]; for (col = 0; col < 256; col++) { temp[col] = ' '; } for (col = 0; col < 256; col++) { temp[col*2+offset] = pic[row][col]; temp[col*2+offset+1] = ' '; } for (col = 0; col < 256; col++) { pic[row][col] = temp[col]; } if (row == 0) break; offset += 1; /* more shear on next row up */ row -= 1; } } if (trim) { trimmable = (0==0); for (;;) { for (row = 0; row < 256; row++) { if (pic[row][0] != ' ') { trimmable = (0!=0); break; } } if (!trimmable) break; for (row = 0; row < 256; row++) { for (col = 0; col+1 < 256; col++) { pic[row][col] = pic[row][col+1]; } pic[row][255] = ' '; } } } if (wide) { /* apply widening last */ row = 255; // start at bottom of drawing offset = 0; for (;;) { static long temp[1024]; for (col = 0; col < 256; col++) { temp[col] = ' '; } for (col = 0; col < 256; col++) { temp[col*2+offset] = pic[row][col]; temp[col*2+offset+1] = ' '; } for (col = 0; col < 256; col++) { pic[row][col] = temp[col]; } if (row == 0) break; row -= 1; } } /* display tree */ for (row = 0; row < 256; row++) { trimmable = (0 == 0); for (col = 0; col < 256; col++) { if (pic[row][col] != ' ') { trimmable = (0!=0); break; } } if (!trimmable) { fprintf(stdout, " "); // INDENT for (col = 255; col >= 0; col--) { if (pic[row][col] != ' ') break; pic[row][col] = '\0'; } printf(" // "); for (col = 0; col < 256; col++) { if ((pic[row][col] < -128) || (pic[row][col] > 255)) { //fprintf(stderr, "Inserting \"%s\"\n", &stringpool[pic[row][col]-256]); // col+1 was a hacky bugfix for losing first char of string oldtextblit(row, col+1, &stringpool[pic[row][col]-256]); // strings painted here... } else { if (pic[row][col] == '\0') break; putchar(pic[row][col]); } } putchar('\n'); } } putchar('\n'); fflush(stdout); return; } #else void draw_tree_orig(int root) { } #endif // may later need to add base, offset (or parent) to each of these? See how it goes... int mktag(char *tag) { return mkmonop(AST_TAG, mkliteral(tag)); } int TypeArrayOf(int child, int index, int lower, int upper) { int T[6]; T[1] = child; T[2] = index; T[3] = 0; // always 0, in C T[4] = upper; T[5] = -1; // array element size (includes alignment padding) return mk(AST_TYPE_ArrayOf, 5, T); } int TypeStructMember(int NextMember, int MemberNameTag, int ThisMemberType) { int T[5]; T[1] = NextMember; T[2] = MemberNameTag; T[3] = ThisMemberType; T[4] = -1; // offset of this member relative to start of struct return mk(AST_TYPE_StructMember, 4, T); } int TypeStruct(int StructNameTag, int StructMemberList) { int T[4]; T[1] = StructNameTag; T[2] = StructMemberList; T[3] = -1; // size of struct object (includes alignment padding) return mk(AST_TYPE_Struct, 3, T); } int get_member_offset_inner(int Struct, int MemberTag, char *filename, int lineno) #define get_member_offset(s, m) get_member_offset_inner(s,m,__FILE__,__LINE__) { // Too messy. Surely should only be one type of parameter? if ((astop(Struct) == AST_AddressOf) && (astop(leftchild(Struct)) == AST_TYPE_Struct)) { // if tag is good, go for it Struct = leftchild(Struct); } else if ((astop(Struct) == AST_AddressOf) && (astop(leftchild(Struct)) == AST_TAG)) { if (astop(TYPEINFO(leftchild(Struct))) == -1) { fprintf(stdout, "! %s, Line %d: get_member_offset: typeinfo (@%d)\n", filename, lineno, TYPEINFO(leftchild(Struct))); exit(1); } else if (astop(TYPEINFO(leftchild(Struct))) != AST_TYPE_Struct) { fprintf(stdout, "! %s, Line %d: get_member_offset: typeinfo (@%d) is %d\n", filename, lineno, TYPEINFO(leftchild(Struct)), astop(TYPEINFO(leftchild(Struct)))); exit(1); } Struct = TYPEINFO(leftchild(Struct)); } else if (astop(Struct) == AST_TYPE_Struct) { // good struct, use it } else { fprintf(stdout, "! %s, Line %d: get_member_offset: Struct type (@%d) is %d\n", filename, lineno, Struct, astop(Struct)); if (astop(Struct) == AST_AddressOf) { fprintf(stdout, " Points to a %d (@%d)\n", astop(leftchild(Struct)), leftchild(Struct)); } exit(1); } if (astop(MemberTag) != AST_TAG) { fprintf(stdout, "! %s, Line %d: get_member_offset: tag field (@%d) contains %d\n", filename, lineno, MemberTag, astop(MemberTag)); exit(1); } assert(astop(Struct) == AST_TYPE_Struct); {int member = rightchild(Struct); for (;;) { assert(astop(member) == AST_TYPE_StructMember); //fprintf(stderr, "comparing %s to %s\n", c[leftchild(MemberTag)].s, c[leftchild(rightchild(member))].s); if (c[leftchild(MemberTag)].s == c[leftchild(rightchild(member))].s) { int offset = nthchild(member, 3); //fprintf(stderr, "returning offset %d\n", offset); return offset; } member = leftchild(member); if (member == -1) break; } } fprintf(stderr, "! %s, Line %d: get_member_offset - no result\n", filename, lineno); exit(1); /* NOT REACHED */ return -1; } int TypePointerTo(int child) { return mkmonop(AST_TYPE_PointerTo, child); } int TypeAtom(char *signedness, char *size) // eg TypeAtom("signed" "long") { int T[4]; T[1] = 0; if (strcmp(size, "int") == 0) { T[2] = 4; } else if (strcmp(size, "long") == 0) { T[2] = 4; } else if (strcmp(size, "long long") == 0) { T[2] = 8; } else if (strcmp(size, "short") == 0) { T[2] = 2; } else if (strcmp(size, "char") == 0) { T[2] = 1; } else { T[2] = 0; } return mk(AST_TYPE_Atom, 2, T); } int ConstructType(char *what, int scope, int qualifier, int type, int pointers, int arrays) { int result; result = type; if ((astop(type) != AST_TYPE_Atom) && (astop(type) != AST_TYPE_Struct)) { // should probably apply scope/qual here? fprintf(stderr, "! Warning (Internal): basic type = %d\n", astop(type)); } if ((pointers == -1) && (arrays == -1)) return result; // FOR NOW. TO DO! if (pointers != -1) { int chain = pointers; int chainlen = 0; for (;;) { assert(astop(chain) == AST_TYPE_PointerTo); if (leftchild(chain) == -1) break; chain = leftchild(chain); chainlen += 1; if (chainlen == 5) exit(1); // arbitrary limit used when debugging a loop. remove later. } leftchild(chain) = result; result = pointers; } if (arrays != -1) { int chain = arrays; int chainlen = 0; for (;;) { assert(astop(chain) == AST_TYPE_ArrayOf); if (leftchild(chain) == -1) break; chain = leftchild(chain); chainlen += 1; if (chainlen == 5) exit(1); } leftchild(chain) = result; result = arrays; } return result; } int current_scope = -1; // -1 is global scope // given that C has four scopes: function, file, block, and function prototype - it's // quite probably that this scheme is insufficient for true C. Seems to work for my // cut down version though. #define BLOCK_EXTERNALS 0 #define BLOCK_PROCFN 1 #define BLOCK_COMPOUND 2 #define BLOCK_LOOP 3 #define BLOCK_IF 4 #define BLOCK_CASE 5 void push_scope(int here) // 'here' is an AST_Scope(block, parent block, type) { current_scope = here; } void pop_scope(void) { current_scope = rightchild(current_scope); } // build a set of links back through each AST_Scope void build_block_scope(int p, int parent_AST_Scope, int this_blocks_AST_Scope) { int i; int disp; int this; if ((p == -1) || (astop(p) == -1)) return; if (astop(p) == AST_Scope) { // leftchild = this (following) code block // rightchild = AST_Scope of parent block - plugged here... // 1st extra field is for type of block rightchild(p) = this_blocks_AST_Scope; build_block_scope(leftchild(p), this_blocks_AST_Scope /* is now the parent */, p /* is now this_block's AST_Scope */); } else { // recursively search for more compound-statement blocks (really AST_Scopes) throughout the code: disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { build_block_scope(nthchild(p, i), parent_AST_Scope, this_blocks_AST_Scope); } this = this>>1; } } } // TO DO: Currently labels assume a flat namespace. Need to make them obey scoping rules. // Only one occurence of any one label per block? Or per procedure? - no best solution is // to replace user labels with generated labels. // ALSO... *BIG* todo ... C has several name spaces for different kinds of objects - // variables, labels, typedefs, struct names, etc. Need to get definitive list, and // provide interfaces to look up each individually. // struct, union & enum share a namespace. (http://web.torek.net/torek/c/types2.html) int locate_tag(int p, int tag) { int t; int disp; int this; int i; // The place where this currently falls down is where we are compiling inside this block // but have not yet reached the relevant declaration - we'll get the one in this block // when in fact we *should* get the one from the enclosing block, eg: // { // int i = 3; // { // printf("i = %d\n", i); // int i = 10; // // blah blah blah // } // } // .. this will print i = 10 (or possibly undefined or random value) as it will // find the second i within the current scope :-( // Hack solution: when treewalking, stop when you get to the current ap. if (astop(p) == AST_Declare) { if (c[leftchild(leftchild(p))].s == c[tag].s) { return rightchild(p); } } else if (astop(p) == AST_DefProc) { // DefProc: scope/qual/type/ptr-seq, id, params, body if (c[leftchild(rightchild(p))].s == c[tag].s) { return p; } } else if (astop(p) == AST_ReceiveParam) { // name type link if (c[leftchild(leftchild(p))].s == c[tag].s) { // just like Declare return rightchild(p); } // now recurse on rest of paramlist } else if (astop(p) == AST_TYPE_Struct) { if (rightchild(p) == -1) return -1; // recursion killer :-) This is another incomplete subfield reference. if (c[leftchild(leftchild(p))].s == c[tag].s) { return p; } } else if (astop(p) == AST_Scope) { return -1; // AST_Scope is effectively the start of a sub-block that we want to skip. } // recursively search for more declarations within this block disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { //fprintf(stderr, "%d: nthchild(%d,%d)\n", nthchild(p,i), p, i); if ((astop(p) != AST_TYPE_PointerTo) && (astop(nthchild(p, i)) != AST_TYPE_StructMember)) {t = locate_tag(nthchild(p, i), tag); if (t != -1) return t;} // See above } this = this>>1; } return -1; // NOT REACHED } // looks up variable declns *and* struct type declarations - returns AST_TYPE_* // (parent is only used for diagnostics) int lookup_identifier(int tag, int parent) { // search in current block first, if not found, start going upwards through enclosing blocks. int loops = 0; int t; int scope = current_scope; int current_block = leftchild(scope); for (;;) { loops += 1; if (loops == 10) { fprintf(stderr, "! Implementation error: max scopes = 10 for now. Possibly a bad data structure.\n"); exit(1); } t = locate_tag(current_block, tag); // finds declns or struct types if (t != -1) return t; if (scope == rightchild(scope)) break; scope = rightchild(scope); if (scope == -1) break; current_block = leftchild(scope); // expand search to parent } error_line(c[tag].l); fprintf(stderr, "\"%s\", Line %d: Error: %s not declared\n", c[tag].f, c[tag].l, c[tag].s); exit(1); /* NOT REACHED */ return -1; } int get_member_type(int struct_type, int fieldname_tag) { int field; assert(astop(struct_type) == AST_TYPE_Struct); assert(rightchild(struct_type) != -1); assert(astop(fieldname_tag) == AST_TAG); // now find the field in the struct, and return an ast cell describing that field field = rightchild(struct_type); for (;;) { if (field == -1) break; if (c[leftchild(rightchild(field))].s == c[leftchild(fieldname_tag)].s) { // no strcmp return nthchild(field, 2); } field = leftchild(field); // next_member tag this_element_type } fprintf(stderr, "* Error: Member %s not found in struct %s\n", c[leftchild(fieldname_tag)].s, c[leftchild(leftchild(struct_type))].s); return -1; } // Pull name and result typeinfo out of // P<declaration> = <struct-decl> | // <scalar-decl> | // <proc-fn-decl> | // <array-decl>; // - this will be a single unit of one of the above, not a list of them // Depends on how grammar.c currently implements the phrases above, which // at some point may be unified via a new cleaner AST_Declare operator. // would prefer a single fn with 2 results. Should really make them into // two "int *" out parameters int DeclName(int declp) { for (;;) { switch (astop(declp)) { case AST_SEQ: declp = leftchild(declp); break; case AST_Declare: return leftchild(declp); // case AST_TYPE_Struct: // return -1; default: fprintf(stderr, "* Internal Error: missing case %d: in DeclName\n", astop(declp)); // fprintf(stderr, "* Internal Error: missing case %s: in DeclName\n", op[astop(declp)].Diag_Name); return -1; } } } int DeclType(int declp) { for (;;) { switch (astop(declp)) { case AST_SEQ: declp = leftchild(declp); break; case AST_Declare: return rightchild(declp); // case AST_TYPE_Struct: // return -1; default: fprintf(stderr, "* Internal Error: missing case %d: in DeclType\n", astop(declp)); // fprintf(stderr, "* Internal Error: missing case %s: in DeclType\n", op[astop(declp)].Diag_Name); return -1; } } } /* Convert the concrete syntax tree into an abstract syntax tree. (the grammar itself is also embedded in this section) */ /* looks like currently a bug in declaration of pointer to struct (like 'c' in this code) */ // TO DO: current outstanding bug - this constuct is not recognised: // statements other than declarations are valid at top-level (eg compound-block) // char line[512], *s; -- See P_rest_of_decl: // *++fp (due to grammar misunderstanding) // *--pp // sizeof(int) // 123L 123LL 0xff 077 // doubles & floats // enums? Or maybe 'left as an exercise'? // const declarations, and using them in folding constants // WON'T DO: // unions /* This is primarily the main 'build_ast()' procedure, which is actually where the grammar of the language is defined. The grammar is extracted from in-line comments, and converted into a table by the 'takeon' program which you can find in the same directory as this file. (You can view the extracted grammar in file "grammar.g") The style of compiler on which this design is based actually goes directly from concrete syntax tree to code generation - but that was from the days when memory was tight. Since most modern compilers - and especially books about them - are AST-based, we'll take that extra step here in order to give our students an AST-based compiler to experiment with. *a++ = 1; is not allowed although a++ = 1; is! - bug in lvalue? Another ambiguity for the lexer: a & &b; a && b; We need a tree-walker that assigns type information bottom-up from terminals. Resets the type on a cast. vars need a type field we need temps we need to serialise the AST for SSA elimination Convert PUSH &x; <expr>; POPI into <expr>; POP x Deliberate language restrictions: no typedefs (use #define) no '...' parameters (varargs) no procedures as parameters possibly will simplify expressions later. These would be extensions anyway, but we're still not doing them: no inline procedures (though may add later if easy) no nested procedures */ int build_ast_inner(int ap, int caller) { #define build_ast(ap) build_ast_inner(ap, __LINE__) /* Main code-generation procedure. This is called after parsing, with parameters which describe the parse tree. By jumping to the corresponding statement in the large 'case' below, we execute the actions associated with the parse-tree nodes. The grammar which was used to build the parse tables is extracted from the source below (from comments marked with "//\\") and the tables are built with the associated 'takeon' program. (See this same directory for the source. It's quite short...) See http://www.difranco.net/cop2220/op-prec.htm if you need to convince yourself of some of the more bizarre peculiarities of C's operator precedence rules. This one looks like a bug however: int test(void) { int j, k = 0; j = k++ ++; } /tmp/test.c:4: error: lvalue required as increment operand - I think that is allowed in this grammar. Mind you better to remove ++ and -- altogether... (make it a 'small c' suitable to compile kdf9emul.c subset) */ int T[100]; int saved_ap; int phrase; // A[ap] is the phrase number. A[ap+1] is the alt. int alt; // For consistency, in BIPs, the Alt should always be 1 // although that may not be the case at the moment :-( int phrases; // defined subphrases //fprintf(stdout, "\"grammar.c\", %d: build_ast(%d);\n", caller, ap); saved_ap = ap; phrase = A[ap++] & DROP_SPECIAL_BITS; alt = A[ap++]; phrases = A[ap++]; if ((phrase < 512) || (phrase > MAX_PHRASE+512)) { fprintf(stderr, "* Internal error: build_ast(%d) called from line %d - phrase = %d\n", ap, caller, phrase); exit(1); } switch (phrase) { /* Built-in phrases */ // BIPS (Built-in Phrases) are linked to the type-code returned // by the line-reconstruction code (aka lexer) // These *must* come first. These are pre-recognised by the line reconstruction code) similar // to how lex tokenises input) so if you have any place in your source where an item could be // ambiguous at the lexical level, do not handle it by line-reconstruction - instead have the // parser build it up from individual characters. // Parse-time phrase definitions. Implemented by C code *as we parse*. Anything done here // on an unsuccessful parse track *must* be undone by an explicit failure item before returning // from that track. (that will make more sense when you see an example) // BIPS are never actually called in the current code. (all the // work in building them is done at line reconstruction time) //\\ B<EOD>=0; case P_EOD: return -1; //\\ B<TAG>=1; case P_TAG: //\\ B<PPP>=2; case P_PPP: //\\ B<MMM>=3; case P_MMM: //\\ B<CHAR>=4; case P_CHAR: //\\ B<KEYWORD>=5; case P_KEYWORD: //\\ B<integer-constant>=6; case P_integer_constant: //\\ B<character-constant>=7; case P_character_constant: //\\ B<string-constant>=8; case P_string_constant: if (c[A[ap]].l > latest_line) latest_line = c[A[ap]].l; return A[ap]; //\\ # //\\ # Phrase definitions. SS is the main entry point. //\\ # // Regular phrase definitions. SS is the main entry point. // Note that the entire program is parsed before *any* code is executed // <!phrase> means that the parse will continue past this item if <phrase> is *not* present // at this point in the source. // <?phrase> means that the parse will continue if the input at this point in the source // does match <phrase>, *BUT* the parse pointer will not be stepped past the object and // the input is still present to be matched by whatever follows. This allows for quick // context-sensitive checks (usually looking ahead quite a few tokens) to avoid a lot of // unnecessary parsing. // A phrase define by C<phrase> is implemented in C code and executed *at parse time* // It may or may not match any tokens in the input stream. // Depending on the support code for the parser, spaces and comments may or may not have already // been removed. Keywords in this grammar are given "thusly", and literal characters are // matched in single quotes as in '='. A single quote itself is matched by '''' // We do not yet handle extended BNF constructs or regular expression (regexp) literal matching. // inserted file: // note fn/procedure declarations not yet included in the grammar //\\ P<SS> = //\\ <statement> <SS>, //\\ <EOD>; case P_SS: if (alt == 0) { T[1] = build_ast(A[ap+0]); // <expression> T[2] = build_ast(A[ap+1]); // <SS> return mkbinop(AST_SEQ, T[1], T[2]); } else { return -1; } //\\ P<level14> = //\\ "sizeof" '(' <type> <opt-pointerto-seq> ')', //\\ <indirectthrough-seq> <opt-cast> <identifier> '(' <opt-argument-list> ')' <opt-postfix> <opt-post-ppp-or-mmm>, //\\ <opt-cast> <identifier> '(' <opt-argument-list> ')' <opt-postfix>, //\\ <identifier> <opt-postfix> <opt-post-ppp-or-mmm>, //\\ <integer-constant>, //\\ <character-constant>, //\\ <string-constant>, //\\ '(' <expression> ')'; case P_level14: // All of this is "TO DO:" fix this if (alt == 0) { return -1; #ifdef NEVER T[1] = -1;//build_ast(A[ap+0]); // <type> - currently in a state of flux! TO DO T[2] = build_ast(A[ap+1]); // <opt-pointerto-seq> // TO DO: modify the type with the pointers if present #endif } else if ((alt == 1) || (alt == 2)) { if (alt == 1) { T[1] = build_ast(A[ap+0]); // <indirectthrough-seq> T[2] = build_ast(A[ap+1]); // <opt-cast> T[3] = build_ast(A[ap+2]); // <identifier> T[4] = build_ast(A[ap+3]); // '(' <opt-argument-list> ')' T[5] = build_ast(A[ap+4]); // <opt-postfix> T[6] = build_ast(A[ap+5]); // <opt-post-ppp-or-mmm> } else { T[1] = -1; // absent <indirectthrough-seq> T[2] = build_ast(A[ap+0]); // <opt-cast> T[3] = build_ast(A[ap+1]); // <identifier> T[4] = build_ast(A[ap+2]); // '(' <opt-argument-list> ')' T[5] = build_ast(A[ap+3]); // <opt-postfix> T[6] = -1; // <opt-post-ppp-or-mmm> } // Question: *fred(2)->j ? *(fred(2)->j) or (*fred(2))->j ???? Looks like the latter. OR IS IT??? Confirm with GCC T[0] = mkterop(AST_Call, T[3], T[4], -1); // extra param will point to the defproc for the procedure if (T[1] != -1) T[0] = mkmonop(T[1], T[0]); // add pointers (ignore cast for now) if (T[5] != -1) { // opt-postfix1 first T[7] = T[5]; while (leftchild(T[5]) != -1) T[5] = leftchild(T[5]); leftchild(T[5]) = T[0]; T[0] = T[7]; } if (T[6] != -1) { // then opt-post-ppp etc T[7] = T[6]; while (leftchild(T[6]) != -1) T[6] = leftchild(T[6]); leftchild(T[6]) = T[0]; T[0] = T[7]; } return T[0]; } else if (alt == 3) { T[1] = build_ast(A[ap+0]); // <identifier> T[2] = build_ast(A[ap+1]); // <opt-postfix> T[3] = build_ast(A[ap+2]); // <opt-post-ppp-or-mmm> if ((T[2] == -1) && (T[3] == -1)) return T[1]; // ident only if (T[2] == -1) return mkmonop(T[3], T[1]); // ident++ only T[0] = T[2]; while (leftchild(T[2]) != -1) T[2] = leftchild(T[2]); leftchild(T[2]) = T[1]; if (T[3] == -1) return T[0]; return mkmonop(T[3], T[0]); } else if (alt == 4) { return integer_constant(ap+0); // build_ast(A[ap+0]); // TO DO: distinguish integer const in AST } else if (alt == 5) { return character_constant(ap+0); // build_ast(A[ap+0]); // TO DO: distinguish char const in AST } else if (alt == 6) { return string_constant(ap+0); // build_ast(A[ap+0]); // TO DO: distinguish string const in AST } else { return build_ast(A[ap+0]); // <expression> } //\\ P<level14-lv> = //\\ <indirectthrough-seq> <opt-cast> <identifier> '(' <opt-argument-list> ')' <opt-postfix> <opt-post-ppp-or-mmm>, //\\ <opt-cast> <identifier> '(' <opt-argument-list> ')' <opt-postfix>, //\\ <identifier-lv> <opt-postfix> <opt-post-ppp-or-mmm>; case P_level14_lv: // yuk. duplicates code from elsewhere. Should be common-factored. if (alt <= 1) { // <indirectthrough-seq> <opt-cast> <identifier> '(' <opt-argument-list> ')' <opt-postfix> <opt-post-ppp-or-mmm>, if (alt == 0) { T[1] = build_ast(A[ap+0]); // <indirectthrough-seq> T[2] = build_ast(A[ap+1]); // <opt-cast> T[3] = build_ast(A[ap+2]); // <identifier> T[4] = build_ast(A[ap+3]); // '(' <opt-argument-list> ')' T[5] = build_ast(A[ap+4]); // <opt-postfix> T[6] = build_ast(A[ap+5]); // <opt-post-ppp-or-mmm> } else { T[1] = -1; // absent <indirectthrough-seq> T[2] = build_ast(A[ap+0]); // <opt-cast> T[3] = build_ast(A[ap+1]); // <identifier> T[4] = build_ast(A[ap+2]); // '(' <opt-argument-list> ')' T[5] = build_ast(A[ap+3]); // <opt-postfix> T[6] = -1; // <opt-post-ppp-or-mmm> } // Question: *fred(2)->j ? *(fred(2)->j) or (*fred(2))->j ???? Looks like the latter T[0] = mkterop(AST_Call, T[3], T[4], -1); if (T[1] != -1) T[0] = mkmonop(T[1], T[0]); // add pointers (ignore cast for now) if (T[5] != -1) { // opt-postfix1 first T[7] = T[5]; while (leftchild(T[5]) != -1) T[5] = leftchild(T[5]); leftchild(T[5]) = T[0]; T[0] = T[7]; } if (T[6] != -1) { // then opt-post-ppp etc T[7] = T[6]; while (leftchild(T[6]) != -1) T[6] = leftchild(T[6]); leftchild(T[6]) = T[0]; T[0] = T[7]; } return T[0]; } else { // <identifier-lv> <opt-postfix> <opt-post-ppp-or-mmm>; T[1] = build_ast(A[ap+0]); // <identifier> T[2] = build_ast(A[ap+1]); // <opt-postfix> T[3] = build_ast(A[ap+2]); // <opt-post-ppp-or-mmm> if ((T[2] == -1) && (T[3] == -1)) return T[1]; // ident only if (T[2] == -1) return mkmonop(T[3], T[1]); // ident++ only T[0] = T[2]; while (leftchild(T[2]) != -1) T[2] = leftchild(T[2]); leftchild(T[2]) = T[1]; if (T[3] == -1) return T[0]; return mkmonop(T[3], T[0]); } // TO DO: apparently, a[k]++->j.l is valid, so need to put the ++ back in the mix // the way it was before... (and if there are any non-sensical combinations, weed // them out after parsing) //\\ P<opt-post-ppp-or-mmm> = //\\ <PPP>, //\\ <MMM>, //\\ ; case P_opt_post_ppp_or_mmm: if (alt == 0) return AST_Post_Inc; if (alt == 1) return AST_Post_Dec; return -1; //\\ P<opt-postfix> = //\\ '[' <expression> ']' <opt-postfix>, //\\ '.' <identifier> <opt-postfix>, //\\ '-' '>' <identifier> <opt-postfix>, //\\ ; case P_opt_postfix: if (alt == 3) return -1; T[1] = build_ast(A[ap+0]); // <identifier> or <expression> T[2] = build_ast(A[ap+1]); // <opt-postfix> //fprintf(stderr, "opt-postfix: (alt %d) t1 = %d, t2 = %d\n", alt, T[1], T[2]); if (alt == 0) { //fprintf(stderr, "T[1] = %d\n", T[1]); T[3] = mkbinop(AST_Idx, -1, T[1]); } else if (alt == 1) { T[3] = mkbinop(AST_Member, -1, T[1]); } else if (alt == 2) { T[3] = mkbinop(AST_Ptr, -1, T[1]); } if (T[2] == -1) return T[3]; T[0] = T[2]; while (leftchild(T[2]) != -1) T[2] = leftchild(T[2]); leftchild(T[2]) = T[3]; return T[0]; //\\ P<function-call> = //\\ "sizeof" '(' <type> ')', //\\ <opt-indirectthrough-seq> <opt-cast> <identifier> '(' <opt-argument-list> ')'; // also need sizeof(var) or sizeof(expr)??? does (var) become an expr? case P_function_call: if (alt == 0) { T[1] = build_ast(A[ap+0]); // <type> // AST_SizeOf - rightchild: 0 = var 1 = expr 2 = type return mkbinop(AST_SizeOf, T[1], 2); // TO DO: should fold to a literal, use new type mechanism } else { // TO DO: opt-indirectthrough-seq, cast indirection T[1] = build_ast(A[ap+2]); // <identifier> T[2] = build_ast(A[ap+3]); // <opt-argument-list> return mkterop(AST_Call, T[1], T[2], -1); } //\\ P<opt-argument-list> = //\\ <level1> <comma-opt-argument-list>, //\\ ; //\\ P<comma-opt-argument-list> = //\\ ',' <level1> <comma-opt-argument-list>, //\\ ; case P_opt_argument_list: case P_comma_opt_argument_list: if (alt == 0) { T[1] = build_ast(A[ap+0]); // <level1> T[2] = build_ast(A[ap+1]); // <comma-opt-argument-list> return mkbinop(AST_UseParam, T[1], T[2]); } else { return mkbinop(AST_UseParam, -1, -1); // instead of -1, so that joe() has a parameter but that parameter is -1 } // TO DO: grammar needs a little tweaking to allow "*(type *)&x" as an lvalue // (and need to check that "(type)var" is a valid lvalue too) //\\ P<lvalue> = //\\ <indirectthrough-seq> <opt-cast> <level14> | <level14-lv>; case P_lvalue: if (alt == 0) { T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); // TO DO: handle casts T[3] = build_ast(A[ap+2]); T[0] = T[1]; while (leftchild(T[1]) != -1) T[1] = leftchild(T[1]); leftchild(T[1]) = T[3]; return T[0]; // TO DO: testing *fred = ... } return build_ast(A[ap+0]); // <level14-lv> //\\ P<opt-cast> = <cast> | ; case P_opt_cast: return -1; // TO DO: //\\ P<level13> = //\\ <prefix-operator-lv> <level14-lv>, //\\ <opt-prefix-operator> <level14>; case P_level13: // these two can be folded if we don't change them again. if (alt == 0) { T[1] = build_ast(A[ap+0]); // <prefix-operator-lv> T[2] = build_ast(A[ap+1]); // <level14-lv> T[0] = T[1]; while (leftchild(T[0]) != -1) T[0] = leftchild(T[0]); leftchild(T[0]) = T[2]; return T[1]; } else { T[1] = build_ast(A[ap+0]); // <opt-prefix-operator> T[2] = build_ast(A[ap+1]); // <level14> if (T[1] == -1) return T[2]; T[0] = T[1]; while (leftchild(T[0]) != -1) T[0] = leftchild(T[0]); leftchild(T[0]) = T[2]; return T[1]; } //\\ P<prefix-operator-lv> = //\\ <PPP>, //\\ <MMM>, //\\ <opt-indirectthrough-seq> <cast>, //\\ '&', //\\ "sizeof"; // all tuples terminate their leftchild branches with -1 for the following operand to come case P_prefix_operator_lv: if (alt == 0) { return mkmonop(AST_Pre_Inc, -1); // ++ } else if (alt == 1) { return mkmonop(AST_Pre_Dec, -1); // -- } else if (alt == 2) { T[1] = build_ast(A[ap+0]); // <opt-indirectthrough-seq> T[2] = build_ast(A[ap+1]); // <cast> if (T[1] == -1) return T[2]; T[0] = T[1]; while (leftchild(T[1]) != -1) T[1] = leftchild(T[1]); leftchild(T[1]) = T[2]; return T[0]; } else if (alt == 3) { return mkmonop(AST_AddressOf, -1); // & } else { return mkbinop(AST_SizeOf, -1, 0); // rightchild: 0 = var 1 = expr 2 = type } //\\ P<opt-prefix-operator> = //\\ <PPP>, //\\ <MMM>, //\\ '+', //\\ '-', //\\ '!', //\\ '~', //\\ <opt-indirectthrough-seq> <cast>, //\\ <indirectthrough-seq>, //\\ '&', //\\ "sizeof", //\\ ; case P_opt_prefix_operator: if (alt == 0) { return mkmonop(AST_Pre_Inc, -1); // ++ } else if (alt == 1) { return mkmonop(AST_Pre_Dec, -1); // -- } else if (alt == 2) { return mkmonop(AST_UPos, -1); // + } else if (alt == 3) { return mkmonop(AST_UNeg, -1); // - } else if (alt == 4) { return mkmonop(AST_UBoolNot, -1); // ! not sure if we need a UShortcutBoolNot cf BoolAnd etc? } else if (alt == 5) { return mkmonop(AST_UBitNot, -1); // ~ } else if (alt == 6) { // TO DO: add <opt-indirectthrough-seq> // needs leftmost -1 return build_ast(A[ap+0]); // <cast> } else if (alt == 7) { // needs leftmost -1 return build_ast(A[ap+0]); // <indirectthrough-seq> } else if (alt == 8) { return mkmonop(AST_AddressOf, -1); // & } else if (alt == 9) { // sizeof <rvalue> return mkbinop(AST_SizeOf, -1, 1); // rightchild: 0 = var 1 = expr 2 = type // shouldn't be called, or will be called as sizeof rvalue } return -1; //\\ P<cast> = //\\ '(' <type> <opt-pointerto-seq> ')'; case P_cast: // TO DO: meeds work. returned structure will be // indirectthough(indirectthrough(cast(var{-1},type))) // *** result ***MUST*** have a -1 in the terminal leftchild for a following operand T[1] = build_ast(A[ap+0]); // <type> -- TO DO: make sure it is a valid AST_ object T[2] = build_ast(A[ap+1]); // <opt-pointerto-seq> if (T[2] == -1) return mkbinop(AST_Cast, -1, T[1]); T[0] = T[2]; while (leftchild(T[2]) != -1) T[2] = leftchild(T[2]); leftchild(T[2]) = T[1]; return mkbinop(AST_Cast, -1, T[0]); // I have split pointer-seq into pointerto-seq and indirectthrough-seq, // because one is used in type *definitions* and the other is used in the // code generator to perform an indirection through a pointer. //\\ P<indirectthrough-seq> = //\\ '*' <opt-indirectthrough-seq>; case P_indirectthrough_seq: // *** result ***MUST*** have a -1 in the terminal leftchild for a following operand return mkmonop(AST_IndirectThrough, build_ast(A[ap+0])); // <opt-indirectthrough-seq> //\\ P<opt-indirectthrough-seq> = //\\ <indirectthrough-seq>, //\\ ; case P_opt_indirectthrough_seq: if (alt == 0) return build_ast(A[ap+0]); // <indirectthrough-seq> return -1; //\\ P<pointerto-seq> = //\\ '*' <opt-pointerto-seq>; case P_pointerto_seq: // *** result ***MUST*** have a -1 in the terminal leftchild for a following operand return mkmonop(AST_TYPE_PointerTo, build_ast(A[ap+0])); // <opt-pointerto-seq> //\\ P<opt-pointerto-seq> = //\\ <pointerto-seq>, //\\ ; case P_opt_pointerto_seq: if (alt == 0) return build_ast(A[ap+0]); // <pointerto-seq> return -1; //\\ P<opt-qualifier> = "const" | "volatile" | ; case P_opt_qualifier: return alt; // TO DO: //\\ P<opt-scope> = "auto" | "register" | "static" | "extern" | ; case P_opt_scope: return alt; // TO DO: // TYPE CODE // bottom 16 bits (15:0) - size of data (eg ints are 4 bytes but it could be a struct - max member size 16K) // next 2 bits (17:16) - signed/unsigned/default // to come: scope/qualifier. Maybe "inline". //\\ P<closer> = ')'; case P_closer: return -1; //\\ P<type> = case P_type: switch (alt) { // TO DO: still to handle signedness. probably should use a better interface proc - no strings //\\ "void", case 0: return TypeAtom("", "void"); //\\ "struct" <existing-structname>, case 1: return TypeStruct(build_ast(A[ap+0]), -1); // use this as type info. Details will be plugged in by populate_types.c //\\ "typeof" '(' <lvalue> ')', case 2: return -1; // TO DO: extract type from lvalue - AST_SizeOf and AST_TypeOf? - done before code gen? //\\ <signed> "long" "long" "int", case 3: return TypeAtom(""/*build_ast(A[ap+0])*/, "long long"); //\\ <signed> "long" "long", case 4: return TypeAtom(""/*build_ast(A[ap+0])*/, "long long"); //\\ <signed> "long" "int", case 5: return TypeAtom(""/*build_ast(A[ap+0])*/, "long"); //\\ <signed> "long", case 6: return TypeAtom(""/*build_ast(A[ap+0])*/, "long"); //\\ <signed> "int", case 7: return TypeAtom(""/*build_ast(A[ap+0])*/, "int"); //\\ <signed> "short" "int", case 8: return TypeAtom(""/*build_ast(A[ap+0])*/, "short"); //\\ <signed> "short", case 9: return TypeAtom(""/*build_ast(A[ap+0])*/, "short"); //\\ <signed> "char", case 10: return TypeAtom(""/*build_ast(A[ap+0])*/, "char"); //\\ "FILE"; case 11: return TypeAtom("", "void"); default: return -1; } #ifdef NEVER // TO DO: adding typeof properly, and hack for FILE (avoiding full implementation of typedefs) // struct size limited to 32K-1 if (alt == 0) return 0; if (alt == 1) return (1<<16)-1; // TO DO: look up struct defn in name table and find size. ($1 is AST_TAG) if (alt != 11) T[0] = build_ast(A[ap+0]); if (alt == 2) return (1<<16)-1; // TO DO: typeof T[0] if (alt <= 4) return 8+T[0]; if (alt == 7) return 4+T[0]; if (alt <= 9) return 2+T[0]; if (alt <= 10) return 1+T[0]; return (1<<16)-1; // need to determine sizeof struct FILE. Probably never needed. #endif //\\ P<signed> = "unsigned", "signed", ; case P_signed: if (alt == 0) return 1<<16; if (alt == 1) return 2<<16; return 0; // let caller decide default. (Is it always signed? What about const char strings?) //\\ P<level12> = //\\ <level13> <opt-level12>; //\\ P<level11> = //\\ <level12> <opt-level11>; //\\ P<level10> = //\\ <level11> <opt-level10>; //\\ P<level9> = //\\ <level10> <opt-level9>; //\\ P<level8> = //\\ <level9> <opt-level8>; //\\ P<level7> = //\\ <level8> <opt-level7>; //\\ P<level6> = //\\ <level7> <opt-level6>; //\\ P<level5> = //\\ <level6> <opt-level5>; //\\ P<level4> = //\\ <level5> <opt-level4>; //\\ P<level3> = //\\ <level4> <opt-level3>; // opt-conditional is a ternary operator, but it is still leftchild() that we tweak :-) //\\ P<level2> = //\\ <level3> <opt-conditional>; case P_level12: case P_level11: case P_level10: case P_level9: case P_level8: case P_level7: case P_level6: case P_level5: case P_level4: case P_level3: case P_level2: T[1] = build_ast(A[ap+0]); // <level-N> T[2] = build_ast(A[ap+1]); // <opt-level-N-1> if (T[2] == -1) return T[1]; T[0] = T[2]; while (leftchild(T[0]) != -1) T[0] = leftchild(T[0]); leftchild(T[0]) = T[1]; return T[2]; //\\ P<opt-level12> = //\\ <level12-op> <level13> <opt-level12>, //\\ ; //\\ P<opt-level11> = //\\ <level11-op> <level12> <opt-level11>, //\\ ; //\\ P<opt-level10> = //\\ <level10-op> <level11> <opt-level10>, //\\ ; //\\ P<opt-level9> = //\\ <level9-op> <level10> <opt-level9>, //\\ ; //\\ P<opt-level8> = //\\ <level8-op> <level9> <opt-level8>, //\\ ; //\\ P<opt-level7> = //\\ <level7-op> <level8> <opt-level7>, //\\ ; //\\ P<opt-level6> = //\\ <level6-op> <level7> <opt-level6>, //\\ ; //\\ P<opt-level5> = //\\ <level5-op> <level6> <opt-level5>, //\\ ; //\\ P<opt-level4> = //\\ <level4-op> <level5> <opt-level4>, //\\ ; //\\ P<opt-level3> = //\\ <level3-op> <level4> <opt-level3>, //\\ ; case P_opt_level12: case P_opt_level11: case P_opt_level10: case P_opt_level9: case P_opt_level8: case P_opt_level7: case P_opt_level6: case P_opt_level5: case P_opt_level4: case P_opt_level3: if (alt == 0) { // also needs the parent to do the while (leftchild()==-1) trick... // to get exprs like a * b / c correct. T[1] = build_ast(A[ap+0]); // <op> T[2] = build_ast(A[ap+1]); // <RHS> T[3] = build_ast(A[ap+2]); // <rest-of-expression> if (T[3] == -1) return mkbinop(T[1], -1, T[2]); // leave a hole T[0] = T[3]; while (leftchild(T[3]) != -1) T[3] = leftchild(T[3]); leftchild(T[3]) = mkbinop(T[1], -1, T[2]); // move hole leftwards and downwards return T[0]; // (bug fixed: was returning T[3] !) } return -1; //\\ P<level12-op> = //\\ '*', //\\ '/', //\\ '%'; case P_level12_op: if (alt == 0) { return AST_Mul; } else if (alt == 1) { return AST_Div; } else { return AST_Mod; } //\\ P<level11-op> = //\\ '+', //\\ '-'; case P_level11_op: if (alt == 0) { return AST_Add; } else { return AST_Sub; } //\\ P<level10-op> = //\\ '<' '<', //\\ '>' '>'; case P_level10_op: if (alt == 0) { return AST_BitLsh; } else { return AST_BitRsh; // 'Bit' as opposed to arithmetic shift - LSR not ASR. } //\\ P<level9-op> = //\\ '<' '=', //\\ '<', //\\ '>' '=', //\\ '>'; case P_level9_op: if (alt == 0) { return AST_LE; } else if (alt == 1) { return AST_LT; } else if (alt == 2) { return AST_GE; } else { return AST_GT; } //\\ P<level8-op> = //\\ '=' '=', //\\ '!' '='; case P_level8_op: if (alt == 0) { return AST_EQ; } else { return AST_NE; } //\\ P<level7-op> = //\\ <!ANDAND> '&'; case P_level7_op: return AST_BitAnd; //\\ P<ANDAND> = '&&'; case P_ANDAND: return -1; //\\ P<level6-op> = //\\ '^'; case P_level6_op: return AST_BitXor; //\\ P<level5-op> = //\\ <!OROR> '|'; case P_level5_op: return AST_BitOr; //\\ P<OROR> = '||'; case P_OROR: return -1; //\\ P<level4-op> = //\\ '&' '&'; case P_level4_op: return AST_ShortcutBoolAnd; // unless overridden in a later optimisation //\\ P<level3-op> = //\\ '|' '|'; case P_level3_op: return AST_ShortcutBoolOr; // ditto // BIG 'TO DO' HERE! Each of the two results here must be of the same AST_TYPE_* so that a single // piece of code can assign the result of the conditional expression to something else. This means // that if one item is smaller than the other, it must be widened (here) to match. //\\ P<opt-conditional> = //\\ '?' <level3> ':' <level2>, //\\ ; case P_opt_conditional: if (alt == 0) { T[1] = -1; // plugged in by caller T[2] = build_ast(A[ap+0]); // <level3> T[3] = build_ast(A[ap+1]); // <level2> return mk(AST_Cond, 3, T); } else { return -1; } //\\ P<level1> = //\\ <opt-assign-list> <level2>; case P_level1: // need some right-associative magic for a = b = c = expr. // this is the only example of walking down the RHS looking for a hole. T[1] = build_ast(A[ap+0]); // <opt-assign-list> T[2] = build_ast(A[ap+1]); // <level2> if (T[1] == -1) return T[2]; T[0] = T[1]; while (rightchild(T[0]) != -1) T[0] = rightchild(T[0]); rightchild(T[0]) = T[2]; //fprintf(stderr, "rightchild plugged with %d, returning %d\n", T[2], T[1]); return T[1]; //\\ P<equiv> = //\\ '=' '='; case P_equiv: return -1; //\\ P<opt-assign-list> = //\\ <lvalue> <!equiv> <assign-op> <opt-assign-list>, //\\ ; case P_opt_assign_list: if (alt == 0) { T[1] = build_ast(A[ap+0]); // <lvalue> T[2] = -1; // <!equiv> T[3] = build_ast(A[ap+2]); // <assign-op> T[4] = build_ast(A[ap+3]); // <opt-assign-list> //fprintf(stderr, "T[1] = %d, T[4] = %d\n", T[1], T[4]); return mkbinop(T[3], T[1], T[4]); } else { return -1; } //\\ P<assign-op> = //\\ '=', //\\ '+' '=', //\\ '-' '=', //\\ '*' '=', //\\ '/' '=', //\\ '%' '=', //\\ '&' '=', //\\ '^' '=', //\\ '|' '=', //\\ '<' '<' '=', //\\ '>' '>' '='; case P_assign_op: if (alt == 0) { // = return AST_AssignTo; } else if (alt == 1) { // += TO DO: Note that code generation doesn't yet handle += etc... return AST_AddAss; } else if (alt == 2) { // -= return AST_SubAss; } else if (alt == 3) { // *= return AST_MulAss; } else if (alt == 4) { // /= return AST_DivAss; } else if (alt == 5) { // %= return AST_ModAss; } else if (alt == 6) { // &= return AST_BitAndAss; } else if (alt == 7) { // ^= return AST_BitXorAss; } else if (alt == 8) { // |= return AST_BitOrAss; } else if (alt == 9) { // <<= return AST_BitLshAss; } else { // >>= return AST_BitRshAss; } //\\ P<expression> = //\\ <level1> <opt-comma-level0-list>; case P_expression: T[1] = build_ast(A[ap+0]); // <level1> T[2] = build_ast(A[ap+1]); // <level0-list> if (T[2] == -1) return T[1]; return mkbinop(AST_CommaSEQ, T[1], T[2]); //\\ P<opt-comma-level0-list> = //\\ ',' <level1> <opt-comma-level0-list>, //\\ ; case P_opt_comma_level0_list: if (alt == 1) return -1; T[1] = build_ast(A[ap+0]); // <level1> T[2] = build_ast(A[ap+1]); // <opt-comma-level0-list>, if (T[2] == -1) return T[1]; return mkbinop(AST_CommaSEQ, T[1], T[2]); //\\ P<rvalue> = //\\ <expression>; case P_rvalue: return build_ast(A[ap+0]); // <expression> //\\ P<identifier> = //\\ <TAG>; case P_identifier: T[1] = mkmonop(AST_TAG, build_ast(A[ap+0])); return T[1]; // return lookup_identifier(T[1]); // WRONG!!!! can only lookup at codegen stage when walking the AST // return mkbinop(AST_Var, T[1], 0); // 0 reflects 'value of', 1 is 'address of' //\\ P<identifier-lv> = //\\ <TAG>; case P_identifier_lv: T[1] = mkmonop(AST_TAG, build_ast(A[ap+0])); return T[1]; // look up in name table, return an AST object that includes the name and the type info // return lookup_identifier(T[1]); // WRONG!!!! can only lookup at codegen stage when walking the AST // return mkbinop(AST_Var, T[1], 0); // 0 reflects 'value of', 1 is 'address of' *** TESTING A BUGFIX!!! *** // empty statements come via the <expression-statement> rule // reorder rules with literals and keyword choices first, minimise backtracking? //\\ P<statement> = '{' <compound-statement> '}' | //\\ <labeled-statement> | //\\ <declaration> | //\\ <expression-statement> | //\\ <selection-statement> | //\\ <iteration-statement> | //\\ <jump-statement> | //\\ <debug-statement>; case P_statement: T[1] = build_ast(A[ap+0]); //fprintf(stderr, "STATEMENT: alt=%d ast=%d\n", alt, T[1]); return T[1]; //\\ P<debug-statement> = '?' <statement>; case P_debug_statement: return mkmonop(AST_SHOWTREE, build_ast(A[ap+0])); //\\ P<labeled-statement> = <identifier> ':' <statement> | //\\ "case" <constant-expression> ':' <statement> | //\\ "default" ':' <statement>; case P_labeled_statement: if (alt == 0) { return mkbinop(AST_SEQ, mkbinop(AST_Label, build_ast(A[ap+0]), -1), build_ast(A[ap+1])); } else if (alt == 1) { T[1] = build_ast(A[ap+0]); T[2] = -1; // points to top of switch T[3] = -1; // integer holding internal label number for this jump address, used in code generation stage return mkbinop(AST_SEQ, mk(AST_Case, 3, T), // first -1 for enclosing switch/compound, second for label build_ast(A[ap+1])); } return mkbinop(AST_SEQ, mkbinop(AST_DefaultCase, -1, -1), // -1's same as above build_ast(A[ap+0])); //\\ P<constant-expression> = <expression>; case P_constant_expression: // TO DO: fold, then check that top of tree is a const. Otherwise semantic error. T[1] = build_ast(A[ap+0]); // if (astop(T[1] != AST_Const)) { // Can't - not folded yet! // fprintf(stderr, "* Error: expression is not constant\n"); exit(1); // } return T[1]; //\\ P<expression-statement> = <opt-expression> ';'; case P_expression_statement: return build_ast(A[ap+0]); //\\ P<opt-expression> = <expression> | ; case P_opt_expression: if (alt == 1) return -1; return build_ast(A[ap+0]); //\\ P<compound-statement> = <opt-declaration-list> <opt-statement-list>; case P_compound_statement: // returns an AST_Scope containing the block // First, handle scope: // dummy for AST_Scope creation T[1] = -1; // this block T[2] = -1; // parent block T[3] = BLOCK_COMPOUND; // type (Do we need to handle procedural scope here too?) T[4] = mk(AST_Scope, 3, T); // only tricky thing with this definition of AST_Scope is procedure params rightchild(T[4]) = current_scope; // Now handle block // push_scope(T[4]); T[5] = build_ast(A[ap+0]); // <opt-declaration-list> T[6] = build_ast(A[ap+1]); // <opt-statement-list>; // pop_scope(); //if (T[5] == -1) return T[6]; // may be -1 //if (T[6] == -1) return T[5]; // decls only, no code? Maybe side effects in decl initialisation? T[7] = mkbinop(AST_SEQ, T[5], T[6]); leftchild(T[4]) = T[7]; return T[4]; //\\ P<opt-declaration-list> = <declaration-list> | ; case P_opt_declaration_list: if (alt == 1) return -1; // TO DO: return build_ast(A[ap+0]); //\\ P<declaration-list> = <declaration> <opt-declaration-list> ; case P_declaration_list: T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); return mkbinop(AST_SEQ, T[1], T[2]); // TO DO: //\\ P<declaration> = <struct-decl> | //\\ <scalar-decl> | // note these are procedures or forward declarations but not procedure variables: //\\ <proc-fn-decl> | //\\ <array-decl>; case P_declaration: return build_ast(A[ap+0]); // no procedure variables in structs (or elsewhere) //\\ P<struct-member-declaration> = <struct-decl> | //\\ <scalar-decl> | //\\ <array-decl>; case P_struct_member_declaration: return build_ast(A[ap+0]); // This is a declaration of a struct type, not an actual specific struct of that type. I think. // Do we need an "AST_DeclareStruct" for this, to parallel AST_Declare for variables? (we'er // not doing that, nor using AST_Declare. Currently using AST_TYPE_Struct) //\\ P<struct-member-list> = <struct-member-declaration> <struct-member-list>, ; case P_struct_member_list: if (alt == 1) return -1; T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); return TypeStructMember(T[2], DeclName(T[1]), DeclType(T[1])); // TO DO: pull info out of declaration // TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO // SHIT. Major design flaw. DeclName & DeclType cannot look up the type info until the scope // structure is in place, and that is currently being done *after* the AST building. Are we going to // have to implement the scoping calls during this phase too??? // TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO TO DO // CURRENT MEGA-TO DO: // looks like we may need to restructure *ALL* declarations so that they are // AST_Declare name AST_TYPE_* initial, otherwise picking apart all these declarations // so that the relevant pieces can be put in the struct member list is going to be a nightmare...??? //\\ P<struct-decl> = "struct" <new-structname> '{' <struct-member-list>'}'; case P_struct_decl: T[1] = build_ast(A[ap+0]); assert(astop(T[1]) == AST_TAG); T[2] = build_ast(A[ap+1]); return TypeStruct(T[1]/*tag*/, T[2]/*linked list*/); // ALTHOUGH THERE IS A LOT OF BACKTRACKING HERE, I'M DELIBERATELY WRITING DECARATIONS LIKE THIS // UNTIL I GET THEM TO WORK, OTHERWISE IT IS WAY TOO CONFUSING! //\\ P<opt-rest-of-scalar-decl> = ',' <opt-pointerto-seq> <identifier> <opt-scalar-init> <opt-rest-of-scalar-decl>, ; case P_opt_rest_of_scalar_decl: if (alt == 1) return -1; //\\ P<rest-of-scalar-decl> = <opt-pointerto-seq> <identifier> <opt-scalar-init> <opt-rest-of-scalar-decl> ';'; case P_rest_of_scalar_decl: T[1] = build_ast(A[ap+0]); // TO DO: if (pointers) T[1] = pointers(T[2], T[1]) T[2] = build_ast(A[ap+1]); T[3] = build_ast(A[ap+2]); // if present, add initialisation code (GLA or explicit assign??? - TO DO student exercise) T[4] = build_ast(A[ap+3]); return mkbinop(AST_SEQ, mk(AST_Dummy, 3, T), T[4]); // or does it have to be 10 for show_ast to work??? //\\ P<scalar-decl> = <opt-scope> <opt-qualifier> <type> <rest-of-scalar-decl>; case P_scalar_decl: { int seq = mkbinop(AST_SEQ, -1, -1); int topseq = seq; T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); T[3] = build_ast(A[ap+2]); T[0] = build_ast(A[ap+3]); // linked list of *ident=n, // T[0] is an AST_SEQ of multiple <rest-of-scalar-decl>s for (;;) { assert(astop(T[0]) == AST_SEQ); T[4] = leftchild(leftchild(T[0])); T[5] = rightchild(leftchild(T[0])); T[6] = nthchild(leftchild(T[0]), 2); // now it is as if we had a phrase: // <opt-scope> <opt-qualifier> <type> <opt-pointerto-seq> <identifier> <opt-scalar-init> ';'; if (T[4] != -1) fprintf(stderr, "! Ptr to scalar declaration: %s\n", c[leftchild(T[5])].s); // *** handle the declaration here. *** leftchild(seq) = mkterop(AST_Declare, T[5], ConstructType(c[leftchild(T[5])].s, T[1], T[2], T[3], T[4], -1), T[6]); // ConstructType scope qualifier type pointers arrays rightchild(seq) = -1; T[0] = rightchild(T[0]); if (T[0] == -1) break; rightchild(seq) = mkbinop(AST_SEQ, -1, -1); seq = rightchild(seq); } return topseq; } //\\ P<proc-fn-decl> = <opt-scope> <opt-qualifier> <type> <opt-pointerto-seq> <identifier> '(' <opt-param-list> ')' <forward-decl-or-actual-body>; case P_proc_fn_decl: T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); T[3] = build_ast(A[ap+2]); T[4] = build_ast(A[ap+3]); T[5] = build_ast(A[ap+4]); T[6] = build_ast(A[ap+5]); T[7] = build_ast(A[ap+6]); if (T[7] == -1) return -1; // For now. TO DO: add node for forward declaration // Body is an AST_Scope. We unpick it and insert the // parameter list before the compound-block body. assert(astop(T[7]) == AST_Scope); leftchild(T[7]) = mkbinop(AST_SEQ, DefToReceive(T[6]), leftchild(T[7])); // ^ DefParam becomes ReceiveParam (copy, not in situ) inside // the procedure body, for local references to find. // maybe need new ast type. tie defparams to proc defn for callers, // and 'receiveparam's to receive the params inside the procedure? // (there may not be any code generated but we do have to assign base reg/offset etc) // (We already have 'useparam's at the point of call) T[1] = ConstructType(c[leftchild(T[5])].s, /*scope*/ T[1], /*qualifier*/ T[2], /*type*/ T[3], /*pointers*/ T[4], /*arrays*/ -1); T[2] = T[5]; // Name T[3] = T[6]; // Params T[4] = T[7]; // Body return mk(AST_DefProc, 4, T); // first node is result type, will be plugged by parent // DefProc: scope/qual/type/ptr-seq, id, params, body // currently only one array dec per statement. will generalise once this is working... //\\ P<array-decl> = <opt-scope> <opt-qualifier> <type> <opt-pointerto-seq> <identifier> <array-bounds> <opt-array-init> ';'; case P_array_decl: T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); T[3] = build_ast(A[ap+2]); T[4] = build_ast(A[ap+3]); T[5] = build_ast(A[ap+4]); T[6] = build_ast(A[ap+5]); T[7] = build_ast(A[ap+6]); fprintf(stderr, "! Array declaration: %s\n", c[leftchild(T[5])].s); return mkterop(AST_Declare, /*nametag*/T[5], ConstructType(c[leftchild(T[5])].s, T[1], T[2], T[3], T[4], T[6]), // ConstructType scope qualifier type pointers arrays /*init*/T[7]); //\\ P<forward-decl-or-actual-body> = ';' | '{' <compound-statement> '}'; case P_forward_decl_or_actual_body: if (alt == 0) return -1; // TO DO: make a nametable entry for the forward declaration, // - necessary to allow mutual recursion T[1] = build_ast(A[ap+0]); assert(astop(T[1]) == AST_Scope); nthchild(T[1], 2) = BLOCK_PROCFN; // override BLOCK_COMPOUND return T[1]; //\\ P<array-bounds> = '[' <opt-constant-expression> ']' <opt-array-bounds>; //\\ P<opt-array-bounds> = '[' <opt-constant-expression> ']' <opt-array-bounds> | ; case P_array_bounds: case P_opt_array_bounds: if (alt == 1) return -1; T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); // TO DO: multi-d arrays return TypeArrayOf(/*child*/-1, /*index*/TypeAtom("signed", "int"), /*lower*/-1, /*upper*/T[1]); // child plugged by caller // index is an integer // lower is always 0 // TO DO: upper if given // <constant-expression> originally included a comma-list, which caused confusion // the constant expression list may include inline structs... would have been // nicer to have separated those out at a higher level rather than include them here. // turns out this is valid C: int a[2] = {1, 2}, b[3] = {4, 5, 6}; - need to fix initializer // think this means that a <constant-initializer> may be an <array-init>. //\\ P<opt-array-init> = '=' '{' <constant-initializer-list> '}', ; case P_opt_array_init: if (alt == 1) return -1; // TO DO: only partially done return mkbinop(AST_AssignTo, -1, build_ast(A[ap+0])); //\\ P<constant-initializer-list> = <constant-initializer> <rest-of-constant-initializer-list>; //\\ P<rest-of-constant-initializer-list> = ',' <constant-initializer> <rest-of-constant-initializer-list> | //\\ ',' | //\\ ; case P_constant_initializer_list: case P_rest_of_constant_initializer_list: if (alt >= 1) return -1; return mkbinop(AST_CommaSEQ, build_ast(A[ap+0]), build_ast(A[ap+1])); // TO DO: only partially done // struct initialisation //\\ P<constant-initializer> = <constant-expression>, '{' <constant-initializer-list> '}'; case P_constant_initializer: return build_ast(A[ap+0]); //\\ P<opt-scalar-init> = '=' <level1> | ; case P_opt_scalar_init: // TO DO: expression *must not* allow comma expressions. So I've replaced <expression> with <level1> // - probably should use <rvalue> but that is currently pointing to <expression> if (alt == 1) return -1; return mkbinop(AST_AssignTo, -1, build_ast(A[ap+0])); // TO DO: need to add code into declaration AST. Depends if known at compile/link time // or needs to be computed at decl time. //\\ P<opt-param-list> = "void" <?closer>, <param> <opt-rest-of-param-list>; case P_opt_param_list: if (alt == 0) return -1; T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); nthchild(T[1], 2) = T[2]; return T[1]; //\\ P<opt-rest-of-param-list> = ',' <param> <opt-rest-of-param-list> | ; case P_opt_rest_of_param_list: if (alt == 1) return -1; T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); nthchild(T[1], 2) = T[2]; return T[1]; // if we want things like "const int" parameters, need to add "const" here... //\\ P<param> = <type> <opt-pointerto-seq> <identifier> <opt-empty-array-bounds>; case P_param: T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); T[3] = build_ast(A[ap+2]); T[4] = build_ast(A[ap+3]); T[1] = T[3]; T[2] = ConstructType(c[leftchild(T[1])].s, /*scope*/ -1, /*qualifier*/ -1, /*type*/ T[1], /*pointers*/ T[2], /*arrays*/ T[4]); T[3] = -1; // to be plugged by parent return mk(AST_DefParam, 3, T); #ifdef NEVER T[1] = build_ast(A[ap+2]); // ident T[2] = build_ast(A[ap+0]); // type derived from <type> and <opt-pointerto-seq> and <opt-empty-array-bounds> ... T[4] = build_ast(A[ap+1]); // <opt-pointerto-seq> if (T[4] != -1) { // insert T[2] down left branch of T[4]? T[0] = T[4]; while (leftchild(T[4]) != -1) T[4] = leftchild(T[4]); leftchild(T[4]) = T[2]; T[2] = T[0]; } // TO DO: still to add arrayof if needed T[3] = -1; // link to next plugged by parent return mk(AST_DefParam, 3, T); #endif //\\ P<opt-empty-array-bounds> = '[' <opt-constant-expression> ']' <opt-empty-array-bounds> | ; case P_opt_empty_array_bounds: if (alt == 1) return -1; T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); // TO DO: multi-d arrays return TypeArrayOf(/*child*/-1, /*index*/TypeAtom("signed", "int"), /*lower*/-1, /*upper*/T[1]); // child plugged by caller // index is an integer // lower is always 0 // TO DO: upper if given //\\ P<opt-constant-expression> = <constant-expression>, ; case P_opt_constant_expression: if (alt == 1) return -1; return build_ast(A[ap+0]); //\\ P<statement-list> = <statement> <opt-statement-list>; //\\ P<opt-statement-list> = <statement> <opt-statement-list> | ; case P_statement_list: case P_opt_statement_list: if (alt == 1) return -1; T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); if (T[2] == -1) return T[1]; return mkbinop(AST_SEQ, T[1], T[2]); //\\ P<selection-statement> = "if" '(' <expression> ')' <statement> <opt-else-statement> | //\\ "switch" '(' <expression> ')' '{' <compound-statement> '}'; case P_selection_statement: T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); if (alt == 0) { T[3] = build_ast(A[ap+2]); if (T[3] == -1) return mkbinop(AST_IFTHEN, T[1], T[2]); return mk(AST_IFTHENELSE, 3, T); // extra fields for jump labels? } T[3] = -1; // extra hole for break label. T[4] = -1; // link to chain of case statements T[5] = -1; // link to default case return mk(AST_Switch, 5, T); //\\ P<opt-else-statement> = "else" <statement> | ; case P_opt_else_statement: if (alt == 1) return -1; T[1] = build_ast(A[ap+0]); //fprintf(stderr, "ELSE: %d\n", T[1]); return T[1]; //\\ P<iteration-statement> = "while" '(' <expression> ')' <statement> | //\\ "do" <statement> "while" '(' <expression> ')' ';' | //\\ "for" '(' <opt-expression> ';' <opt-expression> ';' <opt-expression> ')' <statement>; case P_iteration_statement: // loops need jumpback, continue, and exit labels. Store them in the tuple? // use ints, or generate private names and use regular 'label' AST items? T[1] = build_ast(A[ap+0]); T[2] = build_ast(A[ap+1]); // the labels *could* be allocated here rather than in the code generator. May be more convenient. T[3] = -1; // exit_lab T[4] = -1; // continue_lab T[5] = -1; // branch_back_lab if (alt == 0) { return mk(AST_C_While, 2+3, T); } else if (alt == 1) { return mk(AST_C_DoWhile, 2+3, T); } else { if (T[2] == -1) T[2] = mkconst(TRUE); // for (init ; ; inc) ... is treated as for (init ; TRUE; inc ) ... // - we can let the code generator optimise the TRUE != 0 test away T[3] = build_ast(A[ap+2]); T[4] = build_ast(A[ap+3]); T[5] = -1; // ditto T[6] = -1; T[7] = -1; T[8] = -1; // body lab return mk(AST_C_ForLoop, 4+4, T); } //\\ P<jump-statement> = "goto" <identifier> ';' | //\\ "continue" ';' | //\\ "break" ';' | //\\ "return" <opt-expression> ';'; case P_jump_statement: // Can we warn about an unlabelled statement after a jump statement? eg "* Access?" // perhaps by putting <?unlabelled-statement> in the grammar and testing to see if it is non-null? // (or better, <!label>, where label of course includes case and default. // Nope. Probably better done during the final (code-generating) tree-walk - combine // with generic dead code removal? if (alt == 0) { // goto has to look for a label within the current procedure. If we allow nested // procedures, extreme care has to be taken when jumping out of a procedure. May not // be allowed in C except via longjump()? // actually just about any label target you can see in any scope is a valid goto target - // you can jump out of any lexical compound block to a parent block, and you can't jump // out of a procedure because you can't have statements outside of a procedure. Any othe // block that has already been closed is no longer visible to you via the scoping rules. // slight runtime issue therefore when jumping to a parent compound-statement, when you // are jumping out of a block that had local declarations. OK when all your references // are via a single stack base, but what if you were keeping track of the stack depth // and indexing negatively from the (varying) stack top??? T[1] = build_ast(A[ap+0]); return mkmonop(AST_Goto, T[1]); } else if (alt == 1) { // continue and break need to be within a loop, so we need a stack of blocks to chain back through. return mkmonop(AST_C_Continue, -1); // -1 hooks back to enclosing loop } else if (alt == 2) { return mkmonop(AST_C_Break, -1); // -1 hooks back to enclosing loop } else { T[1] = build_ast(A[ap+0]); if (T[1] == -1) { return mkmonop(AST_Return, -1); // -1 hooks back to enclosing procedure. warn if missing result } else{ return mkbinop(AST_ReturnResult, T[1], -1); // -1 hooks back to enclosing fn. (& Check the type/spurious) } } //\\ P<new-structname> = //\\ <TAG>; case P_new_structname: // TO DO: vars should have a proper type field, and the struct name should be // treated like a type. We need a proper name table now. T[1] = mkmonop(AST_TAG, build_ast(A[ap+0])); return T[1]; //\\ P<existing-structname> = //\\ <TAG>; case P_existing_structname: // TO DO: vars should have a proper type field, and the struct name should be // treated like a type. We need a proper name table now. // solution to the problem that has been bothering me... // return a struct type object with the name present but not the type info, // to be filled in on the populate_types.c pass T[1] = mkmonop(AST_TAG, build_ast(A[ap+0])); return T[1]; // end of inserted file. default: if ((phrase >= 512) && (phrase < 512+MAX_PHRASE)) { semantic_error("* Missing case label P_%s:\n", phrasename[phrase-512]); } else { char temp[16]; sprintf(temp, "%d", phrase); semantic_error("* Missing case label P_<<%s>>\n", temp); } } //\\ E return -1; // RETURN ERROR TRIP. need assert here??? } /* We have a loop! should be easy to find: #6999 0x0804b9fc in build_ast_inner (ap=3, caller=1301) at grammar.c:200 #7000 0x0804e5b6 in build_ast_inner (ap=386, caller=1301) at grammar.c:1301 #7001 0x0804e5b6 in build_ast_inner (ap=253, caller=1292) at grammar.c:1301 #7002 0x0804e55a in build_ast_inner (ap=249, caller=1258) at grammar.c:1292 #7003 0x0804e40e in build_ast_inner (ap=79, caller=1124) at grammar.c:1258 #7004 0x0804dcb3 in build_ast_inner (ap=75, caller=1112) at grammar.c:1124 #7005 0x0804dc37 in build_ast_inner (ap=70, caller=1108) at grammar.c:1112 #7006 0x0804dc0b in build_ast_inner (ap=66, caller=1094) at grammar.c:1108 #7007 0x0804db58 in build_ast_inner (ap=61, caller=1270) at grammar.c:1094 #7008 0x0804e4a7 in build_ast_inner (ap=57, caller=1221) at grammar.c:1270 #7009 0x0804e223 in build_ast_inner (ap=16, caller=1124) at grammar.c:1221 #7010 0x0804dcb3 in build_ast_inner (ap=12, caller=1037) at grammar.c:1124 #7011 0x0804d89a in build_ast_inner (ap=8, caller=200) at grammar.c:1037 #7012 0x0804b9fc in build_ast_inner (ap=3, caller=1301) at grammar.c:200 */ /* typedef struct operation { int AST_Code; int Children; char *Diag_Name; char *C_Name; int Display_Children; } operation; */ static char so_temp[1024]; void show_object(int p) { switch (astop(p)) { case AST_TAG: strcat(so_temp, c[leftchild(p)].s); return; case AST_Idx: show_object(leftchild(p)); strcat(so_temp, "["); show_object(rightchild(p)); strcat(so_temp, "]"); return; case AST_Ptr: show_object(leftchild(p)); strcat(so_temp, "->"); show_object(rightchild(p)); return; case AST_Member: show_object(leftchild(p)); strcat(so_temp, "."); show_object(rightchild(p)); return; default: strcat(so_temp, "???"); return; // for now - TO DO } } char *so(int p) { so_temp[0] = '\0'; show_object(p); return so_temp; } int get_lineno(int p) { int l; int disp; int this; int i; switch (astop(p)) { case AST_TAG: return c[leftchild(p)].l; case AST_Idx: case AST_Ptr: case AST_Member: l = get_lineno(leftchild(p)); if (l == -1) return get_lineno(rightchild(p)); default: disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { l = get_lineno(nthchild(p, i)); if (l != -1) return l; } this = this>>1; } return -1; } } void print_all_trips(void) { // breaks when we shorten a trip using AST_REDIRECT :-( int ap = RESERVE_FOR_TYPE_INFO; for (;;) { int oper = AST[ap]; int count = AST[ap+1]; int child = ap+2; int disp = op[oper-AST_BASE].Display_Children; int this; if ((oper >= AST_BASE) && (oper <= AST_TOP)) { fprintf(stdout, "%4d: %s", ap, op[oper-AST_BASE].Diag_Name); } else { fprintf(stdout, "%4d: %d", ap, oper/*op[oper-AST_BASE].Diag_Name*/); } if (oper == AST_TAG) { if (AST[child] != -1) { fprintf(stdout, " \"%s\" (c[%d].s)", c[AST[child]].s, AST[child]); } else { fprintf(stdout, " %d", AST[child]); } } else { this = 1<<(count-1); for (;;) { if (child == ap+2+count) break; if ((AST_BASE <= AST[child]) && (AST[child] <= AST_TOP) && (astop(AST[child]) == AST_TAG)) { fprintf(stdout, " \"%s\"", c[leftchild(AST[child])].s); } else { if (disp&this) { fprintf(stdout, " [%d]", AST[child]); } else { fprintf(stdout, " %d", AST[child]); } } this = this >> 1; child += 1; } } fprintf(stdout, " [[typeinfo=%d]]\n", TYPEINFO(ap)); ap = ap + 2 + AST[ap+1] + RESERVE_FOR_TYPE_INFO; if (ap >= nexttrip) break; } fprintf(stdout, "-----------------\n"); } void show_ast(int ap, int depth, int comefrom) { int oper = AST[ap]; int count = AST[ap+1]; int child = ap+2; int disp; int this; if (ap == -1) { /*fprintf(stdout, "show_ast(-1);\n");*/ return; } if ((oper < AST_BASE) || (oper > AST_TOP)) { fprintf(stderr, "%4d: oper = %d, count = %d, Children = %d (came from %4d:)\n", ap, oper, count, op[oper-AST_BASE].Children, comefrom); exit(1); } #ifdef NEVER while (astop(ap) == AST_REDIRECT) { fprintf(stderr, "Redirect from %d to %d\n", ap, leftchild(ap)); ap = leftchild(ap); } oper = AST[ap]; count = AST[ap+1]; child = ap+2; #endif if ((oper != AST_REDIRECT) && (count != op[oper-AST_BASE].Children)) { fprintf(stderr, "%4d: oper = %d (%s), count = %d, Children = %d (came from %4d:)\n", ap, oper, op[oper-AST_BASE].Diag_Name, count, op[oper-AST_BASE].Children, comefrom); fflush(stderr); assert(count == op[oper-AST_BASE].Children); } switch (oper) { case AST_REDIRECT: fprintf(stdout, "%4d: %s %d\n", ap, op[oper-AST_BASE].Diag_Name, AST[ap+2]); break; case AST_TAG: fprintf(stdout, "%4d: %s \"%s\" [[typeinfo=%d]]\n", ap, op[oper-AST_BASE].Diag_Name, c[AST[ap+2]].s, TYPEINFO(ap)); if (TYPEINFO(ap) != -1) { show_ast(TYPEINFO(ap),depth+1,comefrom); } break; case AST_Var: fprintf(stdout, "%4d: %s %s %d\n", ap, op[oper-AST_BASE].Diag_Name, c[AST[ap+2]].s, AST[ap+3]); break; case AST_Const: fprintf(stdout, "%4d: %s %s\n", ap, op[oper-AST_BASE].Diag_Name, escape(c[AST[ap+2]].s, c[AST[ap+2]].t)); break; default: if ((oper < AST_BASE) || (oper > AST_TOP)) { fprintf(stderr, "* WARNING: show_ast oper=%d branched via 'default:' - add a case for it.\n", oper); } else { fprintf(stderr, "* WARNING: show_ast oper=%s branched via 'default:' - add a case for it.\n", op[oper-AST_BASE].Diag_Name); } exit(0); case AST_Add: ; case AST_Sub: ; case AST_Mul: ; case AST_Div: ; case AST_Mod: ; case AST_AddAss: ; case AST_SubAss: ; case AST_MulAss: ; case AST_DivAss: ; case AST_ModAss: ; case AST_Post_Inc: ; case AST_Pre_Inc: ; case AST_Post_Dec: ; case AST_Pre_Dec: ; case AST_Idx: ; case AST_Member: ; case AST_Ptr: ; case AST_Call: ; case AST_Cond: ; case AST_Type: ; case AST_Return: ; case AST_ReturnResult: ; case AST_LE: ; case AST_GT: ; case AST_LT: ; case AST_GE: ; case AST_EQ: ; case AST_NE: ; case AST_IFTHENELSE: ; case AST_IFTHEN: ; case AST_Switch: ; case AST_UNeg: ; case AST_UPos: ; case AST_UBitNot: ; case AST_UBoolNot: ; case AST_BoolAnd: ; case AST_BoolOr: ; case AST_ShortcutBoolAnd: ; case AST_ShortcutBoolOr: ; case AST_BitAnd: ; case AST_BitOr: ; case AST_BitAndAss: ; case AST_BitOrAss: ; case AST_BitXor: ; case AST_BitXorAss: ; case AST_BitLsh: ; case AST_BitRsh: ; case AST_BitLshAss: ; case AST_BitRshAss: ; case AST_AssignTo: ; case AST_Declare: ; case AST_DefProc: ; case AST_DefParam: ; case AST_ReceiveParam: ; case AST_UseParam: ; case AST_AddressOf: ; case AST_IndirectThrough: ; case AST_C_ForLoop: ; case AST_C_While: ; case AST_C_DoWhile: ; case AST_C_Break: ; case AST_C_Continue: ; case AST_Cast: ; case AST_SizeOf: ; case AST_Case: ; case AST_DefaultCase: ; case AST_Label: ; case AST_Goto: ; case AST_Sourceline: ; case AST_LINENO: ; case AST_LINEAR_BLOB: ; case AST_SEQ: ; case AST_CommaSEQ: ; case AST_TYPE_ArrayOf: ; // child index lower upper case AST_TYPE_Struct: ; // child structtypename childname case AST_TYPE_PointerTo: ; // child case AST_TYPE_Atom: ; // NAME bytes signed case AST_TYPE_StructMember: ; // next_member this_element case AST_Scope: ; case AST_SHOWTREE: ; // May or may not need to include the AST_TYPE_* entries here? disp = op[oper-AST_BASE].Display_Children; fprintf(stdout, "%4d: %s", ap, op[oper-AST_BASE].Diag_Name); if (disp == 0) { fprintf(stdout, "\n"); return; } this = 1<<(count-1); for (;;) { if (child == ap+2+count) break; //if (disp&this) fprintf(stdout, " %s", op[AST[AST[child]]-AST_BASE]); if (disp&this) fprintf(stdout, " [%d]", AST[child]); else fprintf(stdout, " %d", AST[child]); this = this >> 1; child += 1; } fprintf(stdout, " [[typeinfo=%d]]\n", TYPEINFO(ap)); this = 1<<(count-1); { child = ap+2; for (;;) { if (child == ap+2+count) break; if (disp&this) if (astop(AST[child]) != AST_TYPE_Struct) show_ast(AST[child], depth+1, ap); this = this >> 1; child += 1; } } break; } fflush(stdout); return; } void show_one_ast(int ap, int depth, int comefrom) { int oper = AST[ap]; int count = AST[ap+1]; int child = ap+2; int disp; int this; if (ap == -1) { /*fprintf(stdout, "show_ast(-1);\n");*/ return; } if ((oper < AST_BASE) || (oper > AST_TOP)) { fprintf(stderr, "%4d: oper = %d, count = %d, Children = %d (came from %4d:)\n", ap, oper, count, op[oper-AST_BASE].Children, comefrom); exit(1); } #ifdef NEVER while (astop(ap) == AST_REDIRECT) { fprintf(stderr, "Redirect from %d to %d\n", ap, leftchild(ap)); ap = leftchild(ap); } oper = AST[ap]; count = AST[ap+1]; child = ap+2; #endif if (count != op[oper-AST_BASE].Children) { fprintf(stderr, "%4d: oper = %d (%s), count = %d, Children = %d (came from %4d:)\n", ap, oper, op[oper-AST_BASE].Diag_Name, count, op[oper-AST_BASE].Children, comefrom); fflush(stderr); assert(count == op[oper-AST_BASE].Children); } switch (oper) { case AST_TAG: fprintf(stdout, "%4d: %s \"%s\" [[typeinfo=%d]]\n", ap, op[oper-AST_BASE].Diag_Name, c[AST[ap+2]].s, TYPEINFO(ap)); break; case AST_Var: fprintf(stdout, "%4d: %s %s %d\n", ap, op[oper-AST_BASE].Diag_Name, c[AST[ap+2]].s, AST[ap+3]); break; case AST_Const: fprintf(stdout, "%4d: %s %s\n", ap, op[oper-AST_BASE].Diag_Name, escape(c[AST[ap+2]].s, c[AST[ap+2]].t)); break; default: if ((oper < AST_BASE) || (oper > AST_TOP)) { fprintf(stderr, "* WARNING: show_one_ast oper=%d branched via 'default:' - add a case for it.\n", oper); } else { fprintf(stderr, "* WARNING: show_one_ast oper=%s branched via 'default:' - add a case for it.\n", op[oper-AST_BASE].Diag_Name); } exit(0); case AST_Add: ; case AST_Sub: ; case AST_Mul: ; case AST_Div: ; case AST_Mod: ; case AST_AddAss: ; case AST_SubAss: ; case AST_MulAss: ; case AST_DivAss: ; case AST_ModAss: ; case AST_Post_Inc: ; case AST_Pre_Inc: ; case AST_Post_Dec: ; case AST_Pre_Dec: ; case AST_Idx: ; case AST_Member: ; case AST_Ptr: ; case AST_Call: ; case AST_Cond: ; case AST_Type: ; case AST_Return: ; case AST_ReturnResult: ; case AST_LE: ; case AST_GT: ; case AST_LT: ; case AST_GE: ; case AST_EQ: ; case AST_NE: ; case AST_IFTHENELSE: ; case AST_IFTHEN: ; case AST_Switch: ; case AST_UNeg: ; case AST_UPos: ; case AST_UBitNot: ; case AST_UBoolNot: ; case AST_BoolAnd: ; case AST_BoolOr: ; case AST_ShortcutBoolAnd: ; case AST_ShortcutBoolOr: ; case AST_BitAnd: ; case AST_BitOr: ; case AST_BitAndAss: ; case AST_BitOrAss: ; case AST_BitXor: ; case AST_BitXorAss: ; case AST_BitLsh: ; case AST_BitRsh: ; case AST_BitLshAss: ; case AST_BitRshAss: ; case AST_AssignTo: ; case AST_Declare: ; case AST_DefProc: ; case AST_DefParam: ; case AST_ReceiveParam: ; case AST_UseParam: ; case AST_AddressOf: ; case AST_IndirectThrough: ; case AST_C_ForLoop: ; case AST_C_While: ; case AST_C_DoWhile: ; case AST_C_Break: ; case AST_C_Continue: ; case AST_Cast: ; case AST_SizeOf: ; case AST_Case: ; case AST_DefaultCase: ; case AST_Label: ; case AST_Goto: ; case AST_Sourceline: ; case AST_LINENO: ; case AST_REDIRECT: ; case AST_LINEAR_BLOB: ; case AST_SEQ: ; case AST_CommaSEQ: ; case AST_TYPE_ArrayOf: ; // child index lower upper case AST_TYPE_Struct: ; // child structtypename childname case AST_TYPE_PointerTo: ; // child case AST_TYPE_Atom: ; // NAME bytes signed case AST_TYPE_StructMember: ; // next_member this_element case AST_Scope: ; // May or may not need to include the AST_TYPE_* entries here? disp = op[oper-AST_BASE].Display_Children; fprintf(stdout, "%4d: %s", ap, op[oper-AST_BASE].Diag_Name); if (disp == 0) { fprintf(stdout, "\n"); return; } this = 1<<(count-1); for (;;) { if (child == ap+2+count) break; //if (disp&this) fprintf(stdout, " %s", op[AST[AST[child]]-AST_BASE]); if (disp&this) fprintf(stdout, " [%d]", AST[child]); else fprintf(stdout, " %d", AST[child]); this = this >> 1; child += 1; } fprintf(stdout, " [[typeinfo=%d]]\n", TYPEINFO(ap)); break; } fflush(stdout); return; } //#include "walk.c" // TO DO: tree rotations to bring constants together // make sure address arithmetic (eg struct offsets) is also folded // arithmetic identities (* 1, + 0, / 1) // check for / 0 // common factors: a*b + a*c = a*(b+c) // advanced: formula simplification void Fold(int p, int *count) { int op; int arity; if ((p == -1) || (astop(p) == -1)) return; op = astop(p); arity = children(p); if (arity == 1) { if ((leftchild(p) != -1) && (astop(leftchild(p)) == AST_Const)) { switch (op) { case AST_UNeg: replace_with_int_const(p, -rightchild(leftchild(p))); *count = *count + 1; return; case AST_UPos: replace_with_int_const(p, +rightchild(leftchild(p))); *count = *count + 1; return; case AST_UBitNot: replace_with_int_const(p, ~rightchild(leftchild(p))); *count = *count + 1; return; case AST_UBoolNot: replace_with_int_const(p, !rightchild(leftchild(p))); *count = *count + 1; return; default: return; } } } else if (arity == 2) { if ((astop(leftchild(p)) == AST_Const) && (astop(rightchild(p)) == AST_Const)) { switch (op) { case AST_Add: replace_with_int_const(p, rightchild(leftchild(p)) + rightchild(rightchild(p))); *count = *count + 1; return; case AST_Sub: replace_with_int_const(p, rightchild(leftchild(p)) - rightchild(rightchild(p))); *count = *count + 1; return; case AST_Mul: replace_with_int_const(p, rightchild(leftchild(p)) * rightchild(rightchild(p))); *count = *count + 1; return; case AST_Div: if (rightchild(rightchild(p)) != 0) replace_with_int_const(p, rightchild(leftchild(p)) / rightchild(rightchild(p))); *count = *count + 1; return; case AST_Mod: if (rightchild(rightchild(p)) != 0) replace_with_int_const(p, rightchild(leftchild(p)) % rightchild(rightchild(p))); *count = *count + 1; return; case AST_LE: replace_with_int_const(p, rightchild(leftchild(p)) <= rightchild(rightchild(p))); *count = *count + 1; return; case AST_GT: replace_with_int_const(p, rightchild(leftchild(p)) > rightchild(rightchild(p))); *count = *count + 1; return; case AST_LT: replace_with_int_const(p, rightchild(leftchild(p)) < rightchild(rightchild(p))); *count = *count + 1; return; case AST_GE: replace_with_int_const(p, rightchild(leftchild(p)) >= rightchild(rightchild(p))); *count = *count + 1; return; case AST_EQ: replace_with_int_const(p, rightchild(leftchild(p)) == rightchild(rightchild(p))); *count = *count + 1; return; case AST_NE: replace_with_int_const(p, rightchild(leftchild(p)) != rightchild(rightchild(p))); *count = *count + 1; return; case AST_BoolAnd: replace_with_int_const(p, rightchild(leftchild(p)) && rightchild(rightchild(p))); *count = *count + 1; return; case AST_BoolOr: replace_with_int_const(p, rightchild(leftchild(p)) || rightchild(rightchild(p))); *count = *count + 1; return; case AST_ShortcutBoolAnd: replace_with_int_const(p, rightchild(leftchild(p)) && rightchild(rightchild(p))); *count = *count + 1; return; case AST_ShortcutBoolOr: replace_with_int_const(p, rightchild(leftchild(p)) || rightchild(rightchild(p))); *count = *count + 1; return; case AST_BitAnd: replace_with_int_const(p, rightchild(leftchild(p)) & rightchild(rightchild(p))); *count = *count + 1; return; case AST_BitOr: replace_with_int_const(p, rightchild(leftchild(p)) | rightchild(rightchild(p))); *count = *count + 1; return; case AST_BitXor: replace_with_int_const(p, rightchild(leftchild(p)) ^ rightchild(rightchild(p))); *count = *count + 1; return; case AST_BitLsh: replace_with_int_const(p, rightchild(leftchild(p)) << rightchild(rightchild(p))); *count = *count + 1; return; // shift by >= 32, or 0, or negative? case AST_BitRsh: replace_with_int_const(p, rightchild(leftchild(p)) >> rightchild(rightchild(p))); *count = *count + 1; return; default: return; } } } else if (op == AST_Cond) { if ((astop(leftchild(p)) == AST_Const) && (astop(rightchild(p)) == AST_Const) && (astop(nthchild(p, 2)) == AST_Const) ) { // case AST_Cond: ; replace_with_int_const(p, ( rightchild(leftchild(p)) ? rightchild(rightchild(p)) : rightchild(nthchild(p, 2)))); *count = *count + 1; return; } } } int fold_constant_expressions(int p) { int count = 0; int i; int disp; int this; if ((p == -1) || (astop(p) == -1)) return 0; // recursively process all children, doing the folds on the way back up // once grandchildren constant expressions have already been folded: if ((astop(p) < AST_BASE) || (astop(p) > AST_TOP)) { fprintf(stderr, "* Internal Error: p=%d astop(p) - %d\n", p, astop(p)); exit(1); } disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { count += fold_constant_expressions(nthchild(p, i)); while (astop(nthchild(p, i)) == AST_REDIRECT) nthchild(p, i) = leftchild(nthchild(p,i)); // perform redirection } this = this>>1; } Fold(p, &count); return count; } // This is given as an example on how to do optimisations at the AST level. // These examples are trivial. It will be a student exercise to add more // useful optimisations. void Identities(int p, int *count) { int op; int arity; if ((p == -1) || (astop(p) == -1)) return; op = astop(p); arity = children(p); if (arity == 2) { switch (op) { case AST_Idx: if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // x[0] => &x+0 => &x replace_with_trip(p, leftchild(p)); *count = *count+1; } case AST_Add: if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==0)) { // replace expr with right replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } return; case AST_Sub: // to do: x - x => 0 if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==0)) { // convert to AST_Neg } return; case AST_Mul: if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==1)) { // replace expr with right replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==1)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==0)) { // replace expr with 0 replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // replace expr with 0 replace_with_trip(p, rightchild(p)); *count = *count+1; } return; case AST_Div: // to do: x / x => 1 if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==1)) { replace_with_trip(p, leftchild(p)); *count = *count+1; } return; case AST_Mod: // TO DO: lower to &n-1 if power of two return; case AST_BoolAnd: case AST_ShortcutBoolAnd: // this is buggy in shortcut case if side-effects :- boolfn() && FALSE if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==TRUE)) { // replace expr with right replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==TRUE)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==FALSE)) { // result of whole expr is FALSE - but AST_BoolAnd should eval RHS anyway? replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==FALSE)) { // result of whole expr is FALSE - but AST_BoolAnd should eval LHS anyway? replace_with_trip(p, rightchild(p)); *count = *count+1; } return; case AST_BoolOr: case AST_ShortcutBoolOr: // this is buggy in shortcut case if side-effects :- boolfn() || TRUE if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==FALSE)) { // replace expr with right replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==FALSE)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==TRUE)) { // whole expression is TRUE - but AST_BoolOr should eval RHS anyway? replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==TRUE)) { // whole expression is TRUE - but AST_BoolOr should eval LHS anyway? replace_with_trip(p, rightchild(p)); *count = *count+1; } return; case AST_BitAnd: if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==-1)) { // replace expr with right replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==-1)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==0)) { // whole expression is 0 replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // whole expression is 0 replace_with_trip(p, rightchild(p)); *count = *count+1; } return; case AST_BitOr: if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==0)) { // replace expr with right replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==-1)) { // replace expr with -1 replace_with_trip(p, leftchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==-1)) { // replace expr with -1 replace_with_trip(p, rightchild(p)); *count = *count+1; } return; case AST_BitXor: if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==0)) { // replace expr with right replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } return; case AST_BitLsh: if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==0)) { // whole expr is 0 replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } return; case AST_BitRsh: if ((astop(leftchild(p)) == AST_Const) && (rightchild(leftchild(p))==0)) { // whole expr is 0 replace_with_trip(p, rightchild(p)); *count = *count+1; } else if ((astop(rightchild(p)) == AST_Const) && (rightchild(rightchild(p))==0)) { // replace expr with left replace_with_trip(p, leftchild(p)); *count = *count+1; } return; default: return; } } } int fold_identities(int p) { int count = 0; int i; int disp; int this; if ((p == -1) || (astop(p) == -1)) return 0; if ((astop(p) < AST_BASE) || (astop(p) > AST_TOP)) { fprintf(stderr, "* Internal Error: p=%d astop(p) - %d\n", p, astop(p)); exit(1); } disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { count += fold_identities(nthchild(p, i)); while (astop(nthchild(p, i)) == AST_REDIRECT) nthchild(p, i) = leftchild(nthchild(p,i)); // perform redirection } this = this>>1; } Identities(p, &count); return count; } // walk the tree top-down. On the way back up, propogate type information from // the leaves, generating derived types for the result of an operation // depending on the types of the operands (eg long + short => long) // This info is also still to be used to do automatic type casting (eg widening short to int) // - probably at point of assignment or parameter passing, where the declared type is known. int derive_type(int p) // return an appropriate derived type that is a function of the operands. { if ((p == -1) || (astop(p) == -1)) return -1; switch (astop(p)) { case AST_ReceiveParam: // procedure parameters are more like declarations return rightchild(p); case AST_TYPE_Struct: // special case for struct members inside another struct if (rightchild(p) != -1) return p; rightchild(p) = rightchild(lookup_identifier(leftchild(leftchild(p)), p)); // which is a struct name tag // don't forget nthchild,2 when this is fixed return p; // not sure if a return is meaningful. case AST_Const: // TO DO: 1) strings, char consts, 2) should already have been assigned at creation? return TypeAtom("signed", "int"); // looking up a tag is wrong, since it might be something like a struct field - it // depends on the context. so don't lookup here? Punt a level up to where context is known? case AST_TAG: // TO DO: lookup tag, and return a pair of tag, type? return -1; case AST_Idx: { int objtype; int idxtype; if (astop(leftchild(p)) == AST_TAG) { TYPEINFO(leftchild(p)) = lookup_identifier(leftchild(leftchild(p)), p); } if (astop(rightchild(p)) == AST_TAG) { TYPEINFO(rightchild(p)) = lookup_identifier(leftchild(rightchild(p)), p); } objtype = TYPEINFO(leftchild(p)); idxtype = TYPEINFO(rightchild(p)); if ((objtype == -1) || (idxtype == -1)) { assert(objtype != -1); assert(idxtype != -1); } if ((astop(objtype) != AST_TYPE_ArrayOf) && (astop(objtype) != AST_TYPE_PointerTo)) { // TO DO: ***OR*** a pointer to something, which is // treated very similarly to an array... error_line(get_lineno(leftchild(p))); fprintf(stderr, "* Error: indexed object %s is not an array or pointer (found %s)\n", so(leftchild(p)), op[astop(objtype)-AST_BASE].Diag_Name); exit(1); } // TO DO: verify that idxtype is some kind of integer that can index the array return leftchild(objtype); // set parent TYPEINFO to an ast cell whose type is the struct type } case AST_Member: { // objtype should be a struct (struct's type name is tag:leftchild(objtype)) // (struct can be located using the scope rules.) int objtype; // fieldtype should be a tag. int fieldtype; if (astop(leftchild(p)) == AST_TAG) TYPEINFO(leftchild(p)) = lookup_identifier(leftchild(leftchild(p)), p); objtype = TYPEINFO(leftchild(p)); if (astop(objtype) != AST_TYPE_Struct) { error_line(get_lineno(leftchild(p))); fprintf(stderr, "* Error: AST_Member: object %s is not a struct (appears it is a %d)\n", so(leftchild(p)), astop(objtype)); exit(1); } if (astop(rightchild(p)) == AST_TAG) { // lookup fieldtype in the struct (error if not a field of this struct) fieldtype = TYPEINFO(rightchild(p)) = get_member_type(objtype /* struct type */, rightchild(p) /* ast tag */); } else { fieldtype = TYPEINFO(rightchild(p)); fprintf(stderr, "* Internal Error: struct member is not a tag? (got %d)\n", astop(fieldtype)); exit(1); } return fieldtype; } case AST_Ptr: { int objtype; int fieldtype; if (astop(leftchild(p)) == AST_TAG) TYPEINFO(leftchild(p)) = lookup_identifier(leftchild(leftchild(p)), p); objtype = TYPEINFO(leftchild(p)); if (astop(objtype) != AST_TYPE_PointerTo) { error_line(get_lineno(leftchild(p))); fprintf(stderr, "* Error: AST_Ptr: object %d is not a pointer to something (appears it is a %d)\n", leftchild(p), astop(objtype)); exit(1); } if (astop(rightchild(p)) == AST_TAG) { // lookup fieldtype in the struct (error if not a field of this struct) fieldtype = TYPEINFO(rightchild(p)) = get_member_type(leftchild(objtype) /* struct type */, rightchild(p) /* ast tag */); } else { fieldtype = TYPEINFO(rightchild(p)); error_line(get_lineno(leftchild(p))); fprintf(stderr, "* Internal Error: struct member is not a tag? (got %d)\n", astop(fieldtype)); exit(1); } return fieldtype; // or maybe pointer to fieldtype? } case AST_IndirectThrough: { int objtype; if (astop(leftchild(p)) == AST_TAG) TYPEINFO(leftchild(p)) = lookup_identifier(leftchild(leftchild(p)), p); objtype = TYPEINFO(leftchild(p)); // I think we just peel off one level of AST_TYPE_PointerTo from the type info // for each AST_IndiectThrough (but it's an error is none present) if (astop(objtype) != AST_TYPE_PointerTo) { error_line(get_lineno(leftchild(p))); fprintf(stderr, "* Error: Indirected object %s is not a pointer (got %d)\n", so(leftchild(p)), astop(objtype)); exit(1); } return leftchild(objtype); } case AST_Call: // DefProc: scope/qual/type/ptr-seq, id, params, body { // locate the function's DefProc, int defproc; defproc = lookup_identifier(leftchild(leftchild(p)), p); // set this nodes nthchild(ap, 2) to the DefProc node nthchild(p, 2) = defproc; // return the type that the function returns here return leftchild(defproc); } // monops: case AST_UNeg: case AST_UPos: case AST_UBitNot: case AST_UBoolNot: case AST_Post_Inc: case AST_Pre_Inc: case AST_Post_Dec: case AST_Pre_Dec: if (astop(leftchild(p)) == AST_TAG) TYPEINFO(leftchild(p)) = lookup_identifier(leftchild(leftchild(p)), p); return TYPEINFO(leftchild(p)); // TO DO: add assignment operators case AST_AddAss: case AST_SubAss: case AST_MulAss: case AST_DivAss: case AST_ModAss: case AST_BitAndAss: case AST_BitOrAss: case AST_BitXorAss: case AST_BitLshAss: case AST_BitRshAss: case AST_AssignTo: // Can drop through for now. //if (astop(leftchild(p)) == AST_TAG) TYPEINFO(leftchild(p)) = lookup_identifier(leftchild(leftchild(p)), p); //if (astop(rightchild(p)) == AST_TAG) TYPEINFO(rightchild(p)) = lookup_identifier(leftchild(rightchild(p)), p); // remove the comment symbols immediately above when I put some code back in the block below... //if ((TYPEINFO(leftchild(p))) != (TYPEINFO(rightchild(p)))) { // BUG: Actually we want a full tree compare, esp to count the levels of indirection // and anyway, this is reporting *everything* as wrong! // test cases "int *a; int b; a=&b;" ... "int a; int b; float c; a = b+c;" //fprintf(stderr, "? Info: possible type mismatch on assignment\n"); //} // binops: same as for cond below, derived type must consider both left and right children - TO DO case AST_Add: case AST_Sub: case AST_Mul: case AST_Div: case AST_Mod: case AST_LE: case AST_GT: case AST_LT: case AST_GE: case AST_EQ: case AST_NE: case AST_BoolAnd: case AST_BoolOr: case AST_ShortcutBoolAnd: case AST_ShortcutBoolOr: case AST_BitAnd: case AST_BitOr: case AST_BitXor: case AST_BitLsh: case AST_BitRsh: if (astop(leftchild(p)) == AST_TAG) TYPEINFO(leftchild(p)) = lookup_identifier(leftchild(leftchild(p)), p); if (astop(rightchild(p)) == AST_TAG) TYPEINFO(rightchild(p)) = lookup_identifier(leftchild(rightchild(p)), p); // we need to detect gross type mismatches here, and also do type conversions // where appropriate return TYPEINFO(leftchild(p)); case AST_Cond: // should check that the condition (leftchild(p)) is an int/boolean if (astop(leftchild(p)) == AST_TAG) TYPEINFO(leftchild(p)) = lookup_identifier(leftchild(leftchild(p)), p); if (astop(rightchild(p)) == AST_TAG) TYPEINFO(rightchild(p)) = lookup_identifier(leftchild(rightchild(p)), p); if (astop(nthchild(p,2)) == AST_TAG) TYPEINFO(nthchild(p,2)) = lookup_identifier(leftchild(nthchild(p,2)), p); return TYPEINFO(rightchild(p)); // left and right must be coerced to a compatible supertype // so that the result of this expr can be unconditionally used // - so it is still a TO DO to incorporate nthchild(p, 2) also. default: return -1; } } int get_size_inner(int p, char *filename, int lineno) #define get_size(p) get_size_inner(p, __FILE__, __LINE__) { if (astop(p) == AST_TYPE_Atom) return rightchild(p); if (astop(p) == AST_TYPE_PointerTo) return 4; if (astop(p) == AST_TYPE_StructMember) return get_size(nthchild(p,2)); // leftchild - child, rightchild - index - 2,3,4 - lower, upper, element-size if (astop(p) == AST_TYPE_ArrayOf) { if (nthchild(p,4) == -1) fprintf(stderr, "? get size: %d nthchild: %d elements: %d\n", get_size(leftchild(p)), nthchild(p,4), rightchild(nthchild(p,3))); return get_size(leftchild(p)) * rightchild(nthchild(p,3)); // assume lower bound 0 } if (astop(p) == AST_TYPE_Struct) return nthchild(p,2); // the size error_line(get_lineno(p)); fprintf(stdout, "! bad getsize object - %s, Line %d: get_size: ", filename, lineno); show_one_ast(p,-1,-1); exit(1); /* NOT REACHED */ return -1; } void populate_types(int p) { int i; int disp; int this; if ((p == -1) || (astop(p) == -1)) return; if (astop(p) == AST_Scope) { push_scope(p); } // recursively process all children, decorating the tree with derived type information on the way back up disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { populate_types(nthchild(p, i)); } this = this>>1; } TYPEINFO(p) = derive_type(p); // derived from its operands, which have already been handled if (astop(p) == AST_TYPE_Struct) { int member = rightchild(p), offset = 0; for (;;) { // ... AST_TYPE_StructMember ... nthchild(member, 3) = offset; offset += get_size(member); // fill in the member object offsets member = leftchild(member); if (member == -1) break; } // fill in the struct size nthchild(p, 2) = offset; } else if (astop(p) == AST_TYPE_ArrayOf) { // fill in the array member size nthchild(p, 4) = get_size(leftchild(p)); } if (astop(p) == AST_Scope) { pop_scope(); } } int temp_label(void) { static int next = 999; next += 1; return next; } void assign_loop_labels(int p, int break_lab, int continue_lab, int branch_back_lab) { // (It's really "loop and switch" due to need to handle "break" keyword) int i; int disp; int this; if ((p == -1) || (astop(p) == -1)) return; // Put source-level tests here in common code, not in code-generator where they were before. // If I can't find a way to get the source line here, maybe even move it back up to grammar.c if (astop(p) == AST_C_Continue) { if (continue_lab == -1) {fprintf(stderr, "* ERROR: continue not in a loop\n"); exit(1);} leftchild(p) = continue_lab; return; } else if (astop(p) == AST_C_Break) { if (break_lab == -1) {fprintf(stderr, "* ERROR: break not in a loop or switch\n"); exit(1);} leftchild(p) = break_lab; return; } // When a loop or switch body is found, set up the parameters // for any continues or breaks inside those statements. // Nested switch statements take priority over loops for a "break" if ((astop(p) == AST_C_While) || (astop(p) == AST_C_DoWhile)) { break_lab = nthchild(p, 2+0); continue_lab = nthchild(p, 2+1); branch_back_lab = nthchild(p, 2+2); // actually in while() it is used for the start of the code body if (break_lab == -1) nthchild(p, 2+0) = break_lab = temp_label(); if (continue_lab == -1) nthchild(p, 2+1) = continue_lab = temp_label(); if (branch_back_lab == -1) nthchild(p, 2+2) = branch_back_lab = temp_label(); // again, body_lab assign_loop_labels(rightchild(p), break_lab, continue_lab, branch_back_lab); } else if (astop(p) == AST_C_ForLoop) { break_lab = nthchild(p, 4+0); continue_lab = nthchild(p, 4+1); branch_back_lab = nthchild(p, 4+2); if (break_lab == -1) nthchild(p, 4+0) = break_lab = temp_label(); if (continue_lab == -1) nthchild(p, 4+1) = continue_lab = temp_label(); if (branch_back_lab == -1) nthchild(p, 4+2) = branch_back_lab = temp_label(); nthchild(p, 4+3) = temp_label(); // body lab assign_loop_labels(nthchild(p, 3), break_lab, continue_lab, branch_back_lab); } else if (astop(p) == AST_Switch) { int case_lab_chain; int default_lab; break_lab = nthchild(p, 2+0); // <cond> <body> break_lab case_lab_chain = nthchild(p, 2+1); // chain of 'case <n>' labels default_lab = nthchild(p, 2+2); // default label. if (break_lab == -1) { nthchild(p, 2) = break_lab = temp_label(); } assign_loop_labels(rightchild(p), break_lab, continue_lab, branch_back_lab); } // recursively search for more loops and switch bodies throughout the code: disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { assign_loop_labels(nthchild(p, i), break_lab, continue_lab, branch_back_lab); } this = this>>1; } } // WARNING: a switch statement generates a "DEFAULT" opcode even though no "default:" was given. // it should in that case send it to a piece of code that prints out the fact that a switch label // was missing and there was no default. Depending on how fancy the diags get, that can be specific // about which switch it was (source line no) and what the index was. void build_switch_table(int p, int enclosing_switch) { int i; int disp; int this; if ((p == -1) || (astop(p) == -1)) return; if (astop(p) == AST_Switch) { // leftchild = switch value // rightchild = compound statement // 1st extra field is for target of "break" statement (integer) // 2nd extra field is link to chain of case statements (integer) // 3rd extra field is default case (integer) build_switch_table(rightchild(p), p); // Now we have all the case labels in a linked list, and we know // where the default is. if (nthchild(p, 3) == -1) { if (nthchild(p, 4) == -1) { fprintf(stderr, "* Error: switch block does not contain any case statements (or default:)\n"); } else { fprintf(stderr, "? Warning: switch block does not contain any case statements\n"); } } } else if (astop(p) == AST_Case) { // leftchild = <n>, // rightchild = link to next case // last = internal number for label if (enclosing_switch == -1) fprintf(stderr, "* ERROR: case with no switch?\n"); // move to point of call in grammar.c? nthchild(p, 2) = temp_label(); rightchild(p) = nthchild(enclosing_switch, 2+1); nthchild(enclosing_switch, 2+1) = p; } else if (astop(p) == AST_DefaultCase) { if (enclosing_switch == -1) fprintf(stderr, "* ERROR: default with no switch?\n"); // move to point of call in grammar.c? leftchild(p) = enclosing_switch; rightchild(p) = temp_label(); nthchild(enclosing_switch, 2+2) = p; } else { // recursively search for more switch bodies throughout the code: // We could alternatively call one level of this in grammar.c after // compiling the switch disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { build_switch_table(nthchild(p, i), enclosing_switch); } this = this>>1; } } } void check_function_results(int p, int enclosing_defproc, int *resultcount) { if ((p == -1) || (astop(p) == -1)) return; // Note that it was easier to have the Populate_Types module handle the AST_Call // item, where the caller of a procedure is given a link to the procedure // definition, in order to check the return type, and to pass critical // info about stack usage to the PRECALL and CALL macros switch (astop(p)) { // leftchild() = scope/qual/type/ptr-seq case AST_DefProc: { int count = 0; check_function_results(nthchild(p,3), p, &count); if ((count == 0) && (rightchild(leftchild(p)) != 0)) { error_line(c[leftchild(rightchild(p))].l); fprintf(stderr, "* Error: non-void function %s does not return a result\n", c[leftchild(rightchild(p))].s); } } break; case AST_ReturnResult: //"returns result in void procedure" *resultcount = 1 + *resultcount; break; case AST_Return: //"result value missing in non-void procedure" *resultcount = *resultcount + 1; break; default: break; } { int i; int disp; int this; // recursively search for more proc/fn bodies throughout the code: // We could alternatively call one level of this in grammar.c after // compiling the proc disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { if (astop(nthchild(p, i)) != AST_TYPE_Struct) check_function_results(nthchild(p, i), enclosing_defproc, resultcount); } this = this>>1; } } } #define fixup(x) while (astop(x) == AST_REDIRECT) x = leftchild(x) void tweak_booleans1(int p) // add != 0 where needed { int i; int disp; int this; int cond; // recursively search for more "&&" and "||"s throughout the code: disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { if (astop(nthchild(p, i)) != AST_TYPE_Struct) {tweak_booleans1(nthchild(p, i)); fixup(nthchild(p, i));} } this = this>>1; } // if the condition did not have an explicit comparison at the root, // then add an implied "!= 0" (although there's a chance for // optimisation here if the operand is already coerced into a // proper boolean, -1 or 0) if ((AST_ShortcutBoolAnd == astop(p)) || (astop(p) == AST_ShortcutBoolOr)) { cond = leftchild(p); if ((astop(cond) < AST_LE) || (astop(cond) > AST_NE)) { if ((astop(cond) != AST_ShortcutBoolAnd) && (astop(cond) != AST_ShortcutBoolOr)) { if (astop(cond) != AST_UBoolNot) { cond = mkbinop(AST_NE, cond, mkconst(0)); leftchild(p) = cond; } } } cond = rightchild(p); if ((astop(cond) < AST_LE) || (astop(cond) > AST_NE)) { if ((astop(cond) != AST_ShortcutBoolAnd) && (astop(cond) != AST_ShortcutBoolOr)) { if (astop(cond) != AST_UBoolNot) { cond = mkbinop(AST_NE, cond, mkconst(0)); rightchild(p) = cond; } } } } else if (astop(p) == AST_C_While) { cond = leftchild(p); if ((astop(cond) < AST_LE) || (astop(cond) > AST_NE)) { if ((astop(cond) != AST_ShortcutBoolAnd) && (astop(cond) != AST_ShortcutBoolOr)) { if (astop(cond) != AST_UBoolNot) { cond = mkbinop(AST_NE, cond, mkconst(0)); leftchild(p) = cond; } } } } else if (astop(p) == AST_C_DoWhile) { cond = rightchild(p); if ((astop(cond) < AST_LE) || (astop(cond) > AST_NE)) { if ((astop(cond) != AST_ShortcutBoolAnd) && (astop(cond) != AST_ShortcutBoolOr)) { if (astop(cond) != AST_UBoolNot) { cond = mkbinop(AST_NE, cond, mkconst(0)); rightchild(p) = cond; } } } } else if (astop(p) == AST_C_ForLoop) { cond = rightchild(p); if ((astop(cond) < AST_LE) || (astop(cond) > AST_NE)) { if ((astop(cond) != AST_ShortcutBoolAnd) && (astop(cond) != AST_ShortcutBoolOr)) { if (astop(cond) != AST_UBoolNot) { cond = mkbinop(AST_NE, cond, mkconst(0)); rightchild(p) = cond; } } } } else if (astop(p) == AST_IFTHENELSE) { cond = leftchild(p); if ((astop(cond) < AST_LE) || (astop(cond) > AST_NE)) { if ((astop(cond) != AST_ShortcutBoolAnd) && (astop(cond) != AST_ShortcutBoolOr)) { if (astop(cond) != AST_UBoolNot) { cond = mkbinop(AST_NE, cond, mkconst(0)); leftchild(p) = cond; } } } } else if (astop(p) == AST_IFTHEN) { // picks up statements like: if (!(a < b))) ...; cond = leftchild(p); if ((astop(cond) < AST_LE) || (astop(cond) > AST_NE)) { if ((astop(cond) != AST_ShortcutBoolAnd) && (astop(cond) != AST_ShortcutBoolOr)) { if (astop(cond) != AST_UBoolNot) { cond = mkbinop(AST_NE, cond, mkconst(0)); leftchild(p) = cond; } } } } else if (astop(p) == AST_Cond) { cond = leftchild(p); if ((astop(cond) < AST_LE) || (astop(cond) > AST_NE)) { if ((astop(cond) != AST_ShortcutBoolAnd) && (astop(cond) != AST_ShortcutBoolOr)) { if (astop(cond) != AST_UBoolNot) { cond = mkbinop(AST_NE, cond, mkconst(0)); leftchild(p) = cond; } } } } } void tweak_booleans2(int p) // propogate (!(x)) if possible { int i; int disp; int this; int cond; // simple_tests/boolean1.c is currently buggy. "if (!(!x)) ..." // recursively search for more opportunities to un-negate easy conditions disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { if (astop(nthchild(p, i)) != AST_TYPE_Struct) {tweak_booleans2(nthchild(p, i)); fixup(nthchild(p, i));} } this = this>>1; } if (AST_UBoolNot == astop(p)) { cond = leftchild(p); if ((astop(cond) >= AST_LE) && (astop(cond) <= AST_NE)) { int regular_op[6] = {AST_LE, AST_GT, AST_LT, AST_GE, AST_EQ, AST_NE}; int negate_op[6] = {AST_GT, AST_LE, AST_GE, AST_LT, AST_NE, AST_EQ}; assert(regular_op[astop(cond)-AST_LE] == astop(cond)); //astop(cond) = negate_op[astop(cond)-AST_LE]; replace_with_trip(p, mkbinop(negate_op[astop(cond)-AST_LE], leftchild(cond), rightchild(cond))); // } else if (astop(cond) == AST_UBoolNot) { ... remove double negation? Or is !(!x) a good shorthand for canonicalising a boolean? // (yes, that's what gcc does) } else { // something like if (!a) ... replace with 'a == 0'? } } else if (astop(p) == AST_IFTHENELSE) { while (astop(leftchild(p)) == AST_UBoolNot) { // if (!(condition)) a; else b; -> if (condition) b; else a; int tmp = nthchild(p,2); nthchild(p,2) = rightchild(p); rightchild(p) = tmp; leftchild(p) = leftchild(leftchild(p)); } // currently similar hack for IFTHEN is being done in code generator and seems to work but probably shouldn't } } void tweak_booleans3(int p, int insert_cond_needed) { // insert (x ? TRUE : FALSE) around any conditions that // are not handled by short-circuit jumps at obvious places int q; int r; int i; int disp; int this; int cond; // remember that once wrapped, recurse on the condition with ,TRUE if (((astop(p) >= AST_LE) && (astop(p) <= AST_NE)) || ((astop(p) == AST_ShortcutBoolAnd) || (astop(p) == AST_ShortcutBoolOr))) { if (insert_cond_needed) { r = clone(p); tweak_booleans3(leftchild(r), FALSE); tweak_booleans3(rightchild(r), FALSE); q = mkterop(AST_Cond, r, mkconst(TRUE), mkconst(FALSE)); replace_with_trip(p, q); } else { tweak_booleans3(leftchild(p), FALSE); // THIS CODE NEEDS TO BE REVIEWED re TRUE/FALSE being passed on to next level tweak_booleans3(rightchild(p), FALSE); } } else if (AST_UBoolNot == astop(p)) { if (insert_cond_needed) { r = clone(p); tweak_booleans3(leftchild(r), FALSE); q = mkterop(AST_Cond, r, mkconst(TRUE), mkconst(FALSE)); replace_with_trip(p, q); } else { tweak_booleans3(leftchild(p), FALSE); } } else if (astop(p) == AST_C_While) { cond = leftchild(p); tweak_booleans3(cond, FALSE); tweak_booleans3(rightchild(p), TRUE); } else if (astop(p) == AST_C_DoWhile) { cond = rightchild(p); tweak_booleans3(cond, FALSE); tweak_booleans3(leftchild(p), TRUE); } else if (astop(p) == AST_C_ForLoop) { cond = rightchild(p); tweak_booleans3(cond, FALSE); tweak_booleans3(leftchild(p), TRUE); tweak_booleans3(nthchild(p,2), TRUE); tweak_booleans3(nthchild(p,3), TRUE); } else if (astop(p) == AST_IFTHENELSE) { cond = leftchild(p); tweak_booleans3(cond, FALSE); tweak_booleans3(rightchild(p), TRUE); tweak_booleans3(nthchild(p,2), TRUE); } else if (astop(p) == AST_IFTHEN) { // picks up statements like: if (!(a < b))) ...; cond = leftchild(p); tweak_booleans3(cond, FALSE); tweak_booleans3(rightchild(p), TRUE); } else if (astop(p) == AST_Cond) { cond = leftchild(p); tweak_booleans3(cond, FALSE); tweak_booleans3(rightchild(p), TRUE); tweak_booleans3(nthchild(p,2), TRUE); } else { // recursively search for more opportunities to un-negate easy conditions disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if ((nthchild(p, i) != -1) && (disp&this)) { if (astop(nthchild(p, i)) != AST_TYPE_Struct) tweak_booleans3(nthchild(p, i), TRUE); } this = this>>1; } } } void tweak_booleans(int p) // add != 0 where needed { tweak_booleans1(p); tweak_booleans2(p); tweak_booleans3(p, TRUE); // no idea what this is initialised to. forgot already! } // http://www.ece.cmu.edu/~koopman/stack_compiler/stack_co.html // Generate assembly code for a virtual stack machine. // This assembly code assumes a macro-assembler; it is in fact fairly easy to // write a macro-assembler which generates x86 code, treating the x86 as a stack // machine. And that in turn can be peephole optimised into fairly acceptable // register-based code. However we do not recommend that path for a real // compiler, so we supply that as a separate program rather than integrating // it here. Note that this pseudo-code could alternatively be compiled to a // byte-code and interpreted, in the style of the JVM and others. // dump code to output file. void stack_emit(char *label, char *opcode, char *addressing_mode, char *operand, char *comment) { static char *SPACES = " "; char line[512]; sprintf(line, "%s%s", label, SPACES); line[9] = ' '; line[10] = '\0'; sprintf(line+10, "%s%s", opcode, SPACES); if (line[19] == ' ') {line[19] = '\0';sprintf(line+19, "%s%s", addressing_mode, operand);} strcat(line, " "); if (strlen(line) < 55) {strcat(line, SPACES);line[54] = ' '; line[55] = '\0';} strcat(line, "; "); strcat(line, comment); fprintf(stdout, "%s\n", line); } // recursive code-generation procedure. void stack_code(int ap, int thenlab, int elselab, int donelab) { char temp[128]; char temp2[128]; int oper; int count; int disp; char *opname; char *code; int this; if ((ap == -1) || AST[ap] == -1) return; while (AST[ap] == AST_REDIRECT) { fprintf(stderr, "! redirect from %d to %d in genstack - should have been handled earlier.\n", ap, leftchild(ap)); ap = leftchild(ap); // hack } if ((ap == -1) || AST[ap] == -1) return; oper = AST[ap]; count = AST[ap+1]; if ((oper < AST_BASE) || (oper > AST_TOP)) { fprintf(stderr, "! Bad oper %d at switch in genstack.c\n", oper); exit(1); } disp = op[oper-AST_BASE].Display_Children; opname = op[oper-AST_BASE].Diag_Name; code = op[oper-AST_BASE].Stack_Name; if (count != op[oper-AST_BASE].Children) { fprintf(stderr, "! Oper = %d (%s)\n", oper, opname); assert(count == op[oper-AST_BASE].Children); } switch (oper) { case AST_TYPE_Struct: return; case AST_Scope: push_scope(ap); // next block, block type stack_code(leftchild(ap), -1,-1,-1); // now compile the block under the new scope pop_scope(); // and return to where we were before entering the block return; case AST_SEQ: stack_code(leftchild(ap), -1,-1,-1); stack_code(rightchild(ap), -1,-1,-1); break; case AST_Declare: // decln that needs runtime action to set it up, eg dynamic array, or initialised var. // stack_code(leftchild(ap), -1,-1,-1); // rightchild is the type information break; case AST_TAG: { if (astop(TYPEINFO(ap)) == AST_TYPE_ArrayOf) { stack_emit("", "PUSH", "&", c[leftchild(ap)].s, "push address of zeroth element of array"); } else if (astop(TYPEINFO(ap)) == AST_TYPE_Struct) { stack_emit("", "PUSH", "&", c[leftchild(ap)].s, "push address of base of struct"); } else if (astop(TYPEINFO(ap)) == -1) { stack_emit("", "PUSH", "", c[leftchild(ap)].s, "(implied external?)"); } else { sprintf(temp, "[[typeinfo=%d]]", astop(TYPEINFO(ap))); stack_emit("", "PUSH", "", c[leftchild(ap)].s, temp); } } break; case AST_Var: // no longer used now that we have the tag lookup type system break; case AST_Idx: ; // BREAKTHOUGH! [] and . operators should only push the address of the base object - // everything else must be evaluated as an *offset* to be added to that base. // So look at the left operand first to see what it is. Classic example is: /* struct wow { int i; int a[4]; }; struct wow test[9]; { test[7].a[3] = test[7].i; } */ // 166: = .. // / \ .. // / \ .. // / \ .. // 151: [ ] \ .. // / \ \ .. // 156: . 3 :146 . :184 .. // / \ / \ .. // / a :172 / \ .. // 161: [ ] 189: [ ] i :180 .. // / \ / \ .. // 133: test 7 :137 / \ .. // 171: test 7 :175 .. // In order to work properly however, the [] and . operators must have LHS operands // whose TYPEINFO() is filled in correctly. // This also necessitates any AddressOf operators being propogated down the left // branch until it hits the base object (which it might be doing already?) // 121: AST_Declare [99] [113] [-1] [[typeinfo=-1]] // 99: AST_TAG "test" [[typeinfo=-1]] // 113: AST_TYPE_ArrayOf [93] [108] 0 103 20 [[typeinfo=-1]] // 108: AST_TYPE_Atom // 199: AST_SEQ [127] [-1] [[typeinfo=-1]] // 127: AST_Scope [194] 219 2 [[typeinfo=-1]] // 194: AST_SEQ [-1] [166] [[typeinfo=-1]] // 166: AST_AssignTo [151] [184] [[typeinfo=36]] // 151: AST_Idx [156] [146] [[typeinfo=36]] // 156: AST_Member [161] [142] [[typeinfo=55]] // 161: AST_Idx [133] [137] [[typeinfo=93]] // 133: AST_TAG "test" [[typeinfo=113]] // 113: AST_TYPE_ArrayOf [93] [108] 0 103 20 [[typeinfo=-1]] // 108: AST_TYPE_Atom // 137: AST_Const 7 // 142: AST_TAG "a" [[typeinfo=55]] // 55: AST_TYPE_ArrayOf [36] [50] 0 45 4 [[typeinfo=-1]] // 36: AST_TYPE_Atom // 50: AST_TYPE_Atom // 146: AST_Const 3 // 184: AST_Member [189] [180] [[typeinfo=10]] // 189: AST_Idx [171] [175] [[typeinfo=93]] // 171: AST_TAG "test" [[typeinfo=113]] // 113: AST_TYPE_ArrayOf [93] [108] 0 103 20 [[typeinfo=-1]] // 108: AST_TYPE_Atom // 175: AST_Const 7 // 180: AST_TAG "i" [[typeinfo=10]] // 10: AST_TYPE_Atom #ifdef OLD_AND_BROKEN {int objecttype = leftchild(TYPEINFO(leftchild(ap))); // array of objecttype stack_code(leftchild(ap), -1,-1,-1); // we can do index range testing here if wanted, using TYPEINFO(leftchild(ap)) //fprintf(stdout, "Next thing before 'INDEX' is array offset - %d\n", rightchild(ap)); stack_code(rightchild(ap), -1,-1,-1); // if it is a const, we want to push & add, // and convert earlier so that constants can be folded if (astop(objecttype) == AST_TYPE_Struct) { sprintf(temp, "(by size of struct %s)", c[leftchild(leftchild(objecttype))].s); sprintf(temp2, "%d", get_size(objecttype)); // replaced object_sizeof with newer more accurate get_size } else { sprintf(temp, "(by size of object type %d)", astop(objecttype)); sprintf(temp2, "4"); // to do: array of arrays } stack_emit("", "INDEX", temp2, "", temp); } #else { int objecttype = leftchild(TYPEINFO(leftchild(ap))); // array of objecttype stack_code(leftchild(ap), -1,-1,-1); // Now work out the offset and add stack_code(rightchild(ap), -1,-1,-1); sprintf(temp, "(by size of array element)"); sprintf(temp2, "%d", get_size(objecttype)); stack_emit("", "INDEX", temp2, "", temp); } #endif break; case AST_Member: // need to use offset_of(type information) #ifdef BROKEN stack_code(leftchild(ap), -1,-1,-1); { int offset = -1; if (astop(leftchild(ap)) == AST_TAG) { if (TYPEINFO(leftchild(ap)) == -1) { fprintf(stderr, "Bad struct\n"); exit(1); } offset = get_member_offset(TYPEINFO(leftchild(ap)), rightchild(ap)); stack_code(mkconst(offset), -1,-1,-1); } else { stack_code(rightchild(ap), -1,-1,-1); // wrong... } } #else {int offset; stack_code(leftchild(ap), -1,-1,-1); // this code fails if rightchild is an array element, eg rec.a[4].i = i // probably also if another struct, eg rec.subrec.i = i fprintf(stderr, "TYPEINFO(leftchild(ap)) = ");show_one_ast(leftchild(ap),-1,-1); fprintf(stderr, "rightchild(ap) = ");show_one_ast(rightchild(ap),-1,-1); offset = get_member_offset(leftchild(ap), rightchild(ap)); stack_code(mkconst(offset), -1,-1,-1); } #endif stack_emit("", "ADD", "", "", "add offset to this member to object base address"); break; case AST_Ptr: ; stack_code(leftchild(ap), -1,-1,-1); stack_emit("", "PUSHI", "", "", "dereference pointer to base of struct"); //fprintf(stdout, "Next thing before 'ADD' is ->field offset - %d\n", rightchild(ap)); //stack_code(rightchild(ap), -1,-1,-1); // this is a field offset too. { int offset; if (astop(leftchild(ap)) == AST_TAG) { if (TYPEINFO(leftchild(ap)) == -1) { fprintf(stderr, "Bad struct\n"); exit(1); } offset = get_member_offset(TYPEINFO(leftchild(ap)), rightchild(ap)); } else if (astop(leftchild(ap)) == AST_TYPE_PointerTo) { offset = get_member_offset(leftchild(leftchild(ap)), rightchild(ap)); } else if (astop(leftchild(ap)) == AST_AddressOf) { offset = get_member_offset(leftchild(TYPEINFO(leftchild(leftchild(ap)))), rightchild(ap)); } else { fprintf(stderr, "astop(leftchild(%d)) = %d [[typeinfo=%d]]\n", ap, astop(leftchild(ap)), TYPEINFO(leftchild(ap))); offset = get_member_offset(leftchild(ap), rightchild(ap)); } // ORIG: stack_code(rightchild(ap), -1,-1,-1); // this is a field offset. May need special handling. // REPLACED WITH: stack_code(mkconst(offset), -1,-1,-1); // Note: would be even better to move this into the ast and apply constant folding/prop optimizations // Also... we need a new construct to allow us to dump &fred+offset instead of doing an add. } stack_emit("", "ADD", "", "", "add offset to this field in the struct"); break; case AST_AddressOf: // if not simple, push '&' down left branches until you hit an actual object? // TO DO: IndirectThrough(AddressOf(x)) == x ap = leftchild(ap); if (astop(ap)==AST_TAG) { stack_emit("", "PUSH", "&", c[leftchild(ap)].s, "AddressOf()"); } else if (astop(ap)==AST_TYPE_Struct) { // TO DO: check arrays as well - and ++ is also a real problem stack_emit("", "PUSH", "&", c[leftchild(ap)].s, "AddressOf()"); } else if (astop(ap)==AST_IndirectThrough) { // eg an int * parameter to a procedure stack_code(leftchild(ap), -1,-1,-1); } else if (children(ap) == 2) { leftchild(ap) = mkmonop(AST_AddressOf, leftchild(ap)); stack_code(ap, -1,-1,-1); } else { fprintf(stderr, "Internal error, not sure how to handle AddressOf(@ %d:)\n",ap); exit(1); } break; case AST_AssignTo: if (astop(leftchild(ap))==AST_TAG) { stack_code(rightchild(ap), -1,-1,-1); ap = leftchild(ap); // TO DO: Not debugged yet if (astop(TYPEINFO(ap)) == AST_TYPE_ArrayOf) { stack_emit("", "POPI", "", c[leftchild(ap)].s, "store indirect"); } else if (astop(TYPEINFO(ap)) == -1) { stack_emit("", "POP", "", c[leftchild(ap)].s, "(implied external?)"); } else { sprintf(temp, "[[typeinfo=%d]]", astop(TYPEINFO(ap))); stack_emit("", "POP", "", c[leftchild(ap)].s, temp); } } else { case AST_AddAss: case AST_SubAss: case AST_MulAss: case AST_DivAss: case AST_ModAss: case AST_BitLshAss: case AST_BitRshAss: case AST_BitAndAss: case AST_BitOrAss: case AST_BitXorAss: stack_emit("", "", "", "", "next is addressof(LHS)"); stack_code(mkmonop(AST_AddressOf, leftchild(ap)), -1,-1,-1); stack_code(rightchild(ap), -1,-1,-1); stack_emit("", code, "", "", "*(TOS-1) (op?)= TOS"); } break; case AST_Const: // TO DO: this will change later, when we create a constant table. (Esp for strings) stack_emit("", "PUSH", "#", escape(c[leftchild(ap)].s, c[leftchild(ap)].t), "AST_Const"); break; case AST_CommaSEQ: stack_code(leftchild(ap), -1,-1,-1); if (rightchild(ap) != -1) { stack_emit("", "POP", "", "", "void"); // TO DO: this is a quick & buggy hack stack_code(rightchild(ap), -1,-1,-1); // keep the last value in a comma sequence } // to assign to a variable if needed break; case AST_DefProc: ; // {AST_DefProc, 4, "AST_DefProc", "DefProc", "", 7}, // DefProc: scope/qual/type/ptr-seq, id, params, body // to do: add ,parms,rsult to PROC ? (Like at point of call) stack_emit("", "PROC", c[leftchild(rightchild(ap))].s, "", "AST_DefProc"); stack_code(nthchild(ap, 2), -1,-1,-1); // COMMA-LIST OF DEFPARAMS stack_code(nthchild(ap, 2+1), -1,-1,-1); // maybe reset labs to all -1's? // TO DO: if it is a procedure, plant a 'return' after the end of the body /* if (fn result type == void) */ stack_emit("", "RET", "", "0", "Return by dropping through end of proc"); // TO DO: if it was a function, plant code to generate a 'missing result from function' error message break; case AST_DefParam: // no code generated for DefParam; AST enry is only used by caller for type information break; case AST_ReceiveParam: { // param name (ident), param type, link to next ReceiveParam // leftchild of an identifier is the tag. sprintf(temp, "[[typeinfo=%d]]", astop(rightchild(ap))); stack_emit("", "PARAM", "", c[leftchild(leftchild(ap))].s, temp); if (nthchild(ap, 2) != -1) stack_code(nthchild(ap, 2), -1,-1,-1); } break; case AST_Call: ; // left: VAR right: UseParam chain { int UseParam; int Var; int DefProc; int count; int results; char callee[128]; assert(astop(rightchild(ap)) == AST_UseParam); // (I think a (void) param list is a UseParam -1 -1) Var = leftchild(ap); UseParam = rightchild(ap); count = 0; while (astop(UseParam) == AST_UseParam) { count += 1; UseParam = rightchild(UseParam); } DefProc = nthchild(ap, 2); assert(DefProc != -1); if (rightchild(leftchild(DefProc)) == 0 /*void*/) results = 0; else results = 1 /* Assume all results in a single register for now*/; sprintf(callee, "%s,%d,%d", c[leftchild(Var)].s, count-1, results); stack_emit("", "PRECALL", "", callee, "in case we need to reserve space etc"); stack_code(rightchild(ap), -1,-1,-1); // push parameters */ sprintf(callee, "%s,%d,%d", c[leftchild(Var)].s, count-1, results); stack_emit("", "CALL", "", callee, "AST_Call"); } break; case AST_ReturnResult: stack_code(leftchild(ap), -1,-1,-1); stack_emit("", "RET", "", "1", "Return a result"); break; case AST_Return: // TO DO: make sure a check is made once the AST is built to confirm that void procs // don't return results, and non-void procs do - and that they're of the right type. // (and appropriatelt typecast if need be) stack_emit("", "RET", "", "0", "Return"); break; // TO DO: labels should be renamed using the context of a block to avoid clashes // EVEN BETTER! ... replace *all* user labels with __Lnnnnn internal labels! // and if we lay out the record so that the numeric label is in the same slot // as the destination of a continue or break, we can merge this in with code // to save a redundant branch from an if statement case AST_Label: stack_emit(c[leftchild(leftchild(ap))].s, "", "", "", "AST_Label"); break; case AST_Goto: stack_emit("", "B", "", c[leftchild(leftchild(ap))].s, "AST_Goto"); break; case AST_C_While: sprintf(temp, "__L%0d", nthchild(ap, 2+1)); // continue lab stack_emit(temp, "", "", "", "CONTINUE: AST_C_While"); stack_code(leftchild(ap), nthchild(ap, 2+2),nthchild(ap, 2+0),nthchild(ap, 2+2)); // condition sprintf(temp, "__L%0d", nthchild(ap, 2+2)); // drop through if cond true stack_emit(temp, "", "", "", "LOOP: body");// start of body lab (ie code following the condition) stack_code(rightchild(ap), -1,-1,-1); // body sprintf(temp, "__L%0d", nthchild(ap, 2+1)); // continue lab stack_emit("", "B", "", temp, "LOOP: AST_C_While"); sprintf(temp, "__L%0d", nthchild(ap, 2+0)); // exit lab stack_emit(temp, "", "", "", "EXIT: AST_C_While"); break; case AST_C_DoWhile: ; sprintf(temp, "__L%0d", nthchild(ap, 2+2)); // branch back lab stack_emit(temp, "", "", "", "START: AST_C_DoWhile"); stack_code(leftchild(ap), -1,-1,-1); // body sprintf(temp, "__L%0d", nthchild(ap, 2+1)); // continue lab stack_emit(temp, "", "", "", "CONTINUE: AST_C_DoWhile"); stack_code(rightchild(ap), nthchild(ap, 2+2),nthchild(ap, 2+0),nthchild(ap, 2+0)); // condition //sprintf(temp, "__L%0d", nthchild(ap, 2+2)); // branch back lab //stack_emit("", "BT", "", temp, "LOOP: AST_C_While"); sprintf(temp, "__L%0d", nthchild(ap, 2+0)); // exit lab stack_emit(temp, "", "", "", "EXIT: AST_C_DoWhile"); break; case AST_C_ForLoop: // init-expr test-expr cont-expr body break continue branch_back body stack_code(leftchild(ap), -1,-1,-1); // init expr sprintf(temp, "__L%0d", nthchild(ap, 2+4)); // loopback lab stack_emit(temp, "", "", "", "START: AST_C_ForLoop"); stack_code(rightchild(ap), nthchild(ap, 2+5),nthchild(ap, 2+2),nthchild(ap, 2+5)); // test expr sprintf(temp, "__L%0d", nthchild(ap, 2+5)); // body lab stack_emit(temp, "", "", "", "BODY: AST_C_ForLoop"); stack_code(nthchild(ap, 2+1), -1,-1,-1); // body sprintf(temp, "__L%0d", nthchild(ap, 2+3)); // continue lab stack_emit(temp, "", "", "", "CONTINUE: AST_C_ForLoop"); stack_code(nthchild(ap, 2), -1,-1,-1); // cont expr sprintf(temp, "__L%0d", nthchild(ap, 2+4)); // loopback lab stack_emit("", "B", "", temp, "LOOP: AST_C_ForLoop"); sprintf(temp, "__L%0d", nthchild(ap, 2+2)); // break lab stack_emit(temp, "", "", "", "EXIT: AST_C_ForLoop"); break; case AST_C_Break: // also handled in AST_IFTHEN for efficiency sprintf(temp, "__L%0d", leftchild(ap)); stack_emit("", "B", "", temp, "AST_C_Break"); break; case AST_C_Continue: sprintf(temp, "__L%0d", leftchild(ap)); stack_emit("", "B", "", temp, "AST_C_Continue"); break; case AST_Cond: // near-identical to AST_IFTHENELSE! { int cond = leftchild(ap); int thenpart = rightchild(ap); int elsepart = nthchild(ap, 2); { int thenlab = temp_label(); int donelab = temp_label(); int elselab = temp_label(); stack_code(cond, thenlab, elselab, thenlab); sprintf(temp, "__L%0d", thenlab); stack_emit(temp, "", "", "", "?-lab"); //put_label(thenlab); stack_code(thenpart, -1, -1, -1); // push the then value sprintf(temp, "__L%0d", donelab); stack_emit("", "B", "", temp, ""); //put_goto(donelab); sprintf(temp, "__L%0d", elselab); stack_emit(temp, "", "", "", ":-lab"); //put_label(elselab); stack_code(elsepart, -1, -1, -1); sprintf(temp, "__L%0d", donelab); stack_emit(temp, "", "", "", "donelab"); //put_label(donelab); } } break; case AST_LE: ; case AST_GT: ; case AST_LT: ; case AST_GE: ; case AST_EQ: ; case AST_NE: ; { if ((oper == AST_NE) && ((astop(leftchild(ap)) == AST_Const) && (astop(rightchild(ap)) == AST_Const)) && (rightchild(leftchild(ap)) != rightchild(rightchild(ap)))) { sprintf(temp, "__L%0d", thenlab); if (thenlab != donelab) { stack_emit("", "B", "", temp, ""); //put_goto(thenlab); } } else if ((oper == AST_EQ) && ((astop(leftchild(ap)) == AST_Const) && (astop(rightchild(ap)) == AST_Const)) && (rightchild(leftchild(ap)) == rightchild(rightchild(ap)))) { sprintf(temp, "__L%0d", thenlab); if (thenlab != donelab) { stack_emit("", "B", "", temp, ""); //put_goto(thenlab); } } else if ((oper == AST_NE) && ((astop(leftchild(ap)) == AST_Const) && (astop(rightchild(ap)) == AST_Const)) && (rightchild(leftchild(ap)) == rightchild(rightchild(ap)))) { sprintf(temp, "__L%0d", elselab); if (elselab != donelab) { stack_emit("", "B", "", temp, ""); //put_goto(elselab); } } else if ((oper == AST_EQ) && ((astop(leftchild(ap)) == AST_Const) && (astop(rightchild(ap)) == AST_Const)) && (rightchild(leftchild(ap)) != rightchild(rightchild(ap)))) { sprintf(temp, "__L%0d", elselab); if (elselab != donelab) { stack_emit("", "B", "", temp, ""); //put_goto(elselab); } } else { stack_code(leftchild(ap), -1,-1,-1); stack_code(rightchild(ap), -1,-1,-1); stack_emit("", code, "", "", ""); // dump CMP<op> if (thenlab != donelab) { sprintf(temp, "__L%0d", thenlab); stack_emit("", "BT", "", temp, ""); //put_ifgoto(root, thenlab, TRUE); } if (elselab != donelab) { sprintf(temp, "__L%0d", elselab); stack_emit("", "BF", "", temp, ""); //put_ifgoto(root, elselab, FALSE); } } } break; // TO DO: dump BNOT for this, and move this to AST_UShortcutBoolNot case AST_UBoolNot: stack_code(leftchild(ap), elselab,thenlab,donelab); // reversed destinations for true/false return; case AST_BoolAnd: // some shortcut nodes are turned into boolean nodes by optimisations case AST_BoolOr: // when the sequence is short and is assigned to a variable and has no side-effects stack_code(leftchild(ap), -1,-1,-1); stack_code(rightchild(ap), -1,-1,-1); stack_emit("", code, "", "", opname); return; case AST_ShortcutBoolAnd: case AST_ShortcutBoolOr: { int truelab = thenlab; int falselab = elselab; int dropthrough = donelab; { int thenlab; // WOO HOO! big scoping rules test when we bootstrap this! int elselab; int donelab; int nexttestlab = temp_label(); if ((astop(ap) == AST_BoolAnd) || (astop(ap) == AST_ShortcutBoolAnd)) { thenlab = nexttestlab; elselab = falselab; } else { thenlab = truelab; elselab = nexttestlab; } donelab = nexttestlab; stack_code(leftchild(ap), thenlab, elselab, donelab); /* if the first one was true, drop through to next part of && and check it too */ sprintf(temp, "__L%0d", nexttestlab); stack_emit(temp, "", "", "", "nexttestlab"); //put_label(nexttestlab); stack_code(rightchild(ap), truelab, falselab, dropthrough); /* drop through may be to truelab; if not (because we're nested), jump to truelab */ } } break; case AST_IFTHEN: ; { int cond = leftchild(ap); int thenpart = rightchild(ap); { int thenlab = temp_label(); int donelab = temp_label(); if ((astop(thenpart) == AST_C_Break) || (astop(thenpart) == AST_C_Continue)) { // TO DO: AST_Goto int breaklab = leftchild(rightchild(ap)); stack_code(cond, breaklab, donelab, donelab); } else { stack_code(cond, thenlab, donelab, thenlab); //put_label(thenlab); sprintf(temp, "__L%0d", thenlab); stack_emit(temp, "", "", "", "thenlab"); stack_code(thenpart, -1,-1,-1); sprintf(temp, "__L%0d", donelab); stack_emit(temp, "", "", "", "donelab"); } } } break; case AST_IFTHENELSE: ; { int cond = leftchild(ap); int thenpart = rightchild(ap); int elsepart = nthchild(ap, 2); { int thenlab = temp_label(); int donelab = temp_label(); int elselab = temp_label(); stack_code(cond, thenlab, elselab, thenlab); sprintf(temp, "__L%0d", thenlab); stack_emit(temp, "", "", "", "thenlab"); //put_label(thenlab); stack_code(thenpart, -1,-1,-1); sprintf(temp, "__L%0d", donelab); stack_emit("", "B", "", temp, ""); //put_goto(donelab); sprintf(temp, "__L%0d", elselab); stack_emit(temp, "", "", "", "elselab"); //put_label(elselab); stack_code(elsepart, -1,-1,-1); sprintf(temp, "__L%0d", donelab); stack_emit(temp, "", "", "", "donelab"); //put_label(donelab); } } break; case AST_Switch: ; { // currently implemented by a skip chain. Decomposing into groups // of semi-compact jump-table entries to improve efficiency // will be a good student exercise. int next = nthchild(ap, 2+1); int switch_table = temp_label(); // leftchild = switch value, rightchild = compound statement // 1st extra field is for target of "break" statement, // 2nd extra field is link to chain of case statements, // 3rd extra field is default case // output the entry sequence/jump table stack_code(leftchild(ap), -1,-1,-1); sprintf(temp, "__L%0d", switch_table); stack_emit("", "SWITCH", "", temp, "special opcodes handle DUP of TOS"); stack_emit(temp, "", "", "", "jump table start"); // dump the block contents while (next != -1) { // 'next' is in case statement format stack_code(leftchild(next), -1,-1,-1); // TO DO: need a function to return the const, // then make it into a proper table. sprintf(temp, "__L%0d", nthchild(next,2)); stack_emit("", "CASE", "", temp, ""); next = rightchild(next); } next = nthchild(ap, 2+2); // TO DO: Run-time error: if no "default:" was given, generate a jump to // an error procedure for 'missing case label' if ((next != -1) && (rightchild(next) != -1)) { // this check is untested sprintf(temp, "__L%0d", rightchild(next)); stack_emit("", "DEFAULT", "", temp, ""); } else { stack_emit("", "DEFAULT", "", "__missing_case_label__", ""); } stack_code(rightchild(ap), -1,-1,-1); // dump a label for the 'break' statements to jump to sprintf(temp, "__L%0d", nthchild(ap, 2)); stack_emit(temp, "", "", "", "breaking from the switch comes here"); } break; case AST_Case: // leftchild = <n>, // rightchild = link to next case // last = internal number for label // dump an internal label for the jump table sprintf(temp, "__L%0d", nthchild(ap,2)); stack_emit(temp, "", "", "", "Case element"); break; case AST_DefaultCase: // leftchild = link to next case // rightchild = internal number for label // dump an internal label for the jump table sprintf(temp, "__L%0d", rightchild(ap)); stack_emit(temp, "", "", "", "Default case"); break; default: if ((oper < AST_BASE) || (oper > AST_TOP)) { fprintf(stderr, "* WARNING: genstack oper=%d branched via 'default:' - bad oper or coding error?\n", oper); } else { fprintf(stderr, "* WARNING: genstack oper=%s branched via 'default:' - add a case for it.\n", op[oper-AST_BASE].Diag_Name); } exit(0); case AST_Add: ; case AST_Sub: ; case AST_Mul: ; case AST_Div: ; case AST_Mod: ; case AST_Post_Inc: ; case AST_Pre_Inc: ; case AST_Post_Dec: ; case AST_Pre_Dec: ; case AST_Type: ; case AST_UNeg: ; case AST_UPos: ; case AST_UBitNot: ; case AST_BitAnd: ; case AST_BitOr: ; case AST_BitXor: ; case AST_BitLsh: ; case AST_BitRsh: ; case AST_UseParam: ; case AST_IndirectThrough: ; case AST_SizeOf: ; case AST_Cast: ; case AST_Sourceline: ; case AST_LINENO: ; case AST_REDIRECT: ; case AST_LINEAR_BLOB: ; // Not sure if we need the AST_TYPE_* entries here? { int child = 0; if ((count <= 2) && (*code != '\0')) { if (count >= 1) stack_code(leftchild(ap), -1,-1,-1); // monop if (count >= 2) stack_code(rightchild(ap), -1,-1,-1); // binop stack_emit("", code, "", "", opname); return; } this = 1<<(count-1); for (;;) { if (child == count) break; if (disp&this) stack_code(nthchild(ap, child), -1,-1,-1); this = this >> 1; child += 1; } if (oper != AST_UseParam) stack_emit("", "", "", "", opname); } break; } fflush(stdout); return; } void stack_data(void) { // Output the string and constant table. // possibly also an initialised static/own table? } #ifdef DEBUG_DRAW_TREES void draw_selected_trees(int p) { int count = 0; int i; int disp; int this; if ((p == -1) || (astop(p) == -1)) return; // recursively process all children, doing the folds on the way back up // once grandchildren constant expressions have already been folded: if ((astop(p) < AST_BASE) || (astop(p) > AST_TOP)) { fprintf(stderr, "* Internal Error: p=%d astop(p) - %d\n", p, astop(p)); exit(1); } disp = op[astop(p)-AST_BASE].Display_Children; this = 1<<(children(p)-1); for (i = 0; i < children(p); i++) { if (astop(nthchild(p, i)) == AST_SHOWTREE) { // "? statement" causes the tree for statement to be printed before code generation draw_tree(leftchild(nthchild(p, i))); nthchild(p, i) = leftchild(nthchild(p, i)); } else if ((nthchild(p, i) != -1) && (disp&this)) { draw_selected_trees(nthchild(p, i)); while (astop(nthchild(p, i)) == AST_REDIRECT) nthchild(p, i) = leftchild(nthchild(p,i)); } this = this>>1; } } #endif int main(int argc, char **argv) { int opt_3address = FALSE, opt_debug = FALSE, opt_stack = FALSE, opt_c = FALSE, opt_execute = FALSE, opt_optimiser = FALSE; char *s; /* Handle program arguments */ /* Get clean version of executable name. Should work on most existing systems (2006) */ strcpy(prognames, argv[0]); progname = prognames; if ((s = strrchr(progname, '/')) != NULL) progname = s+1; // Unix if ((s = strrchr(progname, '\\')) != NULL) progname = s+1; // M$ if ((s = strrchr(progname, ']')) != NULL) progname = s+1; // Dec if ((s = strrchr(progname, ';')) != NULL) *s = '\0'; // Version no's if (((s = strrchr(progname, '.')) != NULL) && (strcasecmp(s, ".exe") == 0)) *s = '\0'; if (((s = strrchr(progname, '.')) != NULL) && (strcasecmp(s, ".com") == 0)) *s = '\0'; moreopt: if ((argc >= 2) && (*argv[1] == '-') && (argv[1][2] == '\0') ) { if (argv[1][1] == 'O') { argv++; argc--; opt_optimiser = TRUE; goto moreopt; } if (argv[1][1] == 'd') { argv++; argc--; debug = TRUE; opt_debug = TRUE; goto moreopt; } if (argv[1][1] == '3') { argv++; argc--; opt_3address = TRUE; goto moreopt; } if (argv[1][1] == 's') { argv++; argc--; opt_stack = TRUE; goto moreopt; } if (argv[1][1] == 'c') { argv++; argc--; opt_c = TRUE; goto moreopt; } if (argv[1][1] == 'e') { argv++; argc--; opt_execute = TRUE; goto moreopt; } if (argv[1][1] == 'h') { fprintf(stderr, "%s:\n", progname); fprintf(stderr, "\t-3\tgenerate low-level 3-address code using c\n"); fprintf(stderr, "\t-c\tgenerate high-level c translation\n"); fprintf(stderr, "\t-s\tgenerate stack-based assembly code\n"); fprintf(stderr, "\t-e\texecute directly\n"); fprintf(stderr, "\t-d\tdebug\n"); fprintf(stderr, "\t-h\thelp (this info)\n"); exit(0); } } if (argc != 2) { fprintf(stderr, "syntax: %s [-3cdehs] filename\n", progname); exit(1); } if (!(opt_3address || opt_c || opt_execute || opt_stack)) opt_stack = TRUE; if (argv[1][0] == '-' && argv[1][1] == '\0') { sourcefile = fopen("/dev/null", "r"); } else { sourcefile = fopen(argv[1], "r"); } if (sourcefile == NULL) { fprintf(stderr, "%s: %s - %s\n", progname, strerror(errno), argv[1]); exit(errno); } if (opt_execute) { outfile = stdout; } else { char *s; sprintf(outname, "%s", argv[1]); s = strrchr(outname, '.'); if (s == NULL) s = outname+strlen(outname); if (opt_3address || opt_c) { sprintf(s, "%s", ".c"); } else if (opt_stack) { sprintf(s, "%s", ".asm"); } else { fprintf(stderr, "Won't\n"); exit(123); // shouldn't happen } //outfile = fopen(outname, "w"); //if (outfile == NULL) { // fprintf(stderr, "%s: cannot output to %s - %s\n", progname, outname, strerror(errno)); //} } curfile = argv[1]; startline = TRUE; whitespace = TRUE; /* Lexical scan */ line_reconstruction(); // Effectively, lexing. dump_source(); /* Call the parser */ //resume: if (!parse(PHRASE_BASE, 0)) { /* Attempt to print a sensible error if the parse failed */ if (bestparse == nextfree) { fprintf(stderr, "\"%s\", Line %d, Col %d: Premature end of file while looking for %s\n", argv[1], c[bestparse].l, c[bestparse].col+1, looking_for); exit(1); } else { error_line(c[bestparse].l); fprintf(stderr, "\"%s\", Line %d, Col %d: Syntax error while looking for %s near ", argv[1], c[bestparse].l, c[bestparse].col+1, looking_for); fprintf(stderr, "%s\n", escape(c[bestparse].s,c[bestparse].t)); exit(1); } } else { int rslt_errs = 0; int T[4]; int program; //fprintf(stderr, "c[%d], AST[%d], A[%d], stringpool[%d]\n", nextfree, nexttrip, next_free_a, nextstring); program = build_ast(0); // only compile if all the source parsed OK T[1] = program; T[2] = -1; // not a hole. This is a terminator. T[3] = BLOCK_EXTERNALS; program = mk(AST_Scope, 3, T); //fprintf(stderr, "c[%d], AST[%d], A[%d], stringpool[%d]\n", nextfree, nexttrip, next_free_a, nextstring); fflush(stdout); print_all_trips(); // sequentially //fprintf(stderr, "-------------\n"); {int count; do { count = fold_constant_expressions(program); while (astop(program) == AST_REDIRECT) program = leftchild(program); count += fold_identities(program); while (astop(program) == AST_REDIRECT) program = leftchild(program); } while (count > 0); } fprintf(stderr, "assign_loop_labels(%d)\n", program); assign_loop_labels(program, -1, -1, -1); while (astop(program) == AST_REDIRECT) program = leftchild(program); fprintf(stderr, "build_switch_table()\n"); build_switch_table(program, -1); // must come after folding fprintf(stderr, "build_block_scope()\n"); build_block_scope(program, -1, -1); fprintf(stderr, "populate_types()\n"); populate_types(program); // must be after block scopes. not so sure about folding (which in fact // could probably take advantage of the info that this step adds) fprintf(stderr, "check_function_results()\n"); check_function_results(program, -1, &rslt_errs); fprintf(stderr, "tweak_booleans()\n"); tweak_booleans(program); while (astop(program) == AST_REDIRECT) program = leftchild(program); fprintf(stderr, "=============\n"); show_ast(program, 0, -1); // in pre-order traverse order #ifdef DEBUG_DRAW_TREES draw_selected_trees(program); #endif fprintf(stderr, "stack_code()\n"); stack_data(); stack_code(program, -1, -1, -1); fprintf(stderr, "\n"); fflush(stdout); } exit(0); /* NOT REACHED */ return(1); }