// This code outputs the tables generated by takeon.c It is included within takeon.c //static int verbose = FALSE; //#define PARM_NO_FLEX 1 // Enable this to avoid the use of flex // arrays and to speed up execution. #include "flex.h" // All input comes through these two procedures: // *** Will need a stack of source line/column as well!!!: static int file_offset = 0, source_line = 1, source_column = 1, last_line_last_column = 1; void fatal_(int error, int line) { fprintf(stderr, "%s:%d:%d: Error: ", input_name[input_level], source_line, source_column); if (error == WEOF) { fprintf(stderr, "Premature end of file at line %d." " Grammar should end with a 'E' command." " (detected in %s, line %d)\n", source_line, __FILE__, line); if (source_line == 1) { fprintf(stderr, "(You're not by any chance trying to compile using tcc," " are you? Its wchar_t support seems to be very broken...)" "\n"); } } else if (error == 1) { // fail silently as problem already reported } else if (error == '0') { fprintf(stderr, "Syntax error at line %d while looking for a digit." " (detected in %s, line %d)\n", source_line, __FILE__, line); } else if (error == '{') { fprintf(stderr, "Error at line %d - {...} code blocks can only follow a P = ... statement, before the ';'" " (detected in %s, line %d)\n", source_line, __FILE__, line); } else if (error > ' ') { fprintf(stderr, "Syntax error at line %d while looking for a '%c'." " (detected in %s, line %d)\n", source_line, error, __FILE__, line); } else { fprintf(stderr, "Syntax error at line %d (detected in %s, line %d)\n", source_line, __FILE__, line); } exit(EXIT_FAILURE); } #define fatal(n) fatal_(n,__LINE__) // Need something similar for warnings. wint_t get_wide_char(FILE *f) { // Did not check for WEOF, but maybe we should do it anyway just to be on the safe side. wint_t c = fgetwc(f); // *** ALL *** input goes through here, so it's the perfect place to implement include files. if (c == WEOF) { if (input_level > 0) { fclose(input_file[input_level]); //fprintf(stderr, "\n ----------------------------------- END %s ------------------------------------\n", input_name[input_level]); if (debug_takeon) fprintf(stderr, "Returning from include file %s to %s\n", input_name[input_level], input_name[input_level-1]); input_level -= 1; // restore I/O context file_offset = input_offset[input_level]; source_line = input_line[input_level]; source_column = input_column[input_level]; last_line_last_column = input_last_line_last_column[input_level]; return get_wide_char(input_file[input_level]); } else { // top-level input return WEOF; } } if (debug_takeon) fprintf(stderr, "%lc", c); if (c == '\n') { source_line += 1; last_line_last_column = source_column; source_column = 1; } else source_column += 1; //if (verbose) fputwc(c, stderr); file_offset += 1; return c; } void unget_wide_char(wint_t c, FILE *f) { ungetwc(c, f); file_offset -= 1; if (c == '\n') { source_line -= 1; source_column = last_line_last_column; } else { source_column -= 1; //if (verbose) fputwc(L'\b', stderr); if (debug_takeon) fprintf(stderr, "\b \b"); } } static wint_t next_wide_char(void) { wint_t c = get_wide_char(input_file[input_level]); if (c == WEOF) { fatal(WEOF); } return c; } static wint_t nonspace(void) { for (;;) { wint_t c = get_wide_char(input_file[input_level]); if (c == WEOF) fatal(WEOF); if (!isspace(c)) return(c); } } // Every entry in the grammar consists of a type code and a data value, usually // an index into some array or other. // Type of an entry in the main grammar array gram[]: typedef unsigned long int ENTRY; typedef int KEYWORD_INDEX; typedef int REGEXP_INDEX; typedef int LITSTR_INDEX; typedef int GRAMMAR_INDEX; #define NEGATED_PHRASE (1U<<24U) #define GUARD_PHRASE (1U<<25U) #define WHITESPACE_ALLOWED (1U<<26U) #define GRAMMAR_TYPE_SHIFT 27U #define GRAMMAR_TYPE_MASK 31U #define INDEX_MASK 0x7FFFFFU // We have enough spare bits in a grammar table entry // to support types with values 1 through 31. // // I never use 0 in enumerations like this as it // helps catch errors from variables which have // not had a type explicitly set in them. #define BASE_BIP_TYPE 1U #define BASE_PHRASE_TYPE 2U #define BASE_SEMANTIC_TYPE 3U #define BASE_KEYWORD_TYPE 4U #define BASE_CHAR_TYPE 5U #define BASE_UTF32CHAR_TYPE 6U #define BASE_STRING_TYPE 7U #define BASE_UTF32STRING_TYPE 8U #define BASE_REGEXP_TYPE 9U #define BASE_OPTION_TYPE 10U #define BASE_COUNT_OF_ALTS 11U #define BASE_COUNT_OF_PHRASES 12U #define BASE_ALT_NUMBER 13U #define BIP_TYPE (BASE_BIP_TYPE <>GRAMMAR_TYPE_SHIFT)&GRAMMAR_TYPE_MASK)) // These are not hard limits. The Flex structure can expand to as large // as necessary. These are only here 1) in case we're using fixed-size // arrays rather than flex arrays, or 2) to limit runaway coding errors. // These MAX sizes do not propogate to the header file. #define MAX_GRAMMAR (1024*16) #define MAX_PHRASES (1024*4) // extra during debugging #define MAX_KEYWORDS 1024 #define MAX_REGEXPS 1024 #define MAX_LIT (2048*128) #define MAX_COMMENT 20000 #define MAX_C 300000 #define MAX_BIPS 64 static int LARGEST_ALT = 0; static int BIP_BASE = 0; static int PHRASE_BASE = 0; static int SEMANTIC_BASE = 0; static int AST_BASE = 0; // Apart from StringPool, which is used internally, these arrays are // the ones which are eventually written out to the header file as // const arrays. Internally we refer to strings by an index into the // stringpool, but in the header file, the arrays of strings are literal // strings. (Except in the grammar where we uses indices into the individual // arrays, of regexps, keywords, etc.) // All strings are stored here. typedef int STRINGINDEX; static DECLARE(StringPool, wchar_t, MAX_LIT); #define _StringPool(x) WRITE(x,StringPool,wchar_t) #define StringPool(x) READ(x,StringPool,wchar_t) static STRINGINDEX Str_nextfree = 0; #define String(x) &_StringPool(x) // Filter '-' out of strings so they can be used as C variables, // and convert spaces in identifiers to '_' wchar_t *CString(STRINGINDEX x) { STRINGINDEX Result = Str_nextfree; wint_t wc; do { wc = StringPool(x); x++; if (wc == '-' || wc == ' ') wc = '_'; _StringPool(Str_nextfree++) = wc; } while (wc != '\0'); return String(Result); } // DUE TO PROBLEMS SETTING UP GRAM VALUES PROPERLY, I'M SEPARATING BIP/PHRASE/C INTO THEIR OWN TABLES AND INDEXES FROM 0. // For the name of B<...> code. static DECLARE(bip_phrasename, STRINGINDEX, MAX_C); /* STRINGINDEX is an index into *** StringPool *** */ #define _bip_phrasename(x) WRITE(x,bip_phrasename,STRINGINDEX) #define bip_phrasename(x) READ(x,bip_phrasename,STRINGINDEX) // Map of user-supplied BIP number to a consecutive sequence used internally static DECLARE(bip_map, ENTRY, MAX_BIPS); #define _bip_map(x) WRITE(x,bip_map,ENTRY) #define bip_map(x) READ(x,bip_map,ENTRY) static int NEXT_FREE_BIPNO = 0, NUM_BIPS = 0; int current_def_bipno=unassigned, current_internal_bipno=unassigned, current_user_bipno=unassigned; // For the offset into the grammar where the sequentially-numbered P is stored. static DECLARE(sequential_phrase_no_to_grammar_index, ENTRY, MAX_PHRASES); #define _sequential_phrase_no_to_grammar_index(x) WRITE(x,sequential_phrase_no_to_grammar_index,ENTRY) #define sequential_phrase_no_to_grammar_index(x) READ(x,sequential_phrase_no_to_grammar_index,ENTRY) // For the name of B<> and P<...> definitions. And later also C<> definitions will be added. // Maps the location in the grammar where a phrase is stored // to the sequential phrase number, i.e. a mapping of G_x to P_x // NOT YET USED. static DECLARE(grammar_index_to_sequential_phrase_number, int, MAX_PHRASES); #define _grammar_index_to_sequential_phrase_number(x) WRITE(x,grammar_index_to_sequential_phrase_number,int) #define grammar_index_to_sequential_phrase_number(x) READ(x,grammar_index_to_sequential_phrase_number,int) // As above, but maps to the phrase name. static DECLARE(phrasename, STRINGINDEX, MAX_PHRASES); #define _phrasename(x) WRITE(x,phrasename,STRINGINDEX) #define phrasename(x) READ(x,phrasename,STRINGINDEX) int current_def_simple_phraseno=unassigned; // For the comment attached to each P<...> definition. //static DECLARE(xcomment, STRINGINDEX, MAX_COMMENT); //#define _xcomment(x) WRITE(x,xcomment,STRINGINDEX) //#define xcomment(x) READ(x,xcomment,STRINGINDEX) //STRINGINDEX current_comment; static int NEXT_FREE_SIMPLE_PHRASENO = 0, NUM_SIMPLE_PHRASES = 0; STRINGINDEX current_def_phrasename = unassigned; STRINGINDEX current_use_phrasename = unassigned; // For the name of C<...> code. static DECLARE(semantic_phrasename, STRINGINDEX, MAX_C); #define _semantic_phrasename(x) WRITE(x,semantic_phrasename,STRINGINDEX) #define semantic_phrasename(x) READ(x,semantic_phrasename,STRINGINDEX) // For the body of C<...> code. static DECLARE(semantic_code, STRINGINDEX, MAX_C); #define _semantic_code(x) WRITE(x,semantic_code,STRINGINDEX) #define semantic_code(x) READ(x,semantic_code,STRINGINDEX) static int NEXT_FREE_SEMANTIC_PHRASENO = 0, NUM_SEMANTIC_PHRASES; int current_def_semantic_phraseno = 0; // For the body of P<...> code. Initialised to NULL as each phrase is declared. static DECLARE(ast_code, STRINGINDEX, MAX_PHRASES); #define _ast_code(x) WRITE(x,ast_code,STRINGINDEX) #define ast_code(x) READ(x,ast_code,STRINGINDEX) // For unnamed external blocks: static DECLARE(initcode, wchar_t, MAX_C); #define _initcode(x) WRITE(x,initcode,wchar_t) #define initcode(x) READ(x,initcode,wchar_t) static int initcode_nextfree = 0; // For the array of keywords static DECLARE(keyword, STRINGINDEX, MAX_KEYWORDS); #define _keyword(x) WRITE(x,keyword,STRINGINDEX) #define keyword(x) READ(x,keyword,STRINGINDEX) static KEYWORD_INDEX NEXT_FREE_KEYWORD = 0, NUM_KEYWORDS = 0; STRINGINDEX current_keyword=unassigned; // For the array of regular expressions static DECLARE(regexps, STRINGINDEX, MAX_REGEXPS); #define _regexps(x) WRITE(x,regexps,STRINGINDEX) #define regexps(x) READ(x,regexps,STRINGINDEX) static REGEXP_INDEX NEXT_FREE_REGEXP = 0, NUM_REGEXPS = 0; STRINGINDEX current_regexp=unassigned; // The main grammar table: static DECLARE(gram, ENTRY, MAX_GRAMMAR); #define _gram(x) WRITE(x,gram,ENTRY) #define gram(x) READ(x,gram,ENTRY) static int NEXT_FREE_GRAMMAR_SLOTNO = 0, NUM_GRAMMAR; #define NUM_PHRASES (NUM_BIPS + NUM_SIMPLE_PHRASES + NUM_SEMANTIC_PHRASES) // These could be (inline) functions if we need to avoid this construct. #define MaxINT(a,b) ({int A = a, B = b; A>B?A:B;}) /* avoid evaluating params twice. */ #define MinINT(a,b) ({int A = a, B = b; A= 1023)) fatal(ends); if (c == L'\\') { // \\ \' \" c = get_wide_char(input_file[input_level]); if (c == WEOF) fatal(WEOF); if (ferror(input_file[input_level])) fatal(ends); if (c != L'\\') { _StringPool(Str_nextfree++) = L'\\'; _StringPool(Str_nextfree) = L'\0'; } } else if (c == ends) { _StringPool(Str_nextfree++) = L'\0'; return temp; } _StringPool(Str_nextfree++) = c; _StringPool(Str_nextfree) = L'\0'; } } #ifdef NOT_USED static void add_str(char *s) { // not wide. int c; while ((c = *s++) != '\0') { _StringPool(Str_nextfree++) = c; _StringPool(Str_nextfree) = L'\0'; } } #endif static STRINGINDEX c_code_block(void) { // There is similar code in takeon.c for the main init block :-( // There is a problem with this! The code has to be output to a C // file in one context, and to a set of strings for another. Need // to check that everything is handled properly. // TO DO: I believe there is currently a problem if there is an unbalanced single or // double quote in a comment within a C code block in a .g file. // THE PROBLEM MAY BE FROM /* ... */ STYLE OF COMMENTS WHICH ARE NOT HANDLED YET. int c, level = 0; STRINGINDEX here = Str_nextfree; // code block should be stored as-is - no escaping. The escaping // will be done on output if needed. //add_str("\n#ifdef X_AST\n"); // THIS EXPERIMENTAL FEATURE IS NOT IDEAL. Wrapping around entire body is the wrong place. for (;;) { c = next_wide_char(); if (c == '/') { _StringPool(Str_nextfree++) = c; c = next_wide_char(); if (c == '/') { // comment for (;;) { _StringPool(Str_nextfree++) = c; c = next_wide_char(); if (c == '\n') break; } _StringPool(Str_nextfree++) = c; continue; } } if (c == '\'') { _StringPool(Str_nextfree++) = c; // output the opening ' c = next_wide_char(); for (;;) { while (c == '\\') { _StringPool(Str_nextfree++) = c; // output '\' c = next_wide_char(); _StringPool(Str_nextfree++) = c; // output escaped char c = next_wide_char(); // get anoter char } // c is now definitely not a \? escaped character _StringPool(Str_nextfree++) = c; if (c == '\'') break; c = next_wide_char(); } continue; } if (c == '"') { _StringPool(Str_nextfree++) = c; c = next_wide_char(); for (;;) { if (c == '\\') { _StringPool(Str_nextfree++) = c; c = next_wide_char(); _StringPool(Str_nextfree++) = c; c = next_wide_char(); continue; } _StringPool(Str_nextfree++) = c; if (c == '"') break; c = next_wide_char(); } continue; } if (c == '{') { if (debug_takeon) fprintf(stderr, "\b[%d]{", level); } else if (c == '}') { if (debug_takeon) fprintf(stderr, "\b[%d]}", level); } if (c == '{') level += 1; if ((c == '}') && (level == 0)) { // add_str("\n#endif // X_AST\n"); break; } if (c == '}') level -= 1; if (c == '{' || c == '}') { if (debug_takeon) fprintf(stderr, "[%d]", level); } _StringPool(Str_nextfree++) = c; } _StringPool(Str_nextfree++) = '\0'; return here; } static KEYWORD_INDEX keyword_code(STRINGINDEX newkeyword) { // buggy if newkeyword is the empty string :-( FIX! TO DO Presumably regexp_code *could* have the same issue // Fault is probably in String() or wherever this string was entered into the StringPool. int i; if (pass == 0) { _keyword(NEXT_FREE_KEYWORD) = newkeyword; } for (i = 0; i <= NEXT_FREE_KEYWORD; i++) { if (wcscmp(String(newkeyword), String(keyword(i))) == 0) { if (pass == 0) break; return KEYWORD_TYPE | i; } } if (pass == 0) { if (i == NEXT_FREE_KEYWORD) NEXT_FREE_KEYWORD++; if (NEXT_FREE_KEYWORD >= NUM_KEYWORDS) NUM_KEYWORDS = NEXT_FREE_KEYWORD; return KEYWORD_TYPE | i; } else { fprintf(stderr, "keyword_code - internal error: cannot find \"%ls\" in keyword table.\n", String(newkeyword)); exit(EXIT_FAILURE); } } static REGEXP_INDEX regexp_code(STRINGINDEX newregexp) { int i; if (pass == 0) { _regexps(NEXT_FREE_REGEXP) = newregexp; } for (i = 0; i <= NEXT_FREE_REGEXP; i++) { if (wcscmp(String(newregexp), String(regexps(i))) == 0) { if (pass == 0) break; return REGEXP_TYPE | i; } } if (pass == 0) { if (i == NEXT_FREE_REGEXP) NEXT_FREE_REGEXP++; if (NEXT_FREE_REGEXP >= NUM_REGEXPS) NUM_REGEXPS = NEXT_FREE_REGEXP; return REGEXP_TYPE | i; } else { fprintf(stderr, "regexp_code - internal error: cannot find \"%ls\" in regexp table.\n", String(newregexp)); exit(EXIT_FAILURE); } } // The output routines take a file as a parameter to implement the // multi-pass aspect - on all but the output pass, the output file // is redirected to a sink device (e.g. /dev/null) static void print_type(FILE *f, int type) { switch (type<\n"); fprintf(f, "\n"); fprintf(f, "#ifndef TRUE\n"); fprintf(f, "#define TRUE (0==0)\n"); fprintf(f, "#endif\n"); fprintf(f, "\n"); fprintf(f, "#ifndef FALSE\n"); fprintf(f, "#define FALSE (0!=0)\n"); fprintf(f, "#endif\n"); fprintf(f, "\n"); fprintf(f, "typedef int (*parsefn)(void);\n"); fprintf(f, "\n"); fprintf(f, "#define LARGEST_ALT %d" " // Max number of phrases in any Alt: 0 (Reserved), 1:%d\n\n", LARGEST_ALT+1+2, LARGEST_ALT+2); // A modern C preprocessor trick to convert a #define to a string: // If this construct isn't supported, just enter the definitions // manually, but beware that by having a second copy here there is // a possibility that the two definitions can get out of sync if // the master copy above is changed. #define _textof(x) #x #define textof(x) _textof(x) fprintf(f, "#define NEGATED_PHRASE %s\n", textof(NEGATED_PHRASE)); fprintf(f, "#define GUARD_PHRASE %s\n", textof(GUARD_PHRASE)); fprintf(f, "#define WHITESPACE_ALLOWED %s\n", textof(WHITESPACE_ALLOWED)); // <-- may change to use "O<...> = " fprintf(f, "#define GRAMMAR_TYPE_SHIFT %s\n", textof(GRAMMAR_TYPE_SHIFT)); fprintf(f, "#define GRAMMAR_TYPE_MASK %s\n", textof(GRAMMAR_TYPE_MASK)); fprintf(f, "#define BIP_TYPE %s\n", textof(BIP_TYPE)); fprintf(f, "#define PHRASE_TYPE %s\n", textof(PHRASE_TYPE)); fprintf(f, "#define SEMANTIC_TYPE %s\n", textof(SEMANTIC_TYPE)); fprintf(f, "#define KEYWORD_TYPE %s\n", textof(KEYWORD_TYPE)); fprintf(f, "#define CHAR_TYPE %s\n", textof(CHAR_TYPE)); fprintf(f, "#define UTF32CHAR_TYPE %s\n", textof(UTF32CHAR_TYPE)); fprintf(f, "#define STRING_TYPE %s\n", textof(STRING_TYPE)); fprintf(f, "#define UTF32STRING_TYPE %s\n", textof(UTF32STRING_TYPE)); fprintf(f, "#define REGEXP_TYPE %s\n", textof(REGEXP_TYPE)); fprintf(f, "#define OPTION_TYPE %s\n", textof(OPTION_TYPE)); fprintf(f, "#define COUNT_OF_ALTS %s\n", textof(COUNT_OF_ALTS)); fprintf(f, "#define COUNT_OF_PHRASES %s\n", textof(COUNT_OF_PHRASES)); fprintf(f, "#define ALT_NUMBER %s\n", textof(ALT_NUMBER)); fprintf(f, "#define INDEX_MASK %s\n", textof(INDEX_MASK)); fprintf(f, "// (We have room for types 1..31U)\n"); fprintf(f, "#define PhraseType(idx) %s\n", textof(PhraseType(idx))); fprintf(f, "\n"); // NEXT_FREE_BIPNO refers to the internal sequence number, for example, if there were // only two BIPs referenced in a grammar: B=42; and B=69; then // the internal BIP numbers for those two would be 0 and 1 respectively, with // 'NEXT_FREE_BIPNO' being set to 2. BIP(0) would be 42 and BIP(1) would be 69. // The B_ constant should be the number the grammar writer sees, i. 42 or 69. // This range compression parallels what I used to do for phrases as well, and // I'm wondering if it contributes unnecessary complexity to the code, and that // I should make BIP numbers sparse as I've done for regular phrase numbers. PHRASE_BASE = BIP_BASE+NUM_BIPS; SEMANTIC_BASE = PHRASE_BASE + NUM_SIMPLE_PHRASES; AST_BASE = SEMANTIC_BASE + NUM_SEMANTIC_PHRASES; fprintf(f, "\n"); fprintf(f, "#define BIP_BASE %d\n", BIP_BASE); fprintf(f, "#define PHRASE_BASE %d\n", PHRASE_BASE); fprintf(f, "#define SEMANTIC_BASE %d\n", SEMANTIC_BASE); fprintf(f, "#define AST_BASE %d\n\n", AST_BASE); fprintf(f, "#define NUM_BIPS %d\n", NUM_BIPS); fprintf(f, "#define NUM_SIMPLE_PHRASES %d\n", NUM_SIMPLE_PHRASES); fprintf(f, "#define NUM_SEMANTIC_PHRASES %d\n", NUM_SEMANTIC_PHRASES); fprintf(f, "#define NUM_PHRASES" " (NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES)\n\n"); fprintf(f, "#define NUM_KEYWORDS %d\n", NUM_KEYWORDS); fprintf(f, "#define NUM_REGEXPS %d\n", NUM_REGEXPS); fprintf(f, "#define NUM_GRAMMAR %d\n", NUM_GRAMMAR); fprintf(f, "\n"); // Current code revision: we're using separate namespaces for bips, phrases, and semantic code. for (i = 0; i < NUM_BIPS; i++) { fprintf(f, "#define B_%ls %ld\n", CString(bip_phrasename(i)), bip_map(i)); } for (i = 0; i < NUM_SIMPLE_PHRASES; i++) { fprintf(f, "#define P_%ls %d\n", CString(phrasename(i)), i +NUM_BIPS ); // <--- staying compatible with original for now. } for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) { fprintf(f, "#define S_%ls %d\n", CString(semantic_phrasename(i)), i); } fprintf(f, "\n"); fprintf(f, "extern const int bip_map[NUM_BIPS];\n"); fprintf(f, "extern const int sequential_phrase_no_to_grammar_index[NUM_SIMPLE_PHRASES];\n"); fprintf(f, "extern const wchar_t *phrasename[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES];\n\n"); fprintf(f, "extern const wchar_t *semantic_phrasename[NUM_SEMANTIC_PHRASES];\n"); fprintf(f, "extern const wchar_t *semantic_code[NUM_SEMANTIC_PHRASES];\n"); fprintf(f, "extern const wchar_t *ast_code[NUM_SIMPLE_PHRASES];\n"); fprintf(f, "extern const wchar_t *xcomment[NUM_PHRASES];\n"); fprintf(f, "extern const wchar_t *keyword[NUM_KEYWORDS];\n"); fprintf(f, "extern const wchar_t *regexps[NUM_REGEXPS];\n"); fprintf(f, "\n"); fprintf(f, "extern const int gram[NUM_GRAMMAR];\n"); for (i = 0; i < NUM_SIMPLE_PHRASES; i++) { int grammar_index = sequential_phrase_no_to_grammar_index(i); fprintf(f, "#define G_%ls %d\n", CString(phrasename(i)), grammar_index); } fprintf(f, "\n"); fprintf(f, "extern parsefn parsetime[NUM_SEMANTIC_PHRASES];\n"); for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) { fprintf(f, "extern int parse_%ls(void);\n", CString(semantic_phrasename(i))); } fprintf(f, "\n"); fprintf(f, "#ifndef SUPPRESS_DATA\n"); fprintf(f, "const wchar_t *phrasename[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES] = {\n"); for (i = 0; i < NUM_BIPS; i++) { fprintf(f, " /*%d+%d*/ L\"%ls\" /*%ld*/,\n", 0, i, String(bip_phrasename(i)), bip_map(i)); } for (i = 0; i < NUM_SIMPLE_PHRASES; i++) { //int grammar_index = sequential_phrase_no_to_grammar_index(i); fprintf(f, " /*%d+%d*/ L\"%ls\",\n", NUM_BIPS, i, String(phrasename(i))); } for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) { fprintf(f, " /*%d+%d*/ L\"%ls\",\n", NUM_BIPS+NUM_SIMPLE_PHRASES, i, String(semantic_phrasename(i))); } fprintf(f, "};\n"); fprintf(f, "const wchar_t *phrasename_c[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES] = {\n"); for (i = 0; i < NUM_BIPS; i++) { fprintf(f, " /*%d+%d*/ L\"%ls\" /*%ld*/,\n", 0, i, CString(bip_phrasename(i)), bip_map(i)); } for (i = 0; i < NUM_SIMPLE_PHRASES; i++) { //int grammar_index = sequential_phrase_no_to_grammar_index(i); fprintf(f, " /*%d+%d*/ L\"%ls\",\n", NUM_BIPS, i, CString(phrasename(i))); } for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) { fprintf(f, " /*%d+%d*/ L\"%ls\",\n", NUM_BIPS+NUM_SIMPLE_PHRASES, i, CString(semantic_phrasename(i))); } fprintf(f, "};\n"); fprintf(f, "const int bip_map[NUM_BIPS] = {\n"); for (i = 0; i < NUM_BIPS; i++) { fprintf(f, " /*%d*/ %ld,\n", i, bip_map(i)); } fprintf(f, "};\n"); fprintf(f, "const int sequential_phrase_no_to_grammar_index[NUM_SIMPLE_PHRASES] = {\n"); { for (i = 0; i < NUM_SIMPLE_PHRASES; i++) { int grammar_index = sequential_phrase_no_to_grammar_index(i); fprintf(f, " G_%ls, /*%d*/\n", CString(phrasename(i)), grammar_index); } } fprintf(f, "};\n"); fprintf(f, "\n"); fprintf(f, "const wchar_t *semantic_phrasename[NUM_SEMANTIC_PHRASES] = {\n"); for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) { fprintf(f, " /*%d*/ L\"%ls\",\n", i, String(semantic_phrasename(i))); } fprintf(f, "};\n\n"); fprintf(f, "const wchar_t *semantic_code[NUM_SEMANTIC_PHRASES] = {\n"); for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) { fprintf(f, " /*%d*/ L\"", i); // Hope this is correct. It *was* then something broke. Checking now. escape(f, String(semantic_code(i)), L"\"\n \""); fprintf(f, "\",\n"); } fprintf(f, "};\n\n"); fprintf(f, "parsefn parsetime[NUM_SEMANTIC_PHRASES] = {\n"); for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) { fprintf(f, " /*%d*/ &parse_%ls,\n", i, CString(semantic_phrasename(i))); } fprintf(f, "};\n\n"); fprintf(f, "// Comments are stored so that they can be re-inserted, should\n"); fprintf(f, "// we need to regenerate a grammar.g file from this header file.\n"); fprintf(f, "const wchar_t *xcomment[NUM_PHRASES] = {\n"); for (i = 0; i < NUM_PHRASES; i++) { fprintf(f, " /*%3d*/ ", i); //if (xcomment(i) < 0 /* TO DO */ /* == NULL*/) { fprintf(f, " NULL"); //} else { // fprintf(f, " L\""); // escape(f, String(xcomment(i)), L"\"\n \""); // fprintf(f, "\""); //} fprintf(f, ",\n"); } fprintf(f, "};\n"); fprintf(f, "const wchar_t *ast_code[NUM_SIMPLE_PHRASES] = {\n"); for (i = 0; i < NUM_SIMPLE_PHRASES; i++) { fprintf(f, " /*%ls*/ L\"", CString(phrasename(i))); if (ast_code(i) != -1) escape(f, String(ast_code(i)), L"\"\n \""); fprintf(f, "\",\n"); } fprintf(f, "};\n\n"); fprintf(f, "const wchar_t *keyword[NUM_KEYWORDS] = {\n"); for (i = 0; i < NUM_KEYWORDS; i++) { fprintf(f, " /*%3d*/ L\"", i); escape(f, String(keyword(i)), NULL); fprintf(f, "\",\n"); } fprintf(f, "};\n"); fprintf(f, "const wchar_t *regexps[NUM_REGEXPS] = {\n"); for (i = 0; i < NUM_REGEXPS; i++) { fprintf(f, " /*%d*/ L\"%ls\",\n", i, String(regexps(i))); } fprintf(f, "};\n"); fprintf(f, "const int gram[NUM_GRAMMAR /* %d */] = {\n", NUM_GRAMMAR); for (i = 0; i < NUM_GRAMMAR; i++) { int p; for (p = 0; p < NUM_SIMPLE_PHRASES; p++) { if (sequential_phrase_no_to_grammar_index(p) == i) { fprintf(f, "\n// P<%ls> = ...;\n", String(phrasename(p))); } } int type = PhraseType(gram(i)); int negated = gram(i) & NEGATED_PHRASE; int guard = gram(i) & GUARD_PHRASE; int whitespace = gram(i) & WHITESPACE_ALLOWED; int index = gram(i) & INDEX_MASK; //fprintf(stderr, "Index = %d i = %d\n", index, i); fprintf(f, " /*%3d*/ ", i); if ((type<>GRAMMAR_TYPE_SHIFT); if (index == 0 /* B_EOF */) whitespace = 1; // BIPs should allow whitespace before. // Well, at least EOF should. Not sure about any others. } else { print_type(f, type); } print_bool(f, negated, "NEGATED_PHRASE "); print_bool(f, guard, "GUARD_PHRASE "); print_bool(f, whitespace, "WHITESPACE_ALLOWED"); if ((type< = %ld;\n", CString(bip_phrasename(i)), bip_map(i)); } fprintf(f, "\n// E\n"); fprintf(f, "#endif // SUPPRESS_DATA\n"); fprintf(f, "#endif // _GRAMMAR_H_\n"); }