/* BUG?: If a rule is empty ("") the @1.text value is not plugged properly? */

/*
 * tacc: reads a grammar description from "<name>.e" and writes a C parser
 * to "<name>.c" plus declarations to "<name>.h".
 *
 * Parser functions, the regular-expression table and the coverage checks
 * are first collected in three scratch files in the current directory and
 * spliced into the outputs at the end.
 */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#define EXIT_SUCCESS 0
#endif

#define TRUE (0==0)
#define FALSE (0!=0)

#define MAX_STRING 512

/* Size of the buffer every caller of token() must supply. */
#define MAX_TOKEN 1024

/*
 * Token classes returned by token().  ENDFILE carries the value of EOF so
 * that end of input is a genuine member of the enumeration; the remaining
 * values are numbered from 0 exactly as before.
 */
typedef enum {
  ENDFILE = EOF,
  ENDCODE = 0, CODE, CHAR, LABEL, TAG, STRING, OPENCOMMENT, CLOSECOMMENT,
  LBRACE, RBRACE, RULE, VBAR, SEMI, DOLLAR, AT
} toktype;
/*** #define toktype int ***/

static int regid = 0;       /* index of the next regexp table entry */
static FILE *regs = NULL;   /* scratch file receiving regexp table entries */

/* Print msg on stderr and terminate with a failure status. */
static void
fatal(const char *msg)
{
  fprintf(stderr, "%s\n", msg);
  exit(EXIT_FAILURE);
}

/* Copy everything remaining on 'in' to 'out'. */
static void
copy_stream(FILE *in, FILE *out)
{
  int ch;   /* int, not char or enum, so EOF stays distinguishable */

  while ((ch = fgetc(in)) != EOF)
    fputc(ch, out);
}

/*
 * Append a followed by b to dst (capacity cap) only if both fit completely.
 * All-or-nothing, so an escape sequence is never cut in half.
 */
static void
append_pair(char *dst, size_t cap, const char *a, const char *b)
{
  size_t used = strlen(dst);
  size_t la = strlen(a);
  size_t lb = strlen(b);

  if (used + la + lb >= cap)
    return;
  memcpy(dst + used, a, la);
  memcpy(dst + used + la, b, lb + 1);
}

/*
 * Read the next token from f into s and return its class.
 *
 * s must have room for MAX_TOKEN bytes; it always holds a NUL-terminated
 * copy of the token text (empty at end of input).
 * incomment     - when true, '"' is an ordinary character.
 * incodesection - when true, \" inside a string is kept as \"; otherwise
 *                 it is reduced to a bare ".
 *
 * Returns ENDFILE (== EOF) at end of input.
 */
toktype
token(FILE * f, char *s, int incomment, int incodesection)
{
  int i, c, c2;

  c = fgetc(f);
  if (c == EOF) {
    s[0] = '\0';
    return (ENDFILE);
  }
  s[0] = (char) c;
  s[1] = '\0';

  if (c == '%') {
    c2 = fgetc(f);
    if (c2 == '{' || c2 == '}') {
      s[1] = (char) c2;
      s[2] = '\0';
      return (c2 == '{' ? CODE : ENDCODE);
    }
    /* The lookahead is pushed back, so it must not also stay in s. */
    ungetc(c2, f);
    return (CHAR);
  } else if (isalpha(c)) {
    i = 1;
    for (;;) {
      c = fgetc(f);
      /* Leave room for a trailing ':' and the terminator. */
      if (!isalpha(c) || i >= MAX_TOKEN - 2)
        break;
      s[i] = (char) c;
      i += 1;
      s[i] = '\0';
    }
    if (c != ':') {
      ungetc(c, f);
      return (TAG);
    }
    s[i] = (char) c;
    i += 1;
    s[i] = '\0';
    return (LABEL);
  } else if (c == '<') {
    i = 1;
    for (;;) {
      c = fgetc(f);
      if (!(isalpha(c) || c == '!' || c == '?') || i >= MAX_TOKEN - 2)
        break;
      s[i] = (char) c;
      i += 1;
      s[i] = '\0';
    }
    if (c != '>') {
      ungetc(c, f);
      return (TAG);
    }
    s[i] = (char) c;
    i += 1;
    s[i] = '\0';
    return (RULE);
  } else if (c == '/') {
    c2 = fgetc(f);
    if (c2 == '*') {
      s[1] = (char) c2;
      s[2] = '\0';
      return (OPENCOMMENT);
    }
    ungetc(c2, f);
    return (CHAR);
  } else if (c == '*') {
    c2 = fgetc(f);
    if (c2 == '/') {
      s[1] = (char) c2;
      s[2] = '\0';
      return (CLOSECOMMENT);
    }
    ungetc(c2, f);
    return (CHAR);
  } else if (c == '$') {
    c2 = fgetc(f);
    if (c2 == '$' || isdigit(c2)) {
      /* "$$" is reported as "$0". */
      s[1] = (char) (c2 == '$' ? '0' : c2);
      s[2] = '\0';
      return (DOLLAR);
    }
    ungetc(c2, f);
    return (CHAR);
  } else if (c == '@') {
    c2 = fgetc(f);
    if (isdigit(c2)) {
      s[1] = (char) c2;
      s[2] = '\0';
      return (AT);
    }
    ungetc(c2, f);
    return (CHAR);
  } else if (c == '{') {
    return (LBRACE);
  } else if (c == '|') {
    return (VBAR);
  } else if (c == ';') {
    return (SEMI);
  } else if (c == '}') {
    return (RBRACE);
  } else if ((c == '"') && (!incomment)) {
    i = 1;
    for (;;) {
      c = fgetc(f);
      if (c == EOF) {
        fprintf(stderr, "Unterminated string\n");
        break;
      }
      /* One pass stores at most two characters plus the terminator. */
      if (i > MAX_TOKEN - 3)
        fatal("String too long");
      if (c == '\\') {
        c2 = fgetc(f);
        if (c2 == EOF) {
          s[i] = '\\';
          i += 1;
          s[i] = '\0';
          fprintf(stderr, "Unterminated string\n");
          break;
        }
        if ((!incodesection) && (c2 == '"')) {
          /* Outside code sections \" collapses to a bare quote. */
          s[i] = '"';
          i += 1;
        } else {
          s[i] = '\\';
          i += 1;
          s[i] = (char) c2;
          i += 1;
        }
        s[i] = '\0';
      } else {
        s[i] = (char) c;
        i += 1;
        s[i] = '\0';
        if (c == '"')
          break;
      }
    }
    return (STRING);
  }
  return (CHAR);
}

static char hexdig[17] = "0123456789abcdef";

/*
 * Write the quoted string u to the regexp scratch file as one table entry,
 * anchored with '^', and return the entry's index as decimal text.
 *
 * u is expected to start with its opening quote and end with its closing
 * quote, as produced by token().  Bytes outside 32..126 are written as
 * three-digit octal escapes: unlike \x escapes these have a fixed width,
 * so a following hex-digit character cannot be absorbed into the escape.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call.  Can't be used twice in the one printf.
 */
char *
str_to_regexp(char *u)
{
  static char ss[MAX_STRING];
  char *s = ss;
  int c;

  *s++ = *u++;
  *s++ = '^';
  for (;;) {
    c = (unsigned char) *u++;
    if (c == '\0')
      break;
    /* Longest single addition is four characters, plus the terminator. */
    if (s > ss + sizeof ss - 5)
      fatal("String too long for regexp table");
    if (c < 32 || c >= 127) {
      *s++ = '\\';
      *s++ = (char) ('0' + ((c >> 6) & 3));
      *s++ = (char) ('0' + ((c >> 3) & 7));
      *s++ = (char) ('0' + (c & 7));
    } else {
      /* Every quote except the closing one is escaped. */
      if (c == '\\' || ((c == '"') && (*u != '\0')))
        *s++ = '\\';
      *s++ = (char) c;
    }
  }
  *s = '\0';
  fprintf(regs, " {%s, NULL},\n", ss);
  sprintf(ss, "%d", regid++);
  return (ss);
}

/*
 * Return u escaped so that it can be placed inside a C string literal:
 * backslash and '"' get a leading backslash, and bytes outside 32..126
 * appear as the text \xHH (emitted with a doubled backslash).
 *
 * Output that would not fit in the static buffer is dropped at an escape
 * boundary.  The buffer is overwritten by the next call.  Can't be used
 * twice in the one printf.
 */
char *
string(char *u)
{
  static char ss[MAX_STRING];
  char *s = ss;
  int c;

  for (;;) {
    c = (unsigned char) *u++;
    if (c == '\0')
      break;
    /* Longest single addition is five characters, plus the terminator. */
    if (s > ss + sizeof ss - 6)
      break;
    if (c < 32 || c >= 127) {
      *s++ = '\\';
      *s++ = '\\';
      *s++ = 'x';
      *s++ = hexdig[(c >> 4) & 15];
      *s++ = hexdig[c & 15];
    } else {
      if ((c == '\\') || (c == '"'))
        *s++ = '\\';
      *s++ = (char) c;
    }
  }
  *s = '\0';
  return (ss);
}

/*
 * Emit the code that closes the condition of the current alternative and
 * handles both outcomes up to the point where SEMI and VBAR differ.
 */
static void
emit_option_found(FILE *out, const char *name, int generation,
                  int rulecount, const char *rules)
{
  fprintf(out, ") {\n");
  fprintf(out, " if (!_optimise) debug_exit(\"%s_parse found option %d -- %s\");\n", name, generation, rules);
  fprintf(out, " *__arg = makeparsetree(__temp, %s_%d, %d, \"%s\", __LINE__);\n", name, generation, rulecount, name);
  fprintf(out, " return(TRUE);\n");
  fprintf(out, " }\n");
  fprintf(out, " do_backtrack(__pos);\n");
  fprintf(out, " stackrelease(__stack);\n");
}

/*
 * Usage: tacc gramname
 *
 * Translates gramname.e into gramname.c and gramname.h.  Returns
 * EXIT_SUCCESS, or exits with EXIT_FAILURE on a usage or file error.
 */
int
main(int argc, char **argv)
{
  char mainentry[MAX_TOKEN], indent[MAX_STRING], trueh[MAX_STRING],
    parserfile[MAX_STRING], efile[MAX_STRING], regsfile[MAX_STRING],
    coverfile[MAX_STRING], cfile[MAX_STRING], hfile[MAX_STRING],
    s[MAX_TOKEN], cur_name[MAX_TOKEN], rules[MAX_STRING];
  FILE *gram, *code, *head, *temp, *coverage;
  toktype c;
  int bracecount, generation = 1, rulecount = 0, maxrules = 0;

  if (argc != 2) {
    fprintf(stderr, "syntax: tacc gramname\n");
    exit(EXIT_FAILURE);
  }
  /* The longest text added around the name below is 14 characters. */
  if (strlen(argv[1]) > MAX_STRING - 16)
    fatal("Grammar name too long");

#ifdef __riscos
  sprintf(indent, "echo indent c.%s", argv[1]);
  sprintf(efile, "e.%s", argv[1]);
  sprintf(cfile, "c.%s", argv[1]);
  sprintf(hfile, "h.%s", argv[1]);
  sprintf(trueh, "%s.h", argv[1]);
  sprintf(parserfile, "%s", "TMPTMPTMP");
  sprintf(regsfile, "%s", "TMPXXXTMP");
  sprintf(coverfile, "%s", "TMPYYYTMP");
#else
  sprintf(indent, "echo indent %s.c", argv[1]);
  sprintf(efile, "%s.e", argv[1]);
  sprintf(cfile, "%s.c", argv[1]);
  sprintf(hfile, "%s.h", argv[1]);
  sprintf(trueh, "%s.h", argv[1]);
  sprintf(parserfile, "%s", "TMPTMP.TMP");
  sprintf(regsfile, "%s", "TMPXXX.TMP");
  sprintf(coverfile, "%s", "TMPYYY.TMP");
#endif

  gram = fopen(efile, "r");
  code = fopen(cfile, "w");
  head = fopen(hfile, "w");
  temp = fopen(parserfile, "w");
  regs = fopen(regsfile, "w");
  coverage = fopen(coverfile, "w");
  if (gram == NULL || code == NULL || head == NULL || temp == NULL
      || regs == NULL || coverage == NULL) {
    fprintf(stderr, "File problems\n");
    exit(EXIT_FAILURE);
  }

  /* Defined state in case a STRING, RULE or '{' precedes any label. */
  mainentry[0] = '\0';
  cur_name[0] = '\0';
  rules[0] = '\0';

  fprintf(code, "#include <stdio.h>\n");
  fprintf(code, "#include <string.h>\n");
  fprintf(code, "#include <stdlib.h>\n");
  fprintf(code, "#include <ctype.h>\n");
  fprintf(code, "#ifdef MEMDEBUG\n");
  fprintf(code, "#include <mnemosyn.h>\n");
  fprintf(code, "#endif\n");
  fprintf(code, "#define TRUE (0==0)\n");
  fprintf(code, "#define FALSE (0!=0)\n");
  fprintf(code, "#include \"%s\"\n", "regexp.h");
  fprintf(code, "#include \"%s\"\n", trueh);
  fprintf(code, "#ifdef NO_MAIN\n");
  fprintf(code, "\nextern char **argv;\nextern int argc;\n\n");
  fprintf(code, "\nextern char *yy_memptr;\n");
  fprintf(code, "\nextern char *yy_END_OF_FILE = NULL;\n");
  fprintf(code, "#else\n");
  fprintf(code, "\nchar **argv = (char **)NULL;\nint argc = 0;\n\n");
  fprintf(code, "\nchar *yy_memptr = NULL;\n");
  fprintf(code, "\nchar *yy_END_OF_FILE = NULL;\n");
  fprintf(code, "#endif /* MAIN */\n");
  fprintf(code, "#include \"%s\"\n", "taccutil.h");

  c = token(gram, s, FALSE, FALSE);
  for (;;) {
    switch (c) {
    case CODE:
      /* Everything between %{ and %} goes to the header. */
      for (;;) {
        c = token(gram, s, FALSE, FALSE);
        if (c == ENDCODE || c == ENDFILE)
          break;
        fputs(s, head);
      }
      break;

    case OPENCOMMENT:
      fputs(s, code);
      for (;;) {
        /* Strings handled differently in comments */
        c = token(gram, s, TRUE, FALSE);
        if (c == ENDFILE)
          break;
        fputs(s, code);
        if (c == CLOSECOMMENT)
          break;
      }
      break;

    case RBRACE:
      fputs(s, code);
      fprintf(stderr, "Unexpected rbrace?\n");
      break;

    case TAG:
    case CHAR:
      fputs(s, code);
      break;

    case LABEL:
      /* "name:" starts a new rule; the first one becomes the entry point. */
      fprintf(stderr, "%s\n", s);
      generation = 1;
      rulecount = 0;
      strcpy(cur_name, s);
      cur_name[strlen(s) - 1] = '\0';   /* drop the ':' */
      if (*mainentry == '\0')
        strcpy(mainentry, cur_name);
      fprintf(temp, "\nint %s_parse(YYTYPE **__arg)\n{\n", cur_name);
      fprintf(temp, "YYTYPE __temp;\n");
      fprintf(temp, "int i;\n");
      fprintf(temp, "long __pos;\n\n");
      fprintf(temp, "void *__stack;\n\n");
      fprintf(temp, "*__arg = NULL; /* Initialise in case of failure */\n");
      fprintf(temp, "/*if (!_optimise)*/ for (i = 0; i < MAX_RULES; i++) __temp.execute.arg[i] = NULL;\n");
      fprintf(temp, "if (!_optimise) debug_enter(\"%s_parse - trying option 1\");\n", cur_name);
      fprintf(temp, "__stack = stackmark();\n");
      fprintf(temp, "if (note_backtrack(&__pos)\n");
      *rules = '\0';
      break;

    case STRING:
      rulecount += 1;
      if (rulecount > maxrules)
        maxrules = rulecount;
      fprintf(temp, "&& _regexp_parse(savepat, %s, &__temp.execute.arg[%d])\n", str_to_regexp(s), rulecount);
      append_pair(rules, sizeof rules, string(s), " ");
      break;

    case RULE:
      rulecount += 1;
      s[strlen(s) - 1] = '\0';          /* drop the '>' */
      if (strncmp(s, "<!", 2) == 0) {
        fprintf(temp, "&& not(%s_parse,&__temp.execute.arg[%d]) /* Lazy eval */\n", &s[2], rulecount);
      } else if (strncmp(s, "<?", 2) == 0) {
        fprintf(temp, "&& is(%s_parse,&__temp.execute.arg[%d]) /* Lazy eval */\n", &s[2], rulecount);
      } else {
        fprintf(temp, "&& %s_parse(&__temp.execute.arg[%d])\n", &s[1], rulecount);
      }
      if (rulecount > maxrules)
        maxrules = rulecount;
      append_pair(rules, sizeof rules, s, "> ");
      break;

    case SEMI:
      /* Last alternative: close it and finish the parse function. */
      emit_option_found(temp, cur_name, generation, rulecount, rules);
      fprintf(temp, " if (!_optimise) debug_exit(\"%s_parse failed\");\n", cur_name);
      fprintf(temp, " return(FALSE);\n");
      fprintf(temp, "}\n");
      break;

    case VBAR:
      /* Close this alternative and open the next one. */
      emit_option_found(temp, cur_name, generation, rulecount, rules);
      /* Reset only after the summary of this alternative has been written. */
      *rules = '\0';
      generation += 1;
      fprintf(temp, " /*if (!_optimise)*/ for (i = 0; i < MAX_RULES; i++) __temp.execute.arg[i] = NULL;\n");
      fprintf(temp, " if (!_optimise) debug_exit(\"%s_parse - trying option %d\"); lev += 1;\n", cur_name, generation);
      fprintf(temp, "\nif (note_backtrack(&__pos)\n");
      rulecount = 0;
      break;

    case LBRACE:
      /* Action block: becomes the function <rule>_<alternative>(). */
      fprintf(code, "\nint %s_%d(YYTYPE *__arg)\n{\nint __debug = SEEN_%s_%d = debug_execute_enter(", cur_name, generation, cur_name, generation);
      fprintf(code, "\"Executing procedure %s_%d -- arg at %%p\", __arg);\n", cur_name, generation);
      fprintf(head, "\nint %s_%d(YYTYPE *__arg);\n", cur_name, generation);
      if (generation == 1) {
        fprintf(head, "\nint %s_parse(YYTYPE **__arg);\n", cur_name);
      }
      fprintf(head, "static int SEEN_%s_%d = FALSE;\n", cur_name, generation);
      fprintf(coverage, "if ((cover_testing) && (!SEEN_%s_%d)) LogWarning(ProgName, \"%s\", 0, \"Alternative %d of rule %s not tested\\n\");\n", cur_name, generation, efile, generation, cur_name);
      bracecount = 1;
      for (;;) {
        c = token(gram, s, FALSE, TRUE);        /* Allow \" through as \" */
        if (c == ENDFILE)
          break;
        if (c == OPENCOMMENT) {
          fputs(s, code);
          for (;;) {
            /* Strings handled differently in comments */
            c = token(gram, s, TRUE, FALSE);
            if (c == CLOSECOMMENT || c == ENDFILE)
              break;
            fputs(s, code);
          }
          if (c == ENDFILE)
            break;
          fputs(s, code);
        } else if (c == LBRACE) {
          bracecount += 1;
          fputs(s, code);
        } else if (c == RBRACE) {
          bracecount -= 1;
          if (bracecount == 0) {
            fprintf(code, "\nif (!_optimise) debug_execute_exit(\"%s_%d\");\nreturn(TRUE);\n", cur_name, generation);
          }
          fputs(s, code);
        } else if (c == DOLLAR) {
          /* removed '*' from (USERTYPE *) below */
          fprintf(code, "((USERTYPE)(__arg->execute.arg[%ld]->execute.user_value))", atol(&s[1]));
        } else if (c == AT) {
          fprintf(code, "__arg->execute.arg[%ld]->execute", atol(&s[1]));
        } else {
          /* Assume STRING? */
          fputs(s, code);
        }
        if (bracecount == 0)
          break;
      }
      break;

    case ENDFILE:
      break;

    default:
      fprintf(stderr, "Forgot %d!!!\n", (int) c);
      fputs(s, code);
      break;
    }
    if (c == ENDFILE)
      break;
    c = token(gram, s, FALSE, FALSE);
  }
  fclose(gram);
  fclose(coverage);

  fprintf(temp, "\n\nstatic int cover_test(void)\n");
  fprintf(temp, "{\n");
  coverage = fopen(coverfile, "r");
  if (coverage == NULL)
    fatal("File problems");
  copy_stream(coverage, temp);
  fclose(coverage);
  fprintf(temp, "}\n");

  fprintf(temp, "#ifndef NO_MAIN /* Get this by default */\n");
  fprintf(temp, "\nint main(int local_argc, char **local_argv)\n");
  fprintf(temp, "{\n");
  fprintf(temp, "YYTYPE *root; /* Or could be a single YYTYPE, plus an explicit makeparsetree() */\n");
  fprintf(temp, "int i;\n");
  fprintf(temp, " argv = local_argv; argc = local_argc;\n");
  fprintf(temp, " /* Should open file yyin here; do it in first entry" " routine *only* if\n it recognises the null phrase \"\" */\n");
  fprintf(temp, " if (%s_parse(&root)) {\n", mainentry);
  fprintf(temp, " dump_parsetree(root);\n");
  fprintf(temp, " execute_parsetree(root);\n");
  fprintf(temp, " } else {\n");
  fprintf(temp, " fprintf(stderr, \"Parse fail\\n\");\n");
  fprintf(temp, " for (i = 0; i < %d; i++) if (savepat[i].r != NULL) free(savepat[i].r);\n", regid);
  fprintf(temp, " exit(EXIT_FAILURE);\n");
  fprintf(temp, " }\n");
  fprintf(temp, " /*free_parsetree(root);*/ /*free(root);*/\n");
  fprintf(temp, " cover_test();\n");
  fprintf(temp, " exit(EXIT_SUCCESS);\n");
  fprintf(temp, "}\n");
  fprintf(temp, "#endif /* not NO_MAIN */\n");
  fprintf(temp, "\nstatic void _regexp_freeall(void) {\n");
  fprintf(temp, "int i;\n");
  fprintf(temp, " for (i = 0; i < %d; i++) if (savepat[i].r != NULL) free(savepat[i].r);\n", regid);
  fprintf(temp, "}\n");
  fclose(temp);
  fclose(regs);

  /* Append the collected parser functions to the generated C file. */
  temp = fopen(parserfile, "r");
  if (temp == NULL)
    fatal("File problems");
  copy_stream(temp, code);
  fclose(temp);
  fprintf(head, "\n#define MAX_RULES %d\n", maxrules + 1);
  fclose(code);

  /* Emit the regexp table into the header. */
  temp = fopen(regsfile, "r");
  if (temp == NULL)
    fatal("File problems");
  fprintf(head, "static patpair savepat[%d] = {\n", regid);
  copy_stream(temp, head);
  fprintf(head, "};\n");
  fclose(temp);
  fclose(head);

  fprintf(stderr, "System: %s\n", indent);
  system(indent);
  remove(parserfile);
  remove(regsfile);
  remove(coverfile);
  return (EXIT_SUCCESS);
}