/* BUG?: If a rule is empty ("") the @1.text value is not plugged properly? */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#define EXIT_SUCCESS 0
#endif
#define TRUE (0==0)
#define FALSE (0!=0)
#define MAX_STRING 512
/*
 * Token kinds returned by token().
 *
 * token() additionally returns EOF at end of input.  EOF is not one of the
 * enumerators, so callers test for it with an explicit comparison against
 * EOF rather than relying on the enum's named values.
 */
typedef enum {
    ENDCODE, CODE, CHAR, LABEL, TAG, STRING,
    OPENCOMMENT, CLOSECOMMENT, LBRACE, RBRACE,
    RULE, VBAR, SEMI, DOLLAR, AT
} toktype;
/***
#define toktype int
***/
static int regid = 0;       /* number of patterns written so far by str_to_regexp() */
static FILE *regs = NULL;   /* stream that str_to_regexp() writes pattern entries to */

/*
 * Largest token text (including the terminating NUL) that token() will
 * store.  The buffer passed to token() must be at least this big; main()
 * in this file passes a 1024-byte buffer.
 */
enum { TOKEN_BUF_SIZE = 1024 };

/*
 * Store character c at s[i], keep the text NUL-terminated and return the
 * new length.  A token that would not fit in TOKEN_BUF_SIZE bytes is a
 * fatal error rather than a silent buffer overrun.
 */
static int
tok_append(char *s, int i, int c)
{
    if (i >= TOKEN_BUF_SIZE - 1) {
        fprintf(stderr, "Token too long (limit is %d characters)\n", TOKEN_BUF_SIZE - 1);
        exit(EXIT_FAILURE);
    }
    s[i] = (char) c;
    s[i + 1] = '\0';
    return (i + 1);
}

/* Return the next character of f without consuming it (EOF if none). */
static int
tok_peek(FILE * f)
{
    int c = fgetc(f);

    if (c != EOF)
        ungetc(c, f);
    return (c);
}

/*
 * Read the next token from f and store its text, NUL-terminated, in s.
 *
 * f             - input stream
 * s             - output buffer of at least TOKEN_BUF_SIZE bytes
 * incomment     - nonzero while inside a comment: '"' is then returned as
 *                 an ordinary CHAR instead of starting a STRING
 * incodesection - nonzero inside an action body: the sequence \" inside a
 *                 string is kept as-is; otherwise it is reduced to a bare "
 *
 * Returns the token kind, or EOF (with s set to the empty string) at end
 * of input.  A string that runs into end of input is returned as a STRING
 * holding whatever was read, after a warning on stderr.
 */
toktype
token(FILE * f, char *s, int incomment, int incodesection)
{
    int i, c, c2;

    c = fgetc(f);
    if (c == EOF) {
        s[0] = '\0';
        return ((toktype) EOF);
    }
    s[0] = (char) c;
    s[1] = '\0';

    if (c == '%') {
        /* "%{" and "%}" delimit code sections; a lone '%' is just a CHAR.
         * The lookahead is only consumed when it is part of the token, so
         * it is never emitted twice. */
        c2 = tok_peek(f);
        if (c2 == '{' || c2 == '}') {
            tok_append(s, 1, fgetc(f));
            return (c2 == '{' ? CODE : ENDCODE);
        }
        return (CHAR);
    }

    if (isalpha(c)) {
        /* A run of letters: LABEL when followed by ':', otherwise TAG. */
        i = 1;
        while (isalpha(c = fgetc(f)))
            i = tok_append(s, i, c);
        if (c != ':') {
            if (c != EOF)
                ungetc(c, f);
            return (TAG);
        }
        tok_append(s, i, c);
        return (LABEL);
    }

    if (c == '<') {
        /* "<name>" (letters, '!' and '?') is a RULE; an unclosed '<...'
         * is passed through as a TAG. */
        i = 1;
        for (;;) {
            c = fgetc(f);
            if (!isalpha(c) && c != '!' && c != '?')
                break;
            i = tok_append(s, i, c);
        }
        if (c != '>') {
            if (c != EOF)
                ungetc(c, f);
            return (TAG);
        }
        tok_append(s, i, c);
        return (RULE);
    }

    if (c == '/') {
        if (tok_peek(f) == '*') {
            tok_append(s, 1, fgetc(f));
            return (OPENCOMMENT);
        }
        return (CHAR);
    }

    if (c == '*') {
        if (tok_peek(f) == '/') {
            tok_append(s, 1, fgetc(f));
            return (CLOSECOMMENT);
        }
        return (CHAR);
    }

    if (c == '$') {
        /* "$$" is reported as "$0"; "$<digit>" is reported unchanged. */
        c2 = tok_peek(f);
        if (c2 == '$' || isdigit(c2)) {
            (void) fgetc(f);
            tok_append(s, 1, (c2 == '$') ? '0' : c2);
            return (DOLLAR);
        }
        return (CHAR);
    }

    if (c == '@') {
        /* Only "@<digit>" forms an AT token. */
        c2 = tok_peek(f);
        if (isdigit(c2)) {
            tok_append(s, 1, fgetc(f));
            return (AT);
        }
        return (CHAR);
    }

    if (c == '{')
        return (LBRACE);
    if (c == '|')
        return (VBAR);
    if (c == ';')
        return (SEMI);
    if (c == '}')
        return (RBRACE);

    if (c == '"' && !incomment) {
        int terminated = 0;

        i = 1;
        while (!terminated) {
            c = fgetc(f);
            if (c == EOF)
                break;
            if (c == '\\') {
                c2 = fgetc(f);
                if (c2 == EOF) {
                    i = tok_append(s, i, c);
                    break;
                }
                if (!incodesection && c2 == '"') {
                    /* Outside action code, \" collapses to a bare quote. */
                    i = tok_append(s, i, c2);
                } else {
                    i = tok_append(s, i, c);
                    i = tok_append(s, i, c2);
                }
                continue;
            }
            i = tok_append(s, i, c);
            if (c == '"')
                terminated = 1;
        }
        if (!terminated)
            fprintf(stderr, "Unterminated string at end of input\n");
        return (STRING);
    }

    return (CHAR);
}
/* Lower-case hexadecimal digits, indexed by nibble value; used by string(). */
static const char hexdig[17] = "0123456789abcdef";

/*
 * Turn the quoted token text u into an anchored pattern literal, write it
 * as one " {<literal>, NULL}," entry to the regs stream, and return the
 * decimal index of that entry as a string.
 *
 * The first character of u (the opening quote) is copied, a '^' anchor is
 * inserted after it, and the rest is escaped so the result is a valid C
 * string literal: backslashes are doubled, every '"' except the final one
 * is escaped, and bytes outside printable ASCII become three-digit octal
 * escapes.  Octal is used because a \x escape would swallow any hex-digit
 * characters that happen to follow it in the literal.
 *
 * The result lives in a static buffer that the next call overwrites, so it
 * can't be used twice in the one printf.  A pattern too long for the
 * buffer is a fatal error.
 */
char *
str_to_regexp(char *u)
{
    static char ss[MAX_STRING];
    char *s = ss;
    char *const limit = ss + sizeof ss - 1;     /* slot reserved for the NUL */
    unsigned int c;

    /* Callers pass a quoted string; tolerate an empty one without reading
     * past its terminator. */
    if (*u != '\0')
        *s++ = *u++;
    *s++ = '^';
    for (;;) {
        c = (unsigned char) *u++;
        if (c == '\0')
            break;
        if (limit - s < 4) {    /* the longest unit written below is 4 chars */
            fprintf(stderr, "Pattern too long (limit is %d characters)\n", MAX_STRING - 1);
            exit(EXIT_FAILURE);
        }
        if (c < 32 || c >= 127) {
            *s++ = '\\';
            *s++ = (char) ('0' + ((c >> 6) & 7));
            *s++ = (char) ('0' + ((c >> 3) & 7));
            *s++ = (char) ('0' + (c & 7));
        } else {
            /* The last '"' closes the literal and must stay unescaped. */
            if (c == '\\' || ((c == '"') && (*u != '\0')))
                *s++ = '\\';
            *s++ = (char) c;
        }
    }
    *s = '\0';
    fprintf(regs, " {%s, NULL},\n", ss);
    sprintf(ss, "%d", regid++);
    return (ss);
}
/*
 * Escape u so that it can be embedded inside a C string literal: '\\' and
 * '"' get a backslash in front, and bytes outside printable ASCII are
 * written as the five characters \\xNN (a literal backslash followed by
 * xNN once the generated literal is compiled).
 *
 * The result lives in a static buffer that the next call overwrites, so it
 * can't be used twice in the one printf.  Input whose escaped form would
 * not fit in the buffer is truncated at a whole-character boundary instead
 * of overrunning it.
 */
char *
string(char *u)
{
    static char ss[MAX_STRING];
    size_t used = 0;
    unsigned int c;

    while ((c = (unsigned char) *u++) != '\0') {
        /* Largest unit written per input byte is 5 chars, plus the NUL. */
        if (sizeof ss - used < 6)
            break;
        if (c < 32 || c >= 127) {
            ss[used++] = '\\';
            ss[used++] = '\\';
            ss[used++] = 'x';
            ss[used++] = hexdig[(c >> 4) & 15];
            ss[used++] = hexdig[c & 15];
        } else {
            if ((c == '\\') || (c == '"'))
                ss[used++] = '\\';
            ss[used++] = (char) c;
        }
    }
    ss[used] = '\0';
    return (ss);
}
/*
 * Append src to the NUL-terminated string in dst, whose total capacity is
 * cap bytes.  Text that does not fit is dropped rather than written past
 * the end of dst.
 */
static void
append_bounded(char *dst, size_t cap, const char *src)
{
    size_t used = strlen(dst);

    if (used + 1 < cap)
        strncat(dst, src, cap - used - 1);
}

/*
 * Copy the whole of the file called path onto the stream out.
 * Returns FALSE if the file cannot be opened for reading.
 */
static int
copy_file_to(const char *path, FILE * out)
{
    FILE *in = fopen(path, "r");
    int ch;                     /* int, so that EOF is distinct from every byte */

    if (in == NULL)
        return (FALSE);
    while ((ch = fgetc(in)) != EOF)
        fputc(ch, out);
    fclose(in);
    return (TRUE);
}

/*
 * Write the text that closes the condition of one alternative and, in the
 * generated parse function, builds the parse tree on success or backtracks
 * on failure.  Shared by the ';' and '|' handling in main().
 */
static void
emit_alternative_accept(FILE * out, const char *name, int generation, int rulecount, const char *rules)
{
    fprintf(out, ") {\n");
    fprintf(out, " if (!_optimise) debug_exit(\"%s_parse found option %d -- %s\");\n",
            name, generation, rules);
    fprintf(out, " *__arg = makeparsetree(__temp, %s_%d, %d, \"%s\", __LINE__);\n",
            name, generation, rulecount, name);
    fprintf(out, " return(TRUE);\n");
    fprintf(out, " }\n");
    fprintf(out, " do_backtrack(__pos);\n");
    fprintf(out, " stackrelease(__stack);\n");
}

/*
 * Usage: tacc gramname
 *
 * Reads the grammar file for gramname (gramname.e, or e.gramname on RISC OS)
 * and writes the matching .c and .h files.  Parse functions, the pattern
 * table and the coverage checks are first collected in three temporary
 * files, which are appended to the outputs at the end and then removed.
 *
 * Returns EXIT_SUCCESS, or exits with EXIT_FAILURE on a usage error, an
 * over-long name, or a file that cannot be opened.
 */
int
main(int argc, char **argv)
{
    char mainentry[MAX_STRING], indent[MAX_STRING], trueh[MAX_STRING], parserfile[MAX_STRING], efile[MAX_STRING],
        regsfile[MAX_STRING], coverfile[MAX_STRING], cfile[MAX_STRING], hfile[MAX_STRING], s[1024],
        cur_name[MAX_STRING], rules[MAX_STRING];
    FILE *gram, *code, *head, *temp, *coverage;
    toktype c;
    /* Given defined starting values so a grammar that does not begin with a
     * label cannot make the cases below read indeterminate variables. */
    int bracecount = 0, generation = 1, rulecount = 0, maxrules = 0;

    if (argc != 2) {
        fprintf(stderr, "syntax: tacc gramname\n");
        exit(EXIT_FAILURE);
    }
    /* The longest name built below is "echo indent " + name + two more
     * characters; refuse anything that would not fit in MAX_STRING bytes. */
    if (strlen(argv[1]) > MAX_STRING - 16) {
        fprintf(stderr, "Grammar name too long\n");
        exit(EXIT_FAILURE);
    }
#ifdef __riscos
    sprintf(indent, "echo indent c.%s", argv[1]);
    sprintf(efile, "e.%s", argv[1]);
    sprintf(cfile, "c.%s", argv[1]);
    sprintf(hfile, "h.%s", argv[1]);
    sprintf(trueh, "%s.h", argv[1]);
    sprintf(parserfile, "%s", "TMPTMPTMP");
    sprintf(regsfile, "%s", "TMPXXXTMP");
    sprintf(coverfile, "%s", "TMPYYYTMP");
#else
    sprintf(indent, "echo indent %s.c", argv[1]);
    sprintf(efile, "%s.e", argv[1]);
    sprintf(cfile, "%s.c", argv[1]);
    sprintf(hfile, "%s.h", argv[1]);
    sprintf(trueh, "%s.h", argv[1]);
    sprintf(parserfile, "%s", "TMPTMP.TMP");
    sprintf(regsfile, "%s", "TMPXXX.TMP");
    sprintf(coverfile, "%s", "TMPYYY.TMP");
#endif
    gram = fopen(efile, "r");
    code = fopen(cfile, "w");
    head = fopen(hfile, "w");
    temp = fopen(parserfile, "w");
    regs = fopen(regsfile, "w");
    coverage = fopen(coverfile, "w");
    if (gram == NULL || code == NULL || head == NULL || temp == NULL || regs == NULL || coverage == NULL) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    mainentry[0] = '\0';
    cur_name[0] = '\0';
    rules[0] = '\0';

    /* Fixed prologue of the generated C file. */
    fprintf(code, "#include <stdio.h>\n");
    fprintf(code, "#include <string.h>\n");
    fprintf(code, "#include <stdlib.h>\n");
    fprintf(code, "#include <ctype.h>\n");
    fprintf(code, "#ifdef MEMDEBUG\n");
    fprintf(code, "#include <mnemosyn.h>\n");
    fprintf(code, "#endif\n");
    fprintf(code, "#define TRUE (0==0)\n");
    fprintf(code, "#define FALSE (0!=0)\n");
    fprintf(code, "#include \"%s\"\n", "regexp.h");
    fprintf(code, "#include \"%s\"\n", trueh);
    fprintf(code, "#ifdef NO_MAIN\n");
    fprintf(code, "\nextern char **argv;\nextern int argc;\n\n");
    fprintf(code, "\nextern char *yy_memptr;\n");
    /* NOTE(review): this emits an extern declaration with an initialiser;
     * left as-is, confirm that is what the NO_MAIN build expects. */
    fprintf(code, "\nextern char *yy_END_OF_FILE = NULL;\n");
    fprintf(code, "#else\n");
    fprintf(code, "\nchar **argv = (char **)NULL;\nint argc = 0;\n\n");
    fprintf(code, "\nchar *yy_memptr = NULL;\n");
    fprintf(code, "\nchar *yy_END_OF_FILE = NULL;\n");
    fprintf(code, "#endif /* MAIN */\n");
    fprintf(code, "#include \"%s\"\n", "taccutil.h");

    for (c = token(gram, s, FALSE, FALSE); c != EOF; c = token(gram, s, FALSE, FALSE)) {
        switch (c) {
        case CODE:
            /* Everything between %{ and %} goes to the header file. */
            for (;;) {
                c = token(gram, s, FALSE, FALSE);
                if (c == ENDCODE)
                    break;
                if (c == EOF) {
                    fprintf(stderr, "Unexpected end of file inside %%{ ... %%}\n");
                    break;
                }
                fputs(s, head);
            }
            break;
        case OPENCOMMENT:
            fputs(s, code);
            for (;;) {
                c = token(gram, s, TRUE, FALSE);        /* Strings handled
                                                         * differently in comments */
                if (c == EOF) {
                    fprintf(stderr, "Unexpected end of file inside comment\n");
                    break;
                }
                fputs(s, code);
                if (c == CLOSECOMMENT)
                    break;
            }
            break;
        case RBRACE:
            fputs(s, code);
            fprintf(stderr, "Unexpected rbrace?\n");
            break;
        case TAG:
        case CHAR:
            fputs(s, code);
            break;
        case LABEL:
            /* "name:" starts a new rule and its parse function. */
            fprintf(stderr, "%s\n", s);
            if (strlen(s) >= sizeof cur_name) {
                fprintf(stderr, "Rule name too long\n");
                exit(EXIT_FAILURE);
            }
            generation = 1;
            rulecount = 0;
            strcpy(cur_name, s);
            cur_name[strlen(s) - 1] = '\0';     /* drop the trailing ':' */
            if (*mainentry == '\0')
                strcpy(mainentry, cur_name);    /* first rule is the entry point */
            fprintf(temp, "\nint %s_parse(YYTYPE **__arg)\n{\n", cur_name);
            fprintf(temp, "YYTYPE __temp;\n");
            fprintf(temp, "int i;\n");
            fprintf(temp, "long __pos;\n\n");
            fprintf(temp, "void *__stack;\n\n");
            fprintf(temp, "*__arg = NULL; /* Initialise in case of failure */\n");
            fprintf(temp, "/*if (!_optimise)*/ for (i = 0; i < MAX_RULES; i++) __temp.execute.arg[i] = NULL;\n");
            fprintf(temp, "if (!_optimise) debug_enter(\"%s_parse - trying option 1\");\n", cur_name);
            fprintf(temp, "__stack = stackmark();\n");
            fprintf(temp, "if (note_backtrack(&__pos)\n");
            *rules = '\0';
            break;
        case STRING:
            rulecount += 1;
            if (rulecount > maxrules)
                maxrules = rulecount;
            fprintf(temp, "&& _regexp_parse(savepat, %s, &__temp.execute.arg[%d])\n",
                    str_to_regexp(s), rulecount);
            append_bounded(rules, sizeof rules, string(s));
            append_bounded(rules, sizeof rules, " ");
            break;
        case RULE:
            rulecount += 1;
            s[strlen(s) - 1] = '\0';    /* drop the trailing '>' */
            if (strncmp(s, "<!", 2) == 0) {
                fprintf(temp, "&& not(%s_parse,&__temp.execute.arg[%d]) /* Lazy eval */\n", &s[2], rulecount);
            } else if (strncmp(s, "<?", 2) == 0) {
                fprintf(temp, "&& is(%s_parse,&__temp.execute.arg[%d]) /* Lazy eval */\n", &s[2], rulecount);
            } else {
                fprintf(temp, "&& %s_parse(&__temp.execute.arg[%d])\n", &s[1], rulecount);
            }
            if (rulecount > maxrules)
                maxrules = rulecount;
            append_bounded(rules, sizeof rules, s);
            append_bounded(rules, sizeof rules, "> ");
            break;
        case SEMI:
            /* End of the rule: close the last alternative and the function. */
            emit_alternative_accept(temp, cur_name, generation, rulecount, rules);
            fprintf(temp, " if (!_optimise) debug_exit(\"%s_parse failed\");\n", cur_name);
            fprintf(temp, " return(FALSE);\n");
            fprintf(temp, "}\n");
            break;
        case VBAR:
            /* Close this alternative, then start collecting the next one.
             * The rule text is cleared only after it has been printed. */
            emit_alternative_accept(temp, cur_name, generation, rulecount, rules);
            *rules = '\0';
            generation += 1;
            fprintf(temp, " /*if (!_optimise)*/ for (i = 0; i < MAX_RULES; i++) __temp.execute.arg[i] = NULL;\n");
            fprintf(temp, " if (!_optimise) debug_exit(\"%s_parse - trying option %d\"); lev += 1;\n",
                    cur_name, generation);
            fprintf(temp, "\nif (note_backtrack(&__pos)\n");
            rulecount = 0;
            break;
        case LBRACE:
            /* Action body: becomes the function <rule>_<alternative>. */
            fprintf(code, "\nint %s_%d(YYTYPE *__arg)\n{\nint __debug = SEEN_%s_%d = debug_execute_enter(",
                    cur_name, generation, cur_name, generation);
            fprintf(code, "\"Executing procedure %s_%d -- arg at %%p\", __arg);\n",
                    cur_name, generation);
            fprintf(head, "\nint %s_%d(YYTYPE *__arg);\n", cur_name, generation);
            if (generation == 1) {
                fprintf(head, "\nint %s_parse(YYTYPE **__arg);\n", cur_name);
            }
            fprintf(head, "static int SEEN_%s_%d = FALSE;\n", cur_name, generation);
            fprintf(coverage, "if ((cover_testing) && (!SEEN_%s_%d)) LogWarning(ProgName, \"%s\", 0, \"Alternative %d of rule %s not tested\\n\");\n", cur_name, generation, efile, generation, cur_name);
            bracecount = 1;
            for (;;) {
                c = token(gram, s, FALSE, TRUE);        /* Allow \" through as \" */
                if (c == EOF) {
                    fprintf(stderr, "Unexpected end of file inside action for %s\n", cur_name);
                    break;
                }
                if (c == OPENCOMMENT) {
                    fputs(s, code);
                    for (;;) {
                        c = token(gram, s, TRUE, FALSE);        /* Strings handled
                                                                 * differently in
                                                                 * comments */
                        if (c == CLOSECOMMENT || c == EOF)
                            break;
                        fputs(s, code);
                    }
                    if (c == EOF) {
                        fprintf(stderr, "Unexpected end of file inside comment\n");
                        break;
                    }
                    fputs(s, code);
                } else if (c == LBRACE) {
                    bracecount += 1;
                    fputs(s, code);
                } else if (c == RBRACE) {
                    bracecount -= 1;
                    if (bracecount == 0) {
                        fprintf(code, "\nif (!_optimise) debug_execute_exit(\"%s_%d\");\nreturn(TRUE);\n", cur_name, generation);
                    }
                    fputs(s, code);
                } else if (c == DOLLAR) {       /* removed '*' from (USERTYPE *) below */
                    fprintf(code, "((USERTYPE)(__arg->execute.arg[%ld]->execute.user_value))", atol(&s[1]));
                } else if (c == AT) {
                    fprintf(code, "__arg->execute.arg[%ld]->execute", atol(&s[1]));
                } else {        /* Assume STRING? */
                    fputs(s, code);
                }
                if (bracecount == 0)
                    break;
            }
            break;
        default:
            fprintf(stderr, "Forgot %d!!!\n", (int) c);
            fputs(s, code);
            break;
        }
        /* An inner loop above may have run into end of input. */
        if (c == EOF)
            break;
    }
    fclose(gram);
    fclose(coverage);

    /* The coverage checks become the body of cover_test(). */
    fprintf(temp, "\n\nstatic int cover_test(void)\n");
    fprintf(temp, "{\n");
    if (!copy_file_to(coverfile, temp)) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    fprintf(temp, "}\n");
    fprintf(temp, "#ifndef NO_MAIN /* Get this by default */\n");
    fprintf(temp, "\nint main(int local_argc, char **local_argv)\n");
    fprintf(temp, "{\n");
    fprintf(temp, "YYTYPE *root; /* Or could be a single YYTYPE, plus an explicit makeparsetree() */\n");
    fprintf(temp, "int i;\n");
    fprintf(temp, " argv = local_argv; argc = local_argc;\n");
    fprintf(temp, " /* Should open file yyin here; do it in first entry"
            " routine *only* if\n it recognises the null phrase \"\" */\n");
    fprintf(temp, " if (%s_parse(&root)) {\n", mainentry);
    fprintf(temp, " dump_parsetree(root);\n");
    fprintf(temp, " execute_parsetree(root);\n");
    fprintf(temp, " } else {\n");
    fprintf(temp, " fprintf(stderr, \"Parse fail\\n\");\n");
    fprintf(temp, " for (i = 0; i < %d; i++) if (savepat[i].r != NULL) free(savepat[i].r);\n", regid);
    fprintf(temp, " exit(EXIT_FAILURE);\n");
    fprintf(temp, " }\n");
    fprintf(temp, " /*free_parsetree(root);*/ /*free(root);*/\n");
    fprintf(temp, " cover_test();\n");
    fprintf(temp, " exit(EXIT_SUCCESS);\n");
    fprintf(temp, "}\n");
    fprintf(temp, "#endif /* not NO_MAIN */\n");
    fprintf(temp, "\nstatic void _regexp_freeall(void) {\n");
    fprintf(temp, "int i;\n");
    fprintf(temp, " for (i = 0; i < %d; i++) if (savepat[i].r != NULL) free(savepat[i].r);\n", regid);
    fprintf(temp, "}\n");
    fclose(temp);
    fclose(regs);

    /* The parse functions follow the action code in the generated C file. */
    if (!copy_file_to(parserfile, code)) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    fprintf(head, "\n#define MAX_RULES %d\n", maxrules + 1);
    fclose(code);

    /* The pattern table goes at the end of the header. */
    fprintf(head, "static patpair savepat[%d] = {\n", regid);
    if (!copy_file_to(regsfile, head)) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    fprintf(head, "};\n");
    fclose(head);
    fprintf(stderr, "System: %s\n", indent);
    system(indent);
    remove(parserfile);
    remove(regsfile);
    remove(coverfile);
    return (EXIT_SUCCESS);
}