/* BUG?: If a rule is empty ("") the @1.text value is not plugged properly? */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#define EXIT_SUCCESS 0
#endif

#define TRUE (0==0)
#define FALSE (0!=0)

#define MAX_STRING 512

/***
Token kinds produced by token().

token() also reports end of input by returning EOF, so the enumeration
carries an explicit ENDOFFILE enumerator with that value.  This forces the
compiler to choose an underlying type that can represent EOF, which keeps
comparisons such as (c == EOF) and "case EOF:" reliable even where enums
would otherwise be given a small unsigned type.  All other enumerators keep
their original values (ENDCODE is 0, AT is 14).
***/
typedef enum {
    ENDOFFILE = EOF,
    ENDCODE = 0, CODE, CHAR, LABEL, TAG, STRING,
    OPENCOMMENT, CLOSECOMMENT, LBRACE, RBRACE,
    RULE, VBAR, SEMI, DOLLAR, AT
}               toktype;
/***
#define toktype int
***/
/* Number of patterns registered so far: str_to_regexp() hands out regid++ as
 * the index of each string it writes, and main() uses the final value as the
 * size of the generated savepat[] table. */
static int regid = 0;
/* Scratch stream collecting one "{pattern, NULL}," initialiser per string;
 * opened by main() and written to by str_to_regexp(). */
static FILE *regs = NULL;


toktype 
token(FILE * f, char *s, int incomment, int incodesection)
{
    int             i, c, c2;
    s[1] = '\0';
    s[0] = c = fgetc(f);
    if (c == EOF)
        return (EOF);
    if (c == '%') {
        s[2] = '\0';
        s[1] = c2 = fgetc(f);
        if (c2 == '{') {
            return (CODE);
        } else if (c2 == '}') {
            return (ENDCODE);
        }
        ungetc(c2, f);
        return (CHAR);
    } else if (isalpha(c)) {
        i = 1;
        for (;;) {
            c = fgetc(f);
            if (!isalpha(c))
                break;
            s[i] = c;
            i += 1;
            s[i] = '\0';
        }
        if (c != ':') {
            ungetc(c, f);
            return (TAG);
        }
        s[i] = c;
        i += 1;
        s[i] = '\0';
        return (LABEL);
    } else if (c == '<') {
        i = 1;
        for (;;) {
            c = fgetc(f);
            if (!(isalpha(c) || (c == '!' || c == '?')))
                break;
            s[i] = c;
            i += 1;
            s[i] = '\0';
        }
        if (c != '>') {
            ungetc(c, f);
            return (TAG);
        }
        s[i] = c;
        i += 1;
        s[i] = '\0';
        return (RULE);
    } else if (c == '/') {
        s[2] = '\0';
        s[1] = c2 = fgetc(f);
        if (c2 == '*') {
            return (OPENCOMMENT);
        }
        s[1] = '\0';
        ungetc(c2, f);
        return (CHAR);
    } else if (c == '*') {
        s[2] = '\0';
        s[1] = c2 = fgetc(f);
        if (c2 == '/') {
            return (CLOSECOMMENT);
        }
        s[1] = '\0';
        ungetc(c2, f);
        return (CHAR);
    } else if (c == '$') {
        s[2] = '\0';
        s[1] = c2 = fgetc(f);
        if (c2 == '$' || isdigit(c2)) {
            if (c2 == '$')
                s[1] = '0';
            return (DOLLAR);
        }
        s[1] = '\0';
        ungetc(c2, f);
        return (CHAR);
    } else if (c == '@') {
        s[2] = '\0';
        s[1] = c2 = fgetc(f);
        if (isdigit(c2)) {
            if ((c2 == '$') || (c2 == '@'))
                s[1] = '0';
            return (AT);
        }
        s[1] = '\0';
        ungetc(c2, f);
        return (CHAR);
    } else if (c == '{') {
        return (LBRACE);
    } else if (c == '|') {
        return (VBAR);
    } else if (c == ';') {
        return (SEMI);
    } else if (c == '}') {
        return (RBRACE);
    } else if ((c == '"') && (!incomment)) {
        i = 1;
        for (;;) {
            c = fgetc(f);
            if (c == '\\') {
                s[i] = c;
                i += 1;
                s[i] = '\0';
                c = fgetc(f);
                if ((!incodesection) && (c == '"')) {
                  i -= 1; s[i] = c; i += 1;
                } else {
                  s[i] = c;
                  i += 1;
                  s[i] = '\0';
                }
            } else if (c == '"') {
                s[i] = c;
                i += 1;
                s[i] = '\0';
                break;
            } else {
                s[i] = c;
                i += 1;
                s[i] = '\0';
            }
        }
        return (STRING);
    } else {
        return (CHAR);
    }
    return (0);
}
/* Lower-case hexadecimal digits, indexed by nibble value (0..15). */
static char hexdig[] = "0123456789" "abcdef";

/*
 * str_to_regexp - register a quoted grammar string as an anchored pattern.
 *
 * u is the text of a STRING token, including its surrounding double quotes.
 * One initialiser line of the form
 *        {"^...", NULL},
 * is written to the global regs stream.  The opening quote is copied, a '^'
 * is inserted after it, and the rest is copied with these changes:
 *   - bytes outside the range 32..126 become \xHH;
 *   - a backslash is doubled;
 *   - a double quote that is not the last character is backslash-escaped
 *     (the last one is left bare so that it closes the literal).
 *
 * Returns the index assigned to the pattern (the value of regid before it is
 * incremented) as a decimal string.  The result lives in a static buffer
 * that the next call overwrites, so it can't be used twice in the one printf.
 *
 * A string whose escaped form does not fit in the buffer is a fatal error:
 * a truncated pattern would silently match the wrong text.
 */
char           *
str_to_regexp(char *u)
{
    static char     ss[MAX_STRING];
    char           *text = u;
    char           *s = ss;
    char           *end = ss + sizeof ss - 1;   /* last byte is kept for the NUL */
    int             c;

    if (*u != '\0')
        *s++ = *u++;            /* opening quote; never step past an empty input */
    *s++ = '^';
    for (;;) {
        /* Go through unsigned char so that high-bit bytes are never negative */
        c = (unsigned char) *u++;
        if (c == '\0')
            break;
        if (end - s < 4) {      /* the longest expansion of one byte is \xHH */
            fprintf(stderr, "String too long to make a pattern: %s\n", text);
            exit(EXIT_FAILURE);
        }
        if (c < 32 || c >= 127) {
            *s++ = '\\';
            *s++ = 'x';
            *s++ = hexdig[(c >> 4) & 15];
            *s++ = hexdig[c & 15];
        } else {
            if (c == '\\' || ((c == '"') && (*u != '\0')))
                *s++ = '\\';
            *s++ = (char) c;
        }
    }
    *s = '\0';
    fprintf(regs, "   {%s, NULL},\n", ss);
    sprintf(ss, "%d", regid++);
    return (ss);
}

/*
 * string - escape text so that it can sit inside a C string literal.
 *
 * Every backslash and double quote in u gets a backslash in front of it, and
 * every byte outside the range 32..126 is written as a doubled backslash
 * followed by xHH, so that the compiled literal shows the escape as text.
 *
 * Returns a pointer to a static buffer that the next call overwrites, so it
 * can't be used twice in the one printf.
 *
 * The output is limited to the size of that buffer.  When the next escaped
 * unit would not fit, the result is cut off before it, so the returned text
 * never ends in the middle of an escape sequence.
 */
char           *
string(char *u)
{
    static char     ss[MAX_STRING];
    char           *s = ss;
    char           *end = ss + sizeof ss - 1;   /* last byte is kept for the NUL */
    int             c;

    /* Go through unsigned char so that high-bit bytes are never negative */
    while ((c = (unsigned char) *u++) != '\0') {
        if (c < 32 || c >= 127) {
            if (end - s < 5)
                break;
            *s++ = '\\';
            *s++ = '\\';
            *s++ = 'x';
            *s++ = hexdig[(c >> 4) & 15];
            *s++ = hexdig[c & 15];
        } else {
            int             escape = (c == '\\') || (c == '"');

            if (end - s < 1 + escape)
                break;
            if (escape)
                *s++ = '\\';
            *s++ = (char) c;
        }
    }
    *s = '\0';
    return (ss);
}

/* Report a grammar that ends in the middle of a construct; yields the exit
 * status the program should finish with. */
static int
unexpected_eof(const char *where)
{
    fprintf(stderr, "Unexpected end of file inside %s\n", where);
    return (EXIT_FAILURE);
}

/* Append src to the NUL-terminated text in dst (capacity cap bytes), but only
 * when all of it fits, so an escape sequence is never cut in half. */
static void
append_if_room(char *dst, size_t cap, const char *src)
{
    size_t          used = strlen(dst);
    size_t          need = strlen(src);

    if (need < cap - used)
        memcpy(dst + used, src, need + 1);
}

/* Append the whole contents of the file called path to out.  Returns TRUE on
 * success and FALSE when the file cannot be opened. */
static int
copy_file_to(const char *path, FILE *out)
{
    FILE           *in = fopen(path, "r");
    int             ch;         /* int, not toktype: must hold any byte and EOF */

    if (in == NULL)
        return (FALSE);
    while ((ch = fgetc(in)) != EOF)
        fputc(ch, out);
    fclose(in);
    return (TRUE);
}

/* Emit the generated code that finishes one alternative of a rule: the
 * success branch, then the backtrack that runs when the alternative failed. */
static void
emit_alternative_end(FILE *temp, const char *cur_name, int generation,
                     int rulecount, const char *rules)
{
    fprintf(temp, ")  {\n");
    fprintf(temp, "      if (!_optimise) debug_exit(\"%s_parse found option %d -- %s\");\n",
            cur_name, generation, rules);
    fprintf(temp, "      *__arg = makeparsetree(__temp, %s_%d, %d, \"%s\", __LINE__);\n",
            cur_name, generation, rulecount, cur_name);
    fprintf(temp, "      return(TRUE);\n");
    fprintf(temp, "   }\n");
    fprintf(temp, "   do_backtrack(__pos);\n");
    fprintf(temp, "   stackrelease(__stack);\n");
}

/*
 * main - translate the grammar <name>.e into <name>.c and <name>.h.
 *
 * Usage: tacc gramname
 *
 * The grammar is read token by token.  Text between the percent-brace
 * markers goes to the header; comments and other plain text go to the C
 * file; each "label:" starts a <label>_parse() function whose body is built
 * up in a scratch file; each { } action becomes a function <label>_<n>() in
 * the C file.  Patterns and coverage checks are collected in two more
 * scratch files.  At the end the scratch files are appended to the outputs
 * and removed.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when a file cannot be opened or
 * written, or when the grammar ends inside a code section, comment or action.
 */
int
main(int argc, char **argv)
{
    char            mainentry[MAX_STRING], indent[MAX_STRING], trueh[MAX_STRING], parserfile[MAX_STRING], efile[MAX_STRING],
                    regsfile[MAX_STRING], coverfile[MAX_STRING], cfile[MAX_STRING], hfile[MAX_STRING], s[1024], cur_name[32],
                    rules[MAX_STRING];
    FILE           *gram, *code, *head, *temp, *coverage;
    toktype         c;
    /* generation / rulecount / names start in a defined state in case the
     * grammar uses a string, rule or action before its first label */
    int             bracecount, generation = 1, rulecount = 0, maxrules = 0;
    int             status = EXIT_SUCCESS;
    size_t          len;

    if (argc != 2) {
        fprintf(stderr, "syntax: tacc gramname\n");
        exit(EXIT_FAILURE);
    }
    /* Every file name below is the grammar name plus a short fixed affix;
     * refuse names that would not fit in the MAX_STRING buffers. */
    if (strlen(argv[1]) > (size_t) (MAX_STRING - 32)) {
        fprintf(stderr, "Grammar name too long\n");
        exit(EXIT_FAILURE);
    }
#ifdef __riscos
    sprintf(indent, "echo indent c.%s", argv[1]);
    sprintf(efile, "e.%s", argv[1]);
    sprintf(cfile, "c.%s", argv[1]);
    sprintf(hfile, "h.%s", argv[1]);
    sprintf(trueh, "%s.h", argv[1]);
    sprintf(parserfile, "%s", "TMPTMPTMP");
    sprintf(regsfile, "%s", "TMPXXXTMP");
    sprintf(coverfile, "%s", "TMPYYYTMP");
#else
    sprintf(indent, "echo indent %s.c", argv[1]);
    sprintf(efile, "%s.e", argv[1]);
    sprintf(cfile, "%s.c", argv[1]);
    sprintf(hfile, "%s.h", argv[1]);
    sprintf(trueh, "%s.h", argv[1]);
    sprintf(parserfile, "%s", "TMPTMP.TMP");
    sprintf(regsfile, "%s", "TMPXXX.TMP");
    sprintf(coverfile, "%s", "TMPYYY.TMP");
#endif
    /* Open the input first so that a missing grammar does not wipe the
     * outputs of an earlier run. */
    gram = fopen(efile, "r");
    if (gram == NULL) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    code = fopen(cfile, "w");
    head = fopen(hfile, "w");
    temp = fopen(parserfile, "w");
    regs = fopen(regsfile, "w");
    coverage = fopen(coverfile, "w");
    if (code == NULL || head == NULL || temp == NULL || regs == NULL || coverage == NULL) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    mainentry[0] = '\0';
    cur_name[0] = '\0';
    rules[0] = '\0';

    fprintf(code, "#include <stdio.h>\n");
    fprintf(code, "#include <string.h>\n");
    fprintf(code, "#include <stdlib.h>\n");
    fprintf(code, "#include <ctype.h>\n");
    fprintf(code, "#ifdef MEMDEBUG\n");
    fprintf(code, "#include <mnemosyn.h>\n");
    fprintf(code, "#endif\n");
    fprintf(code, "#define  TRUE (0==0)\n");
    fprintf(code, "#define  FALSE (0!=0)\n");

    fprintf(code, "#include \"%s\"\n", "regexp.h");
    fprintf(code, "#include \"%s\"\n", trueh);

    fprintf(code, "#ifdef NO_MAIN\n");
    fprintf(code, "\nextern char **argv;\nextern int argc;\n\n");
    fprintf(code, "\nextern char *yy_memptr;\n");
    /* NOTE(review): this emits an extern declaration with an initialiser;
     * left exactly as it was because it is part of the generated output. */
    fprintf(code, "\nextern char *yy_END_OF_FILE = NULL;\n");
    fprintf(code, "#else\n");
    fprintf(code, "\nchar **argv = (char **)NULL;\nint argc = 0;\n\n");
    fprintf(code, "\nchar *yy_memptr = NULL;\n");
    fprintf(code, "\nchar *yy_END_OF_FILE = NULL;\n");
    fprintf(code, "#endif /* MAIN */\n");

    fprintf(code, "#include \"%s\"\n", "taccutil.h");

    c = token(gram, s, FALSE, FALSE);
    for (;;) {
        switch (c) {
        case CODE:
            /* Copy everything up to the closing marker into the header */
            for (;;) {
                c = token(gram, s, FALSE, FALSE);
                if (c == ENDCODE)
                    break;
                if (c == EOF) {
                    status = unexpected_eof("code section");
                    break;
                }
                fputs(s, head);
            }
            break;

        case OPENCOMMENT:
            fputs(s, code);
            for (;;) {
                c = token(gram, s, TRUE, FALSE);       /* Strings handled
                                                 * differently in comments */
                if (c == EOF) {
                    status = unexpected_eof("comment");
                    break;
                }
                fputs(s, code);
                if (c == CLOSECOMMENT)
                    break;
            }
            break;

        case RBRACE:
            fputs(s, code);
            fprintf(stderr, "Unexpected rbrace?\n");
            break;

        case TAG:
        case CHAR:
            fputs(s, code);
            break;

        case LABEL:
            fprintf(stderr, "%s\n", s);
            generation = 1;
            rulecount = 0;
            len = strlen(s) - 1;        /* the name without its trailing ':' */
            if (len >= sizeof cur_name) {
                fprintf(stderr, "Rule name too long: %s\n", s);
                exit(EXIT_FAILURE);
            }
            memcpy(cur_name, s, len);
            cur_name[len] = '\0';
            if (*mainentry == '\0')
                strcpy(mainentry, cur_name);    /* the first rule is the entry point */
            fprintf(temp, "\nint %s_parse(YYTYPE **__arg)\n{\n", cur_name);
            fprintf(temp, "YYTYPE __temp;\n");
            fprintf(temp, "int i;\n");
            fprintf(temp, "long __pos;\n\n");
            fprintf(temp, "void *__stack;\n\n");
            fprintf(temp, "*__arg = NULL; /* Initialise in case of failure */\n");
            fprintf(temp, "/*if (!_optimise)*/ for (i = 0; i < MAX_RULES; i++) __temp.execute.arg[i] = NULL;\n");
            fprintf(temp, "if (!_optimise) debug_enter(\"%s_parse - trying option 1\");\n", cur_name);
            fprintf(temp, "__stack = stackmark();\n");
            fprintf(temp, "if (note_backtrack(&__pos)\n");
            *rules = '\0';
            break;

        case STRING:
            rulecount += 1;
            if (rulecount > maxrules)
                maxrules = rulecount;
            fprintf(temp, "&& _regexp_parse(savepat, %s, &__temp.execute.arg[%d])\n",
                    str_to_regexp(s), rulecount);
            append_if_room(rules, sizeof rules, string(s));
            append_if_room(rules, sizeof rules, " ");
            break;

        case RULE:
            rulecount += 1;
            s[strlen(s) - 1] = '\0';    /* strip the closing '>' */
            if (strncmp(s, "<!", 2) == 0) {
               fprintf(temp, "&& not(%s_parse,&__temp.execute.arg[%d]) /* Lazy eval */\n", &s[2], rulecount);
            } else if (strncmp(s, "<?", 2) == 0) {
               fprintf(temp, "&& is(%s_parse,&__temp.execute.arg[%d]) /* Lazy eval */\n", &s[2], rulecount);
            } else {
               fprintf(temp, "&& %s_parse(&__temp.execute.arg[%d])\n", &s[1], rulecount);
            }
            if (rulecount > maxrules)
                maxrules = rulecount;
            append_if_room(rules, sizeof rules, s);
            append_if_room(rules, sizeof rules, "> ");
            break;

        case SEMI:
            emit_alternative_end(temp, cur_name, generation, rulecount, rules);
            fprintf(temp, "   if (!_optimise) debug_exit(\"%s_parse failed\");\n", cur_name);
            fprintf(temp, "   return(FALSE);\n");
            fprintf(temp, "}\n");
            break;

        case VBAR:
            emit_alternative_end(temp, cur_name, generation, rulecount, rules);
            /* Only now forget the finished alternative's description: it was
             * needed for the debug message emitted just above. */
            *rules = '\0';
            generation += 1;
            fprintf(temp, "   /*if (!_optimise)*/ for (i = 0; i < MAX_RULES; i++) __temp.execute.arg[i] = NULL;\n");
            fprintf(temp, "   if (!_optimise) debug_exit(\"%s_parse - trying option %d\"); lev += 1;\n",
                    cur_name, generation);
            fprintf(temp, "\nif (note_backtrack(&__pos)\n");
            rulecount = 0;
            break;

        case LBRACE:
            fprintf(code, "\nint %s_%d(YYTYPE *__arg)\n{\nint __debug = SEEN_%s_%d = debug_execute_enter(",
                    cur_name, generation, cur_name, generation);
            fprintf(code, "\"Executing procedure %s_%d -- arg at %%p\", __arg);\n",
                    cur_name, generation);
            fprintf(head, "\nint %s_%d(YYTYPE *__arg);\n", cur_name, generation);
            if (generation == 1) {
                fprintf(head, "\nint %s_parse(YYTYPE **__arg);\n", cur_name);
            }
            fprintf(head, "static int SEEN_%s_%d = FALSE;\n", cur_name, generation);
            fprintf(coverage, "if ((cover_testing) && (!SEEN_%s_%d)) LogWarning(ProgName, \"%s\", 0, \"Alternative %d of rule %s not tested\\n\");\n", cur_name, generation, efile, generation, cur_name);
            bracecount = 1;
            while (bracecount != 0) {
                c = token(gram, s, FALSE, TRUE); /* Allow \" through as \" */
                if (c == EOF) {
                    status = unexpected_eof("action");
                    break;
                }
                if (c == OPENCOMMENT) {
                    fputs(s, code);
                    for (;;) {
                        c = token(gram, s, TRUE, FALSE);       /* Strings handled
                                                         * differently in
                                                         * comments */
                        if (c == CLOSECOMMENT || c == EOF)
                            break;
                        fputs(s, code);
                    }
                    if (c == EOF) {
                        status = unexpected_eof("comment");
                        break;
                    }
                    fputs(s, code);     /* the comment terminator itself */
                } else if (c == LBRACE) {
                    bracecount += 1;
                    fputs(s, code);
                } else if (c == RBRACE) {
                    bracecount -= 1;
                    if (bracecount == 0) {
                        /* End of the action: add the exit trace and return */
                        fprintf(code, "\nif (!_optimise) debug_execute_exit(\"%s_%d\");\nreturn(TRUE);\n", cur_name, generation);
                    }
                    fputs(s, code);
                } else if (c == DOLLAR) { /* removed '*' from (USERTYPE *) below */
                    /* s is '$' plus exactly one digit, so atol() cannot fail */
                    fprintf(code, "((USERTYPE)(__arg->execute.arg[%ld]->execute.user_value))", atol(&s[1]));
                } else if (c == AT) {
                    fprintf(code, "__arg->execute.arg[%ld]->execute", atol(&s[1]));
                } else { /* Assume STRING? */
                    fputs(s, code);
                }
            }
            break;

        case EOF:
            break;
        default:
            fprintf(stderr, "Forgot %d!!!\n", c);
            fputs(s, code);
            break;
        }
        if (c == EOF)
            break;
        c = token(gram, s, FALSE, FALSE);
    }
    fclose(gram);
    fclose(coverage);

    fprintf(temp, "\n\nstatic int cover_test(void)\n");
    fprintf(temp, "{\n");
    if (!copy_file_to(coverfile, temp)) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    fprintf(temp, "}\n");
    fprintf(temp, "#ifndef NO_MAIN  /* Get this by default */\n");
    fprintf(temp, "\nint main(int local_argc, char **local_argv)\n");
    fprintf(temp, "{\n");
    fprintf(temp, "YYTYPE *root; /* Or could be a single YYTYPE, plus an explicit makeparsetree() */\n");
    fprintf(temp, "int i;\n");
    fprintf(temp, "   argv = local_argv; argc = local_argc;\n");
    fprintf(temp, "   /* Should open file yyin here; do it in first entry"
      " routine *only* if\n   it recognises the null phrase \"\" */\n");
    fprintf(temp, "   if (%s_parse(&root)) {\n", mainentry);
    fprintf(temp, "      dump_parsetree(root);\n");
    fprintf(temp, "      execute_parsetree(root);\n");
    fprintf(temp, "   } else {\n");
    fprintf(temp, "      fprintf(stderr, \"Parse fail\\n\");\n");
    fprintf(temp, "      for (i = 0; i < %d; i++) if (savepat[i].r != NULL) free(savepat[i].r);\n", regid);
    fprintf(temp, "      exit(EXIT_FAILURE);\n");
    fprintf(temp, "   }\n");
    fprintf(temp, "   /*free_parsetree(root);*/ /*free(root);*/\n");
    fprintf(temp, "   cover_test();\n");
    fprintf(temp, "   exit(EXIT_SUCCESS);\n");
    fprintf(temp, "}\n");
    fprintf(temp, "#endif /* not NO_MAIN */\n");
    fprintf(temp, "\nstatic void _regexp_freeall(void) {\n");
    fprintf(temp, "int i;\n");
    fprintf(temp, "   for (i = 0; i < %d; i++) if (savepat[i].r != NULL) free(savepat[i].r);\n", regid);
    fprintf(temp, "}\n");
    fclose(temp);
    fclose(regs);

    /* The parser functions follow the actions in the C file */
    if (!copy_file_to(parserfile, code)) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    fprintf(head, "\n#define MAX_RULES %d\n", maxrules + 1);
    if (fclose(code) != 0) {
        fprintf(stderr, "File problems\n");
        status = EXIT_FAILURE;
    }

    /* The pattern table goes at the end of the header */
    fprintf(head, "static patpair savepat[%d] = {\n", regid);
    if (!copy_file_to(regsfile, head)) {
        fprintf(stderr, "File problems\n");
        exit(EXIT_FAILURE);
    }
    fprintf(head, "};\n");
    if (fclose(head) != 0) {
        fprintf(stderr, "File problems\n");
        status = EXIT_FAILURE;
    }

    fprintf(stderr, "System: %s\n", indent);
    /* NOTE(review): the command is only an "echo", but it is built from
     * argv[1] and handed to the shell as it stands. */
    system(indent);
    remove(parserfile);
    remove(regsfile);
    remove(coverfile);
    return (status);
}