%{
/* Wordroff.l
 *
 * Scanner that extracts plain words from roff source, one word per output
 * line, skipping roff markup.
 *
 * Bug: attspell defines these as words:
 *     AA00021  vangogh.berkeley.edu
 *
 * Removing roff gunk.  Any line starting with a "." or "'":
 *     .if t   .if n   .ds XX   .de XX   .nr XX
 *
 * Skip the following directives within text:
 *     \fX  \f(XY  \s+n  \s-n  \(XX  \f[XXXXXXXXXXXX]  \*[XXXXXXXXXXXX]
 *     \*(  \*XXX
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*extern YY_CHAR *yytext;*/

/* Token codes returned by yylex(); 0 is reserved for end of input. */
#define EOFTOK     0
#define WHITESPACE 256
#define DIRECTIVE  257
#define NEWLINE    259
#define WORD       260
#define COMMENT    261

int yylex(void);

/* Non-zero while the rest of the current input line must not emit words. */
static int ignore;
/* Current input position. */
static int cur_line = 1, cur_col = 1; /* 'real-world' conventions */
%}
%%
\\f[a-zA-Z0-9] {
        return(WHITESPACE);  /* \fX */
    }
\\f\([a-zA-Z0-9][a-zA-Z0-9] {
        return(WHITESPACE);  /* \f(XY */
    }
\\s[\-+] {
        return(WHITESPACE);  /* \s+n \s-n (the digit falls to the catch-all) */
    }
\\\([a-zA-Z0-9][a-zA-Z0-9] {
        return(WHITESPACE);  /* \(XX */
    }
\\[f\*]\[[a-zA-Z0-9\-]*\] {
        /* The name between the brackets is a character class; written with
           parentheses it would only match the literal text "a-zA-Z0-9-". */
        return(WHITESPACE);  /* \f[XXXXXX] \*[XXXXX] */
    }
\\\*\( {
        return(WHITESPACE);  /* \*( */
    }
\\\*[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9] {
        /* ********** the missing \ before the * above was the bug! */
        return(WHITESPACE);  /* \*XXX */
    }
[a-zA-Z](\-[a-zA-Z]|[a-zA-Z'])+ {
        /* '+' rather than '*' above means single-letter words are ignored!
           It's pretty good---it handles dashes like these!
         */
        return(WORD);
    }
\n[\.\'][a-zA-Z][a-zA-Z][\11\040-\377]* {
        /* A request line: newline, '.' or ''', a two-letter name, then the
           rest of the line.  Because the pattern needs the leading newline,
           a request on the very first input line is not recognised here. */
        return(COMMENT);
    }
\n  {
        return(NEWLINE);
    }
[\1-\177] {
        /* Any other 7-bit character is a separator.
           NOTE(review): bytes above 0177 match no rule in this section, so
           the lex default action applies to them -- confirm that is wanted. */
        return(WHITESPACE);
    }
%%

/*
 * Print one word on its own line on stdout and flush.
 *
 * Trailing apostrophes and dashes are stripped from `word` in place before
 * printing.  `line`, `col` and `fileinf` describe where the word was found;
 * they are accepted for a positional diagnostic format
 * ("<fileinf>Line <line>, Col <col>: <word>") that is currently disabled,
 * so they are unused.
 */
void output_word(char *word, int line, int col, char *fileinf)
{
    size_t len = strlen(word);

    (void)line;
    (void)col;
    (void)fileinf;

    /* Bounded by len so an empty or all-punctuation string cannot make the
       scan run off the front of the buffer. */
    while (len > 0 && (word[len - 1] == '\'' || word[len - 1] == '-'))
        word[--len] = '\0';

    printf("%s\n", word);
    fflush(stdout);
}

/*
 * Usage: wordroff [file]
 *
 * Reads the named file (or stdin when no single file argument is given),
 * and prints every WORD token that is not on a line flagged by a COMMENT
 * token.  Exits with a failure status if the file cannot be opened.
 */
int main(int argc, char **argv)
{
    char fname[256];
    int token;

    if (argc == 2) {
        if (freopen(argv[1], "r", stdin) == NULL) {
            fprintf(stderr, "wordroff: cannot open input file %s\n", argv[1]);
            exit(EXIT_FAILURE);
        }
        /* Bounded: argv[1] may be longer than fname; truncation is harmless
           because fname only feeds the positional diagnostic prefix. */
        snprintf(fname, sizeof fname, "\"%s\", ", argv[1]);
    } else {
        *fname = '\0';
    }

    ignore = 0;
    while ((token = yylex()) != EOFTOK) {
        if (token == COMMENT) {
            ignore = 1;
            /* The COMMENT token starts with the newline that ends the
               previous line, so it advances the line counter as well. */
            cur_line += 1;
            cur_col = 0;
        } else if (token == NEWLINE) {
            ignore = 0;
            cur_line += 1;
            cur_col = 0;
        } else if (token == WORD) {
            if (!ignore)
                output_word(yytext, cur_line, cur_col, fname);
        }
        /* After a reset to 0 the newline itself counts one, which puts the
           first character of the new line at column 1. */
        cur_col += (int)strlen(yytext);
    }
    return 0;
}