// FOR DOCUMENTATION, SEE MY BLOG POST:
// http://techennui.blogspot.com/2007/11/quick-hack-17-in-series-of-42-inlining.html
// Expands LaTeX \newcommand macros to allow submission of documents
// to print services which do not allow user-defined macros.
// Valid input formats are:
// \newcommand{\whatever}{Replacement text}
// \newcommand{\whatever}[2]{Expand #1 and #2 but not \#1 or even $\#1$}
// - anything else ought to be passed through verbatim; if an insurmountable
// error is detected, the program exits with a non-0 return code.
// The purpose of this utility is similar to:
// http://winedt.org/Macros/LaTeX/uncommand.php
// which I wasn't aware of when I wrote it. Though I would like to see how
// well that program handles the test input file, to see if it does the
// right thing with some of the more complex definitions :-)
//
// See also http://texcatalogue.sarovar.org/entries/de-macro.html
// and http://www.mackichan.com/index.html?techtalk/685.htm~mainFrame
#include <stdio.h>
#include <stdlib.h> /* exit() */
#include <string.h> /* strcpy(), strcmp() */
#include <ctype.h>
/* Boolean constants used for the in_comment flag. */
#define TRUE (0==0)
#define FALSE (0!=0)
// Sorry, only 8-bit char sets supported.
#define NEWCOMMANDLEN 14 /* strlen("\\renewcommand")+1 */
/* Will later make these dynamic - quick hack for now */
#define MAXMACRONAMELEN 80
#define MAXMACROBODYLEN 1024
#define MAXARGLEN 1024
#define MAXCOMMANDS 1024
#define MAX_MACRO_EXPANSION (32*1024)
#define MAXARGS 10 /* rows in actual[]; LaTeX parameters run from #1 to #9 */
/* Flag bits OR-ed into the int codes stored in macro bodies.  _PARAMETER_
   marks a "#n" placeholder; _PROTECTED_ is not referenced in this file. */
#define _PROTECTED_ 256
#define _PARAMETER_ 512
static int NEXTFREEMACRO = 0;            /* next unused slot in macro[]/body[]/args[] */
static int THIS_COMMAND = MAXCOMMANDS-1; /* slot chosen by the most recent lookup */
/* Macro names, indexed as macro[slot].
   NOTE(review): as declared this gives MAXMACRONAMELEN (80) slots of
   MAXCOMMANDS bytes each, not MAXCOMMANDS slots - the dimensions look
   transposed.  Left unchanged because main() clears
   macro[0][0..MAXCOMMANDS-1], which needs rows that long. */
static char macro[MAXMACRONAMELEN][MAXCOMMANDS];
/* Macro bodies, indexed as body[slot]: 0-terminated int codes. */
static int body[MAXCOMMANDS][MAXMACROBODYLEN];
/* Declared parameter count of each macro, indexed as args[slot]. */
static int args[MAXCOMMANDS];
/* Actual argument text, indexed as actual[n] at the point of a macro call.
   Each row must hold a whole argument, so the row length is MAXARGLEN. */
char actual[MAXARGS][MAXARGLEN];
/* Name of the defining command being processed ("newcommand" or
   "renewcommand"); used in diagnostics and pass-through output. */
char curcommand[NEWCOMMANDLEN] = { '\0' };
static int in_comment = FALSE; /* TRUE while main() is copying a % comment */
/* Copy a 0-terminated sequence of ints from source to dest, terminator
   included.  The int analogue of strcpy(): dest must have room for every
   element up to and including the 0, and no bound is checked here. */
void intcpy(int *dest, int *source)
{
    int value;

    do {
        value = *source;
        *dest = value;
        source += 1;
        dest += 1;
    } while (value != 0);
}
/* Size of the re-read buffer is 2^BITS entries; override with -DBITS=n. */
#if !defined(BITS)
#define BITS 15
#endif
#define BUFFERSIZE (1<<BITS)
#define CIRCULAR (BUFFERSIZE-1) /* index mask: BUFFERSIZE is a power of two */
/* Cyclic re-read buffer.  Elements are int rather than char so that values
   outside the 8-bit range (EOF, flag bits) survive a round trip. */
static int buffer[BUFFERSIZE];
static int get_index = 0; /* next entry to hand out; equal to put_index when empty */
static int put_index = 0; /* next free entry at the tail */
// This is an awkward pushback buffer: text is normally inserted *before* the
// current 'get' position, stepping the get index backwards.  That is fine when
// one whole string is inserted at a time, but two strings pushed back in a row
// can come out in the wrong order unless the caller is careful.  In other
// words it is not as simple as the usual put/get on a cyclic buffer.
/* Return the next input character: the head of the re-read buffer if it
   holds anything, otherwise the next character from stdin (which may be
   EOF). */
int get_next_char(void)
{
    int head;

    if (get_index != put_index) {
        head = buffer[get_index];
        get_index = (get_index + 1) & CIRCULAR;
        return head;
    }
    return fgetc(stdin);
}
/* Look up a macro by name (without the leading backslash).
   Returns the slot of the matching definition, or NEXTFREEMACRO when the
   name is unknown.

   The search runs from the newest definition to the oldest.  Every
   definition - \renewcommand included - is appended as a new slot, so the
   most recent slot with a given name is the one currently in force.
   Searching oldest-first would make a \renewcommand invisible.

   Aborts with "Coding error #1" if NEXTFREEMACRO lies outside the name
   table, since scanning would then read beyond it. */
int locate_macro_name(char *def)
{
    /* row count of the name table exactly as it is declared */
    const int capacity = (int)(sizeof macro / sizeof macro[0]);
    int i;

    if (NEXTFREEMACRO < 0 || NEXTFREEMACRO > capacity) {
        fprintf(stderr, "Coding error #1. Aborted.\n");
        exit(1);
    }
    for (i = NEXTFREEMACRO - 1; i >= 0; i--) {
        if (strcmp(def, macro[i]) == 0) return i;
    }
    return NEXTFREEMACRO;
}
/* Append c at the TAIL of the re-read buffer, so it is delivered after
   everything already waiting there.  Exits with status 1 if that fills the
   buffer (put index catches up with get index). */
void reinsert_char(int c)
{
    int slot = put_index;

    buffer[slot] = c;
    put_index = (slot + 1) & CIRCULAR;
    if (put_index != get_index) return;

    fprintf(stderr, "Sorry, a large expansion ran me out of space. Please recompile with -DBITS=%d\n", BITS+1);
    exit(1);
}
/* Push c back at the HEAD of the re-read buffer so that it is the very next
   value get_next_char() returns - the same idea as ungetc(c, stdin).
   Exits with status 1 if the buffer becomes full. */
void unread_char(int c)
{
    int slot = (get_index - 1) & CIRCULAR; /* step backwards, wrapping round */

    get_index = slot;
    buffer[slot] = c;
    if (get_index == put_index) {
        fprintf(stderr, "Sorry, a large expansion ran me out of space. Please recompile with -DBITS=%d\n", BITS+1);
        exit(1);
    }
}
/* Push the whole 0-terminated string s back at the head of the re-read
   buffer, so that its characters are read next and in their original order.
   The characters are pushed last-to-first because each unread_char() call
   places its argument in front of what is already buffered.

   An empty string pushes nothing.  Each byte is converted through
   unsigned char so that bytes above 127 go back as 128..255 rather than as
   negative values; otherwise, where plain char is signed, the byte 0xFF
   would be indistinguishable from EOF. */
void unread_string(char *s)
{
    char *end = s;

    while (*end != '\0') end += 1;
    while (end != s) {
        end -= 1;
        unread_char((unsigned char)*end);
    }
}
/* Read a TeX command word.  c is its first letter (already consumed by the
   caller); further letters are read until a non-letter turns up, and that
   terminating character is pushed back for the caller to read again.

   Returns a pointer to a static buffer holding the word without the
   backslash.  The buffer is overwritten by the next call.

   Exits with status 1 if the word does not fit in MAXMACRONAMELEN-1
   characters. */
char *get_command(int c)
{
    static char w[MAXMACRONAMELEN];
    char *wp = w;

    for (;;) {
        if (wp == w + MAXMACRONAMELEN - 1) { /* last cell is kept for the '\0' */
            *wp = '\0';
            fprintf(stderr, "Sorry, the command name starting \"\\%s\" is longer than %d characters\n",
                    w, MAXMACRONAMELEN - 1);
            exit(1);
        }
        *wp++ = (char)c;
        c = get_next_char();
        /* <ctype.h> functions accept only EOF or an unsigned char value */
        if (c == EOF || !isalpha((unsigned char)c)) break;
    }
    unread_char(c);
    *wp = '\0';
    return w;
}
/* Return the next input character that is not part of a '%' comment.
   A comment runs from '%' up to and including the end of its line; any
   number of consecutive comment lines are skipped.

   Returns EOF if the input ends, including when it ends in the middle of a
   comment (where the loop used to spin forever waiting for a newline). */
int next_non_comment_char(void)
{
    int c;

    for (;;) {
        c = get_next_char();
        if (c != '%') return c;
        /* discard the rest of the comment line */
        do {
            c = get_next_char();
            if (c == EOF) return EOF;
        } while (c != '\n');
    }
}
/* Return the next character that is neither inside a '%' comment nor
   whitespace (as classified by isspace()). */
int next_non_comment_non_space_char(void)
{
    int c;

    do {
        c = next_non_comment_char();
    } while (isspace(c));
    return c;
}
void learn_body(void)
{
int c;
c = next_non_comment_non_space_char();
if (c != '{') {
// single token. Not yet handled.
} else {
// READ BODY UP TO AND INCLUDING FINAL '}' BUT NOT BEYOND
static int expansion[MAXMACROBODYLEN];
int *ep = expansion;
int c, depth = 0;
for (;;) {
c = get_next_char(); // We'll include comments in the macro expansion *but* must be careful not to count braces within comments
if (c == '\\') {
*ep++ = c;
c = get_next_char();
*ep++ = c;
} else if (c == '%') {
// Copy rest of comment
for (;;) {
*ep++ = c;
if (c == '\n') break;
c = get_next_char();
}
} else {
// regular character - proess it normally:
if (c == '{') depth += 1;
if ((c == '}') && (depth == 0)) break;
if (c == '}') depth -= 1;
if (c == '#') {
c = get_next_char(); // '1' .. '9'
*ep++ = c - '1' + _PARAMETER_; // INTERNAL CODE FOR #1, #2, ... #9
} else *ep++ = c;
}
}
*ep = '\0';
intcpy(body[NEXTFREEMACRO], expansion);
NEXTFREEMACRO = NEXTFREEMACRO + 1; // We now have all the pieces.
// Need to add check to see if we've busted the array bounds.
}
}
int learn_argcount(void)
{
int c, argcount;
c = get_next_char();
argcount = c-'0'; // eg "[3]" -> 3
// VERIFY THAT isdigit(c)
c = get_next_char(); // ']'
// VERIFY THAT c == ']'
return argcount;
}
void learn_keyword(void)
{
static char name[MAXMACRONAMELEN];
char *cp = name;
int c, argcount;
c = next_non_comment_non_space_char();
if (c != '\\') {
fprintf(stderr, "Problem at \"\\%s{%c\" <-- last char should be a '\\' (was ascii %d)\n", curcommand, c, c);
exit(1);
}
for (;;) {
c = next_non_comment_non_space_char();
if (!isalpha(c)) break; // or isalnum? Are numbers allowed in TeX words? Probably not.
*cp++ = c;
}
*cp = '\0';
if (c != '}') {
fprintf(stderr, "Problem at \"\\%s{\\%s%c\" <-- last char should be a '}' (was ascii %d)\n",
curcommand, name, c, c);
exit(1);
}
strcpy(macro[NEXTFREEMACRO], name);
// NOW READ ARG COUNT IF PRESENT FOLLOWED BY BODY
c = next_non_comment_non_space_char();
if (c == '[') {
argcount = learn_argcount(); // reads n and the final ']'
} else {
reinsert_char(c);
argcount = 0;
}
args[NEXTFREEMACRO] = argcount;
learn_body();
}
/* Handle what follows a \newcommand or \renewcommand word (its name is in
   curcommand).  If the next significant character is '{', the definition is
   learned via learn_keyword().  Otherwise the definition is not in a form we
   understand: the command word is written to stdout unchanged and the
   character is pushed back so the caller copies the rest through. */
void learn_macro(void)
{
    int opener = next_non_comment_non_space_char();

    if (opener != '{') {
        // badly formatted definition
        fprintf(stdout, "\\%s", curcommand);
        unread_char(opener);
        return;
    }
    learn_keyword(); // reads \word and the final '}'
}
void expand_macro(void)
{
// READ ARGS IF NEEDED, THEN EXPAND.
static char temp_buffer[MAX_MACRO_EXPANSION];
char *pp; int *fp; char *ap; // put pointer, fetch pointer, arg pointer
int c, param, i, argcount = args[THIS_COMMAND];
// following text should be args between {}s... (or nothing, if argcount is 0)
// fprintf(stdout, "%% COMPLEX EXPANSION OF \\%s WITH %d ARGS\n", macro[THIS_COMMAND], args[THIS_COMMAND]); // add %c? - do tests and check
for (i = 0; i < argcount; i++) {
c = get_next_char();
if (c == '{') {
// READ PARAM INTO actual[i]
char *ap = actual[i];
for (;;) {
c = get_next_char(); // IS THIS A BUG? DO I NEED TO HANDLE \} OR MULTI-LINE ? % COMMENTS?
if (c == '}') break;
*ap++ = c;
}
*ap = '\0';
// fprintf(stdout, "%% Got actual parameter #%d: %s\n", i+1, actual[i]);
} else {
// parameter is a single atom - not handled!
fprintf(stderr, "Sorry - I expected a {} parameter (#%d) to \\%s but found '%c'\n", i+1, macro[THIS_COMMAND], c);
fprintf(stderr, "This is either a program bug or you need to edit the source text to add {}'s\n");
exit(1);
}
}
// NOW EXPAND THE BODY, SUBSTITUTING ARGS 1..n AS NECESSARY
// THIS IS WHERE WE NEED TO BE EXTRA CAREFUL ABOUT PUSHBACK ORDER!!!!
// THE EXPANDED BODY MAY CONTAIN MORE TEXT TO BE EXPANDED.
fp = body[THIS_COMMAND];
pp = temp_buffer;
for (;;) {
c = *fp++;
if (c == '\0') break;
if ((c&_PARAMETER_) != 0) {
param = (c&255); // 1..9 - TeX counts from 1 up I think. Pre-processed at defn time to 0..n-1
// NEED RANGE CHECK, IF INVALID #n GIVEN - BETTER TO CHECK AT DEFN TIME THOUGH!
ap = actual[param];
for (;;) {
if (*ap == '\0') break;
*pp++ = *ap++;
}
} else *pp++ = c;
}
*pp = '\0';
unread_string(temp_buffer);
// there is a pending char (whatever followed the \word) at getptr.
// we have to put our expansion *before* getptr
}
/* Act on a TeX command word s (given without its backslash):
   - "newcommand" / "renewcommand": remember which one it was in curcommand
     and learn the definition that follows;
   - the name of a learned macro: record its slot in THIS_COMMAND and expand
     the call;
   - anything else: write it to stdout unchanged, backslash restored. */
void handle_word(char *s)
{
    int is_definition = (strcmp(s, "newcommand") == 0) || (strcmp(s, "renewcommand") == 0);

    if (is_definition) {
        strcpy(curcommand, s);
        learn_macro();
        return;
    }

    THIS_COMMAND = locate_macro_name(s);
    if (THIS_COMMAND < NEXTFREEMACRO) {
        expand_macro();
        return;
    }

    // IGNORE UNKNOWN
    fprintf(stdout, "\\%s", s);
}
/* Filter stdin to stdout, learning \newcommand / \renewcommand definitions
   and replacing calls of the learned macros by their expansions.

   Text inside '%' comments is copied through untouched, so macros are
   neither learned nor expanded there.  A backslash followed by a non-letter
   (such as \% or \\) is copied through as a pair.

   Returns 0 on success, 1 if the output could not be written.  Fatal input
   problems exit with status 1 from the routines that detect them. */
int main(int argc, char **argv)
{
    char *command;
    int c;

    (void)argc; (void)argv; /* no command-line options */
    /* The macro tables have static storage duration and therefore start out
       zeroed; no explicit clearing is needed. */
    for (;;) {
        c = get_next_char();
        if (c == EOF) break;
        if (in_comment) {
            fputc(c, stdout);
            if (c == '\n') {
                in_comment = FALSE;
            }
        } else if (c == '\\') {
            c = get_next_char();
            if (c == EOF) {
                /* lone backslash at the very end of the input */
                fputc('\\', stdout);
                break;
            }
            /* <ctype.h> functions accept only EOF or an unsigned char value */
            if (isalpha((unsigned char)c)) {
                // Handle TeX word
                command = get_command(c);
                handle_word(command);
            } else {
                fprintf(stdout, "\\%c", c);
            }
        } else if (c == '%') {
            fputc(c, stdout);
            in_comment = TRUE;
        } else {
            fputc(c, stdout);
        }
    }
    // clean up
    if (fflush(stdout) == EOF || ferror(stdout)) {
        fprintf(stderr, "Problem: could not write all of the output\n");
        return 1;
    }
    return 0;
}