/*
 *                     RCS file input
 */
/*********************************************************************************
 *                     Lexical Analysis.
 *                     Character mapping table,
 *                     hashtable, Lexinit, nextlex, getlex, getkey,
 *                     getid, getnum, readstring, printstring, savestring,
 *                     checkid, serror, fatserror, error, faterror, warn, diagnose
 *                     fflsbuf, fputs, fprintf
 *                     Testprogram: define LEXDB
 *********************************************************************************
 *
 * Copyright (C) 1982 by Walter F. Tichy
 *                       Purdue University
 *                       Computer Science Department
 *                       West Lafayette, IN 47907
 *
 * All rights reserved. No part of this software may be sold or distributed
 * in any form or by any means without the prior written permission of the
 * author.
 * Report problems and direct all inquiries to Tichy@purdue (ARPA net).
 */

/* $Log:	C.rcslex $
 * Revision 1.1  90/05/22  21:53:43  pmoore
 * Initial revision
 * 
 * Revision 4.4  87/12/18  11:44:47  narten
 * fixed to use "varargs" in "fprintf"; this is required if it is to
 * work on a SPARC machine such as a Sun-4
 *
 * Revision 4.3  87/10/18  10:37:18  narten
 * Updating version numbers. Changes relative to 1.1 actually relative
 * to version 4.1
 *
 * Revision 1.3  87/09/24  14:00:17  narten
 * Sources now pass through lint (if you ignore printf/sprintf/fprintf
 * warnings)
 *
 * Revision 1.2  87/03/27  14:22:33  jenkins
 * Port to suns
 *
 * Revision 1.1  84/01/23  14:50:33  kcs
 * Initial revision
 *
 * Revision 4.1  83/03/25  18:12:51  wft
 * Only changed $Header to $Id.
 *
 * Revision 3.3  82/12/10  16:22:37  wft
 * Improved error messages, changed exit status on error to 1.
 *
 * Revision 3.2  82/11/28  21:27:10  wft
 * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
 * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
 * properly in case there is an IO-error (e.g., file system full).
 *
 * Revision 3.1  82/10/11  19:43:56  wft
 * removed unused label out:;
 * made sure all calls to getc() return into an integer, not a char.
 */

#undef LEXDB
/* version LEXDB is for testing the lexical analyzer. The testprogram
 * reads a stream of lexemes, enters the revision numbers into the
 * hashtable, and prints the recognized tokens. Keywords are recognized
 * as identifiers.
 */

#include "rcsbase.h"
#include <stdarg.h>

/* Character mapping table: gives the token class of every input character.
 * map[0] holds the class for end-of-file so that it can be reached as
 * ctab[-1]; the entry for character code k is map[k + 1].
 * The table covers all 256 byte values: the lexer indexes it with bytes read
 * from the input file, so codes 0x80-0xFF must have an entry too.  They are
 * all classified as UNKN.
 */
enum tokens map[] = {
	EOFILE,			/* this will end up at ctab[-1] */
	/* 0x00 - 0x7F: ASCII */
	UNKN, INSERT, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, SPACE, NEWLN, UNKN, SPACE, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	SPACE, EXCLA, DQUOTE, HASH, DOLLAR, PERCNT, AMPER, SQUOTE,
	LPARN, RPARN, TIMES, PLUS, COMMA, MINUS, PERIOD, DIVIDE,
	DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT,
	DIGIT, DIGIT, COLON, SEMI, LESS, EQUAL, GREAT, QUEST,
	AT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR, UNDER,
	ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	LETTER, LETTER, LETTER, LBRACE, BAR, RBRACE, TILDE, UNKN,
	/* 0x80 - 0xFF: non-ASCII bytes, never part of a valid token */
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
	UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN
};




/* pointer to next hashtable-entry, set by lookup */
struct hshentry *nexthsh;

/* next token, set by nextlex */
enum tokens nexttok;

/* if true, the next suitable lexeme will be entered into the symbol table.
 * Handle with care. */
int hshenter = true;

/* next input character, initialized by Lexinit */
int nextc;

/* end-of-file indicator, set to >0 on end of file */
int eof = 0;

/* current line-number of input */
int line = 1;

/* counter for errors */
int nerror = 0;

/* counter for warnings */
int nwarn = 0;

/* command identification for error messages */
char *cmdid = nil;

/* indicates quiet mode */
int quietflag = false;

/* input file descriptor */
FILE *finptr;

/* file descriptor for echoing input */
FILE *frewrite;

/* indicates whether to echo to frewrite */
int rewriteflag;

/* string table and heap */
char StringTab[strtsize];

/* pointer to next identifier in StringTab */
char *NextString = nil;

/* pointer to next free byte in StringTab; set by nextlex, lookup */
char *Topchar = StringTab;

/* hashtable */
struct hshentry hshtab[hshsize];

/* Function: Finds or creates the hashtable entry for the string at NextString.
 * If no entry holds an equal string, the first empty slot met on the probe
 * path is claimed and made to point at NextString.
 * If an equal string is already stored, Topchar is moved back to NextString
 * (reclaiming the space of the duplicate) and NextString is redirected to
 * the stored copy.
 * Either way nexthsh is left pointing at the entry.
 * Probing: the start index is the sum of the characters modulo hshsize; the
 * step between probes grows by one up to (hshsize-1)/2 and then shrinks back
 * to 1.  When that sequence is exhausted, fatserror is called.
 * Assumptions: NextString points at the first character of the string.
 * Topchar points at the first empty byte after the string.
 */
void lookup(void)
{
	register int slot;		/* current hashtable index */
	register char *cand, *stored;	/* cursors for the string comparison */
	int step = 0;			/* distance to the next probe */
	int stepchange = 1;		/* how step changes after each probe */
	int laststep = (hshsize - 1) / 2;	/* step value ending this sweep */
	int firstsweep = true;

	/* hash code: plain sum of the characters */
	slot = 0;
	for (cand = NextString; *cand; cand++)
		slot += *cand;

	for (;;)
	{
		slot = (slot + step) % hshsize;

		if (hshtab[slot].num == nil)
		{
			/* unused slot: the string is new, claim the slot */
			hshtab[slot].num = NextString;
			nexthsh = &hshtab[slot];
#ifdef LEXDB
			VOID printf("\nEntered: %s at %d ", nexthsh->num, slot);
#endif
			return;
		}

		/* occupied slot: is it the same string? */
		cand = NextString;
		stored = hshtab[slot].num;
		while (*cand == *stored)
		{
			if (*cand == 0)
			{
				/* identical up to and including the terminator */
				nexthsh = &hshtab[slot];
				Topchar = NextString;
				NextString = nexthsh->num;
				return;
			}
			cand++;
			stored++;
		}

		/* collision with a different string: pick the next step */
		if (step != laststep)
		{
			step += stepchange;
		}
		else if (firstsweep)
		{
			/* ascending sweep done; walk the steps back down to 1 */
			stepchange = -1;
			laststep = 1;
			firstsweep = false;
		}
		else
		{
			fatserror("Hashtable overflow");
		}
	}
}


/* Function: Initialization of the lexical analyzer.
 * Empties the hashtable and resets the lexer state (hshenter, eof, line,
 * error and warning counters, string table pointers).
 * If finptr is set, reads the first character and the first token;
 * otherwise nextc becomes '\0' and nexttok becomes EOFILE.
 */
void Lexinit(void)
{
	register int slot;

	/* mark every hashtable slot as unused */
	for (slot = 0; slot < hshsize; slot++)
		hshtab[slot].num = nil;

	/* reset scanner state */
	hshenter = true;
	eof = 0;
	line = 1;
	nerror = 0;
	nwarn = 0;

	/* reset the string table */
	NextString = nil;
	Topchar = &StringTab[0];

	if (!finptr)
	{
		/* no input file: behave as if already at end of file */
		nextc = '\0';
		nexttok = EOFILE;
		return;
	}

	nextc = GETC(finptr, frewrite, rewriteflag);	/* initial character */
	nextlex();		/* initial token */
}

/* Function: Reads the next token and sets nexttok to the next token code.
 * Only if hshenter==true, a revision number is entered into the
 * hashtable and a pointer to it is placed into nexthsh.
 * This is useful for avoiding that dates are placed into the hashtable.
 * For ID's and NUM's, NextString is set to the character string in the
 * string table, and Topchar is advanced past its terminating '\0'.
 * Once end of file has been seen, every call just yields EOFILE.
 * If a lexeme does not fit into StringTab, fatserror is called before any
 * byte is stored outside the table.
 * Assumption: nextc contains the next character.
 */
void nextlex(void)
{
	register int c;
	register char *sp;
	register enum tokens d;
	/* Lexeme characters may only be stored below this address.  That
	 * leaves one byte for the terminating '\0' and keeps Topchar inside
	 * StringTab afterwards. */
	char *const limit = StringTab + strtsize - 2;

	if (eof)
	{
		nexttok = EOFILE;
		return;
	}
loop:
	switch (nexttok = ctab[nextc])
	{

	case UNKN:
	case IDCHAR:
	case PERIOD:
		/* none of these may start a token: report and skip */
		serror("unknown Character: %c", nextc);
		nextc = GETC(finptr, frewrite, rewriteflag);
		goto loop;

	case NEWLN:
		line++;
#               ifdef LEXDB
		VOID putchar('\n');
#               endif
		/* Note: falls into next case */

	case SPACE:
		nextc = GETC(finptr, frewrite, rewriteflag);
		goto loop;

	case EOFILE:
		eof++;
		nexttok = EOFILE;
		return;

	case DIGIT:
		/* number: a run of digits and periods */
		NextString = sp = Topchar;
		c = nextc;
		do
		{
			if (sp >= limit)
				fatserror("Stringtable overflow");
			*sp++ = c;	/* 1.2. and 1.2 are different */
			d = ctab[c = GETC(finptr, frewrite, rewriteflag)];
		} while (d == DIGIT || d == PERIOD);
		*sp++ = '\0';
		Topchar = sp;
		nextc = c;
		if (hshenter == true)
			lookup();	/* lookup updates NextString, Topchar */
		nexttok = NUM;
		return;


	case LETTER:
		/* identifier: a letter followed by letters, digits, IDCHARs */
		NextString = sp = Topchar;
		c = nextc;
		do
		{
			if (sp >= limit)
				fatserror("Stringtable overflow");
			*sp++ = c;
			d = ctab[c = GETC(finptr, frewrite, rewriteflag)];
		} while (d == LETTER || d == DIGIT || d == IDCHAR);
		*sp++ = '\0';
		Topchar = sp;
		nextc = c;
		nexttok = ID;	/* may be ID or keyword */
		return;

	case SBEGIN:		/* long string */
		nexttok = STRING;
		/* note: only the initial SBEGIN has been read */
		/* read the string, and reset nextc afterwards */
		return;

	default:
		/* single-character token: its class is the token code */
		nextc = GETC(finptr, frewrite, rewriteflag);
		return;
	}
}

/* Function: Consumes the current token if it equals token.
 * On a match the input is advanced with nextlex and true is returned;
 * otherwise nothing is consumed and false is returned.
 * Doesn't work for strings and keywords; loses the character string for ids.
 */
int getlex(enum tokens token)
{
	if (nexttok != token)
		return (false);

	nextlex();
	return (true);
}

/* Function: Consumes the current token if it is an identifier whose text
 * is identical to key.
 * On a match Topchar is reset to NextString, the input is advanced with
 * nextlex, and true is returned; otherwise false is returned and nothing
 * is consumed.
 */
int getkey(char *key)
{
	register char *kp, *ip;

	if (nexttok != ID)
		return (false);

	/* walk both strings in step until they differ or both end */
	for (kp = key, ip = NextString; *kp == *ip; kp++, ip++)
	{
		if (*kp == '\0')
		{
			/* match found */
			Topchar = NextString;	/* reset Topchar */
			nextlex();
			return (true);
		}
	}
	return (false);
}

/* Function: Consumes the current token if it is an identifier.
 * Returns a pointer to the identifier's text (the value NextString had
 * before advancing) after calling nextlex; returns nil, without consuming
 * anything, if the current token is not an identifier.
 * Treats keywords as identifiers.
 */
char *getid(void)
{
	register char *ident;

	if (nexttok != ID)
		return nil;

	ident = NextString;	/* remember it: nextlex may move NextString */
	nextlex();
	return ident;
}

/* Function: Consumes the current token if it is a number.
 * Returns the hashtable entry recorded in nexthsh after calling nextlex;
 * returns nil, without consuming anything, if the current token is not a
 * number.
 * Doesn't work if hshenter is false.
 */
struct hshentry *getnum(void)
{
	register struct hshentry *entry;

	if (nexttok != NUM)
		return nil;

	entry = nexthsh;	/* remember it: nextlex may overwrite nexthsh */
	nextlex();
	return entry;
}

/* Function: Skip over characters until terminating single SDELIM.
 * A doubled SDELIM is part of the string and does not terminate it.
 * If rewriteflag is set, every character read (including the one that
 * follows the terminating SDELIM) is copied to frewrite.
 * On return nextc holds the character following the terminating SDELIM.
 * If end of file is reached inside the string, nextc is set to EOF and
 * "Unterminated string" is reported through error.
 * Does not advance nextlex at the end.
 */
void readstring(void)
{
	register int c;

	/* EOF is tested before echoing: passing EOF to putc would write a
	 * 0xFF byte and return 0xFF, so the end of input would go unnoticed. */
	while ((c = getc(finptr)) != EOF)
	{
		if (rewriteflag)
			putc(c, frewrite);
		if (c == SDELIM)
		{
			/* look at the next character to tell a doubled SDELIM
			 * from the end of the string */
			c = getc(finptr);
			if (c != EOF && rewriteflag)
				putc(c, frewrite);
			if (c != SDELIM)
			{
				/* end of string */
				nextc = c;
				return;
			}
		}
	}
	nextc = c;
	error("Unterminated string");
}

/* Function: copy a string to stdout, until terminated with a single SDELIM.
 * A doubled SDELIM in the input is printed as one SDELIM.
 * On return nextc holds the character following the terminating SDELIM.
 * If end of file is reached inside the string, nextc is set to EOF and
 * "Unterminated string" is reported through error.
 * Does not advance nextlex at the end.
 */
void printstring(void)
{
	register int c;		/* int, so that EOF can be told from a byte */

	while ((c = getc(finptr)) != EOF)
	{
		if (c == SDELIM)
		{
			if ((c = getc(finptr)) != SDELIM)
			{
				/* end of string */
				nextc = c;
				return;
			}
			/* doubled SDELIM: c is SDELIM, printed once below */
		}
		VOID putchar(c);
	}
	nextc = c;
	error("Unterminated string");
}

/* Function: Copies a string terminated with SDELIM from file finptr to buffer
 * target, but not more than length bytes (including the terminating '\0').
 * If the string is longer than that, it is truncated to length-1 characters,
 * an error is reported, and the extra characters are skipped.
 * The string may be empty, in which case a '\0' is placed into target.
 * Double SDELIM is replaced with SDELIM.
 * Characters are read with GETC(finptr, frewrite, rewriteflag).
 * Returns the number of bytes stored in target, counting the terminating
 * '\0', when the string fits; returns length after truncation or when end
 * of file is reached before the terminating SDELIM (an error is reported
 * and target is still '\0'-terminated).
 * On return nextc holds the character following the terminating SDELIM, or
 * EOF.
 * Does not advance nextlex at the end.
 * NOTE(review): length is expected to be at least 1; smaller values are not
 * guarded against.
 */
int savestring(char *target, int length)
{
	register char *tp, *max;
	register int c;

	tp = target;
	max = target + length;	/* max is one too large */
	while ((c = GETC(finptr, frewrite, rewriteflag)) != EOF)
	{
		*tp++ = c;
		if (c == SDELIM)
		{
			if ((c = GETC(finptr, frewrite, rewriteflag)) != SDELIM)
			{
				/* end of string: the stored SDELIM becomes the
				 * terminator */
				*(tp - 1) = '\0';
				nextc = c;
				return (int) (tp - target);
			}
			/* doubled SDELIM: one copy has been stored, the
			 * second one is dropped */
		}
		if (tp >= max)
		{
			/* overflow */
			error("string buffer overflow -- truncating string");
			target[length - 1] = '\0';
			/* skip rest of string */
			while ((c = GETC(finptr, frewrite, rewriteflag)) != EOF)
			{
				if ((c == SDELIM) && ((c = GETC(finptr, frewrite, rewriteflag)) != SDELIM))
				{
					/* end of string */
					nextc = c;
					return length;
				}
			}
			nextc = c;
			error("Can't find %c to terminate string before end of file", SDELIM);
			return length;
		}
	}
	/* end of file inside the string: terminate what has been saved so
	 * the caller never sees an unterminated buffer */
	if (tp < max)
		*tp = '\0';
	nextc = c;
	error("Can't find %c to terminate string before end of file", SDELIM);
	return length;
}

/* Function: Check whether the string starting at id is an identifier and
 * return a pointer to the last char of the identifer. White space, delim
 * and '\0' are legal delimeters. Aborts the program if not a legal
 * identifier. Useful for checking commands.
 */
char *checkid(char *id, char delim)
{
	register enum tokens d;
	register char *temp;
	register char c, tc;

	temp = id;
	if (ctab[*id] == LETTER)
	{
		while ((d = ctab[c = (*++id)]) == LETTER || d == DIGIT || d == IDCHAR);
		if (c != ' ' && c != '\t' && c != '\n' && c != '\0' && c != delim)
		{
			/* append \0 to end of id before error message */
			tc = c;
			while ((c = (*++id)) != ' ' && c != '\t' && c != '\n' && c != '\0' && c != delim);
			*id = '\0';
			faterror("Invalid character %c in identifier %s", tc, temp);
			return nil;
		}
		else
			return id;
	}
	else
	{
		/* append \0 to end of id before error message */
		while ((c = (*++id)) != ' ' && c != '\t' && c != '\n' && c != '\0' && c != delim);
		*id = '\0';
		faterror("Identifier %s does not start with letter", temp);
		return nil;
	}
}

#pragma -v1			/* hint to the compiler to check f/s/printf
				 * format */
/* Function: non-fatal syntax error.
 * Counts the error in nerror and prints, on stderr, the command id and the
 * current input line followed by the printf-style message e and a newline.
 */
void serror(char *e,...)
{
	va_list args;

	nerror++;
	VOID fprintf(stderr, "%s error, line %d: ", cmdid, line);

	va_start(args, e);
	VOID vfprintf(stderr, e, args);
	va_end(args);

	VOID putc('\n', stderr);
}

/* Function: non-fatal error.
 * Counts the error in nerror and prints, on stderr, the command id followed
 * by the printf-style message e and a newline.
 */
void error(char *e,...)
{
	va_list args;

	nerror++;
	VOID fprintf(stderr, "%s error: ", cmdid);

	va_start(args, e);
	VOID vfprintf(stderr, e, args);
	va_end(args);

	VOID putc('\n', stderr);
}

/* Function: fatal syntax error.
 * Counts the error in nerror, prints the command id, the current input line
 * and the printf-style message e on stderr, announces the abort, then calls
 * cleanup and exits with status 1.  Does not return.
 */
void fatserror(char *e,...)
{
	va_list args;

	nerror++;
	VOID fprintf(stderr, "%s error, line %d: ", cmdid, line);

	va_start(args, e);
	VOID vfprintf(stderr, e, args);
	va_end(args);

	VOID fprintf(stderr, "\n%s aborted\n", cmdid);
	VOID cleanup();
	exit(1);
}

/* Function: fatal error, terminates program after cleanup.
 * Counts the error in nerror, prints the command id and the printf-style
 * message e on stderr, announces the abort, then calls cleanup and exits
 * with status 1.  Does not return.
 */
void faterror(char *e,...)
{
	va_list args;

	nerror++;
	VOID fprintf(stderr, "%s error: ", cmdid);

	va_start(args, e);
	VOID vfprintf(stderr, e, args);
	va_end(args);

	VOID fprintf(stderr, "\n%s aborted\n", cmdid);
	VOID cleanup();
	exit(1);
}

/* Function: prints a warning message.
 * Counts the warning in nwarn and prints, on stderr, the command id followed
 * by the printf-style message e and a newline.
 */
void warn(char *e,...)
{
	va_list args;

	nwarn++;
	VOID fprintf(stderr, "%s warning: ", cmdid);

	va_start(args, e);
	VOID vfprintf(stderr, e, args);
	va_end(args);

	VOID putc('\n', stderr);
}

/* Function: prints a diagnostic message.
 * Unless quietflag is set, prints the printf-style message e followed by a
 * newline on stderr.  In quiet mode nothing is printed.
 */
void diagnose(char *e,...)
{
	va_list args;

	if (quietflag)
		return;

	va_start(args, e);
	VOID vfprintf(stderr, e, args);
	VOID putc('\n', stderr);
	va_end(args);
}

#pragma -v0			/* back to default */

/* Function: Flush iop.
 * Same routine as __flsbuf in stdio, but aborts program on error:
 * passes c and iop on to __flsbuf and calls faterror("write error") if
 * that returns EOF.  Otherwise returns the value __flsbuf returned.
 */
int fflsbuf(int c, register FILE * iop)
{
	register int result;

	if ((result = __flsbuf(c, iop)) == EOF)
		faterror("write error");
	return result;
}


/* Function: Put string s on file iop, abort on error.
 * Same as fputs in stdio, but with different putc macro.
 * Returns the result of the last putc, or 0 if s is empty and nothing
 * was written.
 */
int fputs(const char *s, FILE * iop)
{
	register int r = 0;	/* stays 0 when the string is empty */
	register int c;

	while ((c = *s++) != 0)
		r = putc(c, iop);
	return (r);
}

/* Function: formatted output. Same as fprintf in stdio,
 * but aborts program on error: after formatting with vfprintf, the error
 * indicator of iop is checked and faterror("write error") is called if it
 * is set.
 * faterror itself prints through this function, so a failing stderr would
 * otherwise make the two functions call each other without end; the
 * reporting flag makes sure the write error is reported at most once and
 * lets faterror run through to its cleanup and exit.
 * Returns the value of vfprintf.
 */
int fprintf(FILE * iop, const char *fmt,...)
{
	static int reporting = 0;	/* set while a write error is being
					 * reported */
	register int value;
	va_list ap;

	va_start(ap, fmt);
	value = vfprintf(iop, fmt, ap);
	va_end(ap);

	if (ferror(iop) && !reporting)
	{
		reporting = 1;
		faterror("write error");
		value = EOF;
	}
	return value;
}


#ifdef LEXDB
/* Test program reading a stream of lexemes and printing the tokens.
 * Usage: the single argument names the input file.
 * Each token is printed followed by " | ".  Numbers are alternately entered
 * into the hashtable and left unentered (hshenter is toggled after every
 * NUM) to exercise both paths of the lexer.
 * Returns 0 after end of input; exits with status 1 if no input file is
 * given or it cannot be opened.
 */

int main(int argc, char *argv[])
{
	cmdid = "lextest";
	if (argc < 2)
	{
		VOID fputs("No input file\n", stderr);
		exit(1);
	}
	if ((finptr = fopen(argv[1], "r")) == NULL)
	{
		faterror("Can't open input file %s\n", argv[1]);
	}
	/* no echoing; must be settled before Lexinit reads the first
	 * character */
	rewriteflag = false;
	Lexinit();
	while (nexttok != EOFILE)
	{
		switch (nexttok)
		{

		case ID:
			VOID printf("ID: %s", NextString);
			break;

		case NUM:
			if (hshenter == true)
				/* the pointer difference is not an int:
				 * convert it to match %d */
				VOID printf("NUM: %s, index: %d", nexthsh->num, (int) (nexthsh - hshtab));
			else
				VOID printf("NUM, unentered: %s", NextString);
			hshenter = !hshenter;	/* alternate between dates
						 * and numbers */
			break;

		case COLON:
			VOID printf("COLON");
			break;

		case SEMI:
			VOID printf("SEMI");
			break;

		case STRING:
			/* skip the string body; leaves nextc after it */
			readstring();
			VOID printf("STRING");
			break;

		case UNKN:
			VOID printf("UNKN");
			break;

		default:
			VOID printf("DEFAULT");
			break;
		}
		VOID printf(" | ");
		nextlex();
	}
	VOID printf("\nEnd of lexical analyzer test\n");

	return 0;
}

/* dummy cleanup routine: fatserror and faterror call cleanup() before
 * exiting; the test program has nothing to release, so it does nothing. */
void cleanup(void)
{
}


#endif
