#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "english.h"
#define FALSE (0)
#define TRUE (!0)
/*
** English to Phoneme translation.
**
** Rules are made up of four parts:
**
** The left context.
** The text to match.
** The right context.
** The phonemes to substitute for the matched text.
**
** Procedure:
**
** Separate each block of letters (apostrophes included)
** and add a space on each side. For each unmatched
** letter in the word, look through the rules where the
** text to match starts with the letter in the word. If
** the text to match is found and the right and left
** context patterns also match, output the phonemes for
** that rule and skip to the next unmatched letter.
**
**
** Special Context Symbols:
**
** # One or more vowels
** : Zero or more consonants
** ^ One consonant.
** . One of B, D, V, G, J, L, M, N, R, W or Z (voiced
** consonants)
** % One of ER, E, ES, ED, ING, ELY (a suffix)
** (Right context only)
** + One of E, I or Y (a "front" vowel)
*/
#ifndef ORIGINAL
/*
** Return a freshly malloc'ed copy of the NUL-terminated string s.
**
** The caller owns the returned storage.  Returns NULL when the
** allocation fails, rather than copying through a null pointer.
*/
char *copystr(s)
char *s;
{
    size_t size;
    char *p;

    size = strlen(s) + 1;       /* room for the terminating NUL */
    p = malloc(size);
    if (p == NULL)
        return(NULL);
    memcpy(p, s, size);
    return(p);
}
/*
** Read the next rule from rfile and split it into its four parts.
**
** A rule line has the form   left|mid|right=rep
**
** Lines whose first character is '*' are comments and are skipped.
** Blank lines are skipped silently; any other line that lacks one
** of the delimiters is reported on stderr and skipped, so a search
** for a missing delimiter can never run off the end of the buffer.
**
** left   letters upper-cased, '<' replaced by a blank
** mid    letters upper-cased
** right  '>' replaced by a blank (case is left untouched)
** rep    the first character after '=' (kept even when it is a
**        blank) followed by every character greater than ' '
**
** Each output buffer must have room for a whole input line
** (sizeof line bytes), since the fields are copied with strcpy.
**
** Returns TRUE when a rule was stored, FALSE once fgets reports
** end of file or a read error.
*/
int read_one_rule(rfile, left, mid, right, rep)
FILE *rfile;
char *left;
char *mid;
char *right;
char *rep;
{
    char line[128];
    char *bar1, *bar2, *eq, *s;

    for (;;)
    {
        if (fgets(line, sizeof line, rfile) == NULL)
            return(FALSE);
        if (*line == '*')
            continue;                           /* comment line */
        if (line[strspn(line, " \t\r\n")] == '\0')
            continue;                           /* blank line */

        /* Locate all three delimiters before touching the buffer. */
        bar1 = strchr(line, '|');
        bar2 = (bar1 != NULL) ? strchr(bar1 + 1, '|') : NULL;
        eq = (bar2 != NULL) ? strchr(bar2 + 1, '=') : NULL;
        if (eq != NULL)
            break;

        line[strcspn(line, "\n")] = '\0';
        fprintf(stderr, "Ignoring malformed rule line: \"%s\"\n", line);
    }

    /* Cut the line into four NUL-terminated fields in place. */
    *bar1 = '\0';
    *bar2 = '\0';
    *eq = '\0';

    /* Left context. */
    for (s = line; *s != '\0'; s++)
    {
        /* <ctype.h> arguments must be representable as unsigned char */
        if (isalpha((unsigned char) *s))
            *s = (char) toupper((unsigned char) *s);
        if (*s == '<')
            *s = ' ';                           /* nothing */
    }
    strcpy(left, line);

    /* Text to match. */
    for (s = bar1 + 1; *s != '\0'; s++)
    {
        if (isalpha((unsigned char) *s))
            *s = (char) toupper((unsigned char) *s);
    }
    strcpy(mid, bar1 + 1);

    /* Right context. */
    for (s = bar2 + 1; *s != '\0'; s++)
    {
        if (*s == '>')
            *s = ' ';                           /* nothing */
    }
    strcpy(right, bar2 + 1);

    /*
    ** Replacement phonemes: the first character is taken even when
    ** it is a blank; the field then ends at the first blank or
    ** control character (normally the newline).
    */
    s = eq + 1;
    if (*s >= ' ')
    {
        s++;
        while (*s > ' ')
            s++;
    }
    *s = '\0';
    strcpy(rep, eq + 1);

    return(TRUE);
}
/*
** Load the rule tables from the file "rules-navy".
**
** Allocates Rules (27 tables: index 0 for punctuation, 1..26 for
** the letters A..Z) together with the per-letter table pointers,
** then files every rule returned by read_one_rule() under the
** first character of its match text.  Each table is closed with a
** sentinel entry whose match text is NULL.
**
** A table holds at most max_rules entries including the sentinel.
** The program exits with status 1 when the rule file cannot be
** opened, when memory runs out, or when a table would overflow.
** Rules with empty match text are reported and skipped, because a
** rule that consumes no text would never let translation advance.
*/
void Init_Rules()
{
#define max_rules 100 /* for now */
    FILE *rulefile;
    char left[128], mid[128], right[128], rep[128];
    int lastrule[27] = {0};
    int i;

    rulefile = fopen("rules-navy", "r");
    if (rulefile == NULL)
    {
        fprintf(stderr, "Cannot open rule file 'rules-navy'\n");
        exit(1);
    }

    Rules = malloc(27*sizeof(Rule *));
    if (Rules == NULL)
    {
        fprintf(stderr, "Out of memory loading rule file 'rules-navy'\n");
        exit(1);
    }
    Rules[0] = punct_rules = malloc(max_rules * sizeof(Rule));
    Rules[1] = A_rules = malloc(max_rules * sizeof(Rule));
    Rules[2] = B_rules = malloc(max_rules * sizeof(Rule));
    Rules[3] = C_rules = malloc(max_rules * sizeof(Rule));
    Rules[4] = D_rules = malloc(max_rules * sizeof(Rule));
    Rules[5] = E_rules = malloc(max_rules * sizeof(Rule));
    Rules[6] = F_rules = malloc(max_rules * sizeof(Rule));
    Rules[7] = G_rules = malloc(max_rules * sizeof(Rule));
    Rules[8] = H_rules = malloc(max_rules * sizeof(Rule));
    Rules[9] = I_rules = malloc(max_rules * sizeof(Rule));
    Rules[10] = J_rules = malloc(max_rules * sizeof(Rule));
    Rules[11] = K_rules = malloc(max_rules * sizeof(Rule));
    Rules[12] = L_rules = malloc(max_rules * sizeof(Rule));
    Rules[13] = M_rules = malloc(max_rules * sizeof(Rule));
    Rules[14] = N_rules = malloc(max_rules * sizeof(Rule));
    Rules[15] = O_rules = malloc(max_rules * sizeof(Rule));
    Rules[16] = P_rules = malloc(max_rules * sizeof(Rule));
    Rules[17] = Q_rules = malloc(max_rules * sizeof(Rule));
    Rules[18] = R_rules = malloc(max_rules * sizeof(Rule));
    Rules[19] = S_rules = malloc(max_rules * sizeof(Rule));
    Rules[20] = T_rules = malloc(max_rules * sizeof(Rule));
    Rules[21] = U_rules = malloc(max_rules * sizeof(Rule));
    Rules[22] = V_rules = malloc(max_rules * sizeof(Rule));
    Rules[23] = W_rules = malloc(max_rules * sizeof(Rule));
    Rules[24] = X_rules = malloc(max_rules * sizeof(Rule));
    Rules[25] = Y_rules = malloc(max_rules * sizeof(Rule));
    Rules[26] = Z_rules = malloc(max_rules * sizeof(Rule));
    for (i = 0; i < 27; i++)
    {
        if (Rules[i] == NULL)
        {
            fprintf(stderr, "Out of memory loading rule file 'rules-navy'\n");
            exit(1);
        }
    }

    while (read_one_rule(rulefile, left, mid, right, rep))
    {
        int thisrule;
        int n;

        if (*mid == '\0')
        {
            fprintf(stderr, "Ignoring rule with empty match text\n");
            continue;
        }

        /* An explicit range test keeps the index inside 0..26. */
        if (*mid >= 'A' && *mid <= 'Z')
        {
            thisrule = *mid - 'A' + 1;
        }
        else
        {
            thisrule = 0; /* punct */
        }

        /* The last slot of every table is reserved for the sentinel. */
        n = lastrule[thisrule];
        if (n >= max_rules - 1)
        {
            fprintf(stderr,
                "Too many rules starting with '%c' in 'rules-navy' (limit %d)\n",
                *mid, max_rules - 1);
            exit(1);
        }

        Rules[thisrule][n][0] = copystr(left);
        Rules[thisrule][n][1] = copystr(mid);
        Rules[thisrule][n][2] = copystr(right);
        Rules[thisrule][n][3] = copystr(rep);
        /* A NULL match text would be mistaken for the sentinel. */
        if (Rules[thisrule][n][0] == NULL || Rules[thisrule][n][1] == NULL
            || Rules[thisrule][n][2] == NULL || Rules[thisrule][n][3] == NULL)
        {
            fprintf(stderr, "Out of memory loading rule file 'rules-navy'\n");
            exit(1);
        }
        lastrule[thisrule] = n + 1;
    }
    fclose(rulefile);

    /* Terminate every table with the end-of-rules sentinel. */
    for (i = 0; i < 27; i++)
    {
        Rules[i][lastrule[i]][0] = Anything;
        Rules[i][lastrule[i]][1] = NULL;
        Rules[i][lastrule[i]][2] = Anything;
        Rules[i][lastrule[i]][3] = Silent;
    }
}
#endif
/*
** Return 1 when chr is one of the upper-case vowels A, E, I, O
** or U, and 0 for every other character.  Lower-case letters and
** Y are not counted as vowels.
*/
int isvowel(chr)
char chr;
{
    switch (chr)
    {
    case 'A':
    case 'E':
    case 'I':
    case 'O':
    case 'U':
        return 1;
    default:
        return 0;
    }
}
/*
** Return 1 when chr is an upper-case letter that isvowel() does
** not accept, and 0 otherwise.  Y therefore counts as a consonant.
*/
int isconsonant(chr)
char chr;
{
    /*
    ** <ctype.h> functions are only defined for EOF and values
    ** representable as unsigned char; a plain char may be negative.
    */
    return (isupper((unsigned char) chr) && !isvowel(chr));
}
// #ifndef ORIGINAL -- debugging -DORIGINAL
/*
** Translate one word by applying rules from left to right.
**
** word is expected to start with a blank: index 0 is skipped and
** translation begins at index 1.  For each position the table is
** chosen by the character found there (Rules[1..26] for A..Z,
** Rules[0] for everything else) and find_rule() returns the index
** at which to continue.
**
** Nothing is done when no characters follow the leading blank.
** The return value carries no information and is always 0.
*/
int xlate_word(word)
char word[];
{
    int index;  /* Current position in word */
    int type;   /* First letter of match part */

    if (word[0] == '\0')
        return 0;

    index = 1;  /* Skip the initial blank */
    while (word[index] != '\0')
    {
        /* An explicit range test keeps the table index inside 0..26. */
        if (word[index] >= 'A' && word[index] <= 'Z')
            type = word[index] - 'A' + 1;
        else
            type = 0;
        index = find_rule(word, index, Rules[type]);
    }
    return 0;
}
/*
** Apply the first rule in rules that fits word at position index.
**
** word   the text being translated; word[index-1] is read as the
**        end of the left context, so index must be at least 1
** index  position of the first untranslated character
** rules  table ended by an entry whose match text (element 1) is
**        NULL
**
** A rule fits when its match text equals the text at index and
** both leftmatch() and rightmatch() accept the surrounding text.
** The phonemes of that rule (element 3) are passed to outstring()
** and the index just past the matched text is returned.
**
** When the end of the table is reached an error is printed on
** stderr and index+1 is returned so that the character is skipped.
*/
int find_rule(word, index, rules)
char word[];
int index;
Rule *rules;
{
    Rule *rule;
    char *match;
    int remainder;

    for (rule = rules; ; rule++)        /* Search for the rule */
    {
        match = (*rule)[1];
        if (match == NULL)              /* bad symbol! */
        {
            fprintf(stderr,
                "Error: Can't find rule for: '%c' in \"%s\"\n", word[index], word);
            return index+1;             /* Skip it! */
        }

        /* Compare the match text with the word, character by character. */
        for (remainder = index; *match != '\0'; match++, remainder++)
        {
            if (*match != word[remainder])
                break;
        }
        if (*match != '\0')             /* found mismatch */
            continue;

        /*
        ** A rule with empty match text would consume nothing and
        ** hand the same index back to the caller forever.
        */
        if (remainder == index)
            continue;

        if (!leftmatch((*rule)[0], &word[index-1]))
            continue;
        if (!rightmatch((*rule)[2], &word[remainder]))
            continue;

        outstring((*rule)[3]);
        return remainder;
    }
}
// #endif -- DEBUGGING -DORIGINAL
/*
** Test whether the text ending at context satisfies the left
** context pattern.
**
** The pattern is walked from its last character to its first
** while the text is walked backwards from context.  Letters,
** apostrophes and blanks must match literally; the symbols
** # : ^ . and + have the meanings listed at the top of this file.
** '%' is valid in right contexts only and is reported as an error
** here, like any other unknown symbol.
**
** Returns TRUE on a match (an empty pattern matches anything) and
** FALSE otherwise.
**
** NOTE(review): nothing here stops text from moving in front of
** the start of the word buffer; presumably the rule tables rely on
** the leading blank of the word to end the match -- confirm.
*/
int leftmatch(pattern, context)
char *pattern;  /* first char of pattern to match in text */
char *context;  /* last char of text to be matched */
{
    char *pat;
    char *text;
    int count;

    if (*pattern == '\0')       /* null string matches any context */
        return TRUE;

    /* point to last character in pattern string */
    count = (int) strlen(pattern);
    pat = pattern + (count - 1);
    text = context;

    for (; count > 0; pat--, count--)
    {
        /* First check for simple text or space */
        if (isalpha((unsigned char) *pat) || *pat == '\'' || *pat == ' ')
        {
            if (*pat != *text)
                return FALSE;
            text--;
            continue;
        }

        switch (*pat)
        {
        case '#':       /* One or more vowels */
            if (!isvowel(*text))
                return FALSE;
            text--;
            while (isvowel(*text))
                text--;
            break;

        case ':':       /* Zero or more consonants */
            while (isconsonant(*text))
                text--;
            break;

        case '^':       /* One consonant */
            if (!isconsonant(*text))
                return FALSE;
            text--;
            break;

        case '.':       /* B, D, V, G, J, L, M, N, R, W, Z */
            /* strchr would also find the terminating NUL, so exclude it */
            if (*text == '\0' || strchr("BDVGJLMNRWZ", *text) == NULL)
                return FALSE;
            text--;
            break;

        case '+':       /* E, I or Y (front vowel) */
            if (*text != 'E' && *text != 'I' && *text != 'Y')
                return FALSE;
            text--;
            break;

        case '%':       /* suffix symbol: right context only */
            /* fallthrough */
        default:
            fprintf(stderr, "Bad char in left rule: '%c'\n", *pat);
            return FALSE;
        }
    }

    return TRUE;
}
/*
** Test whether the text starting at context satisfies the right
** context pattern.
**
** Pattern and text are both walked forwards.  Letters, apostrophes
** and blanks must match literally; the symbols # : ^ . + and %
** have the meanings listed at the top of this file.  Any other
** symbol is reported on stderr and fails the match.
**
** Returns TRUE on a match (an empty pattern matches anything) and
** FALSE otherwise.
*/
int rightmatch(pattern, context)
char *pattern;  /* first char of pattern to match in text */
char *context;  /* first char of the text to the right of the match */
{
    char *pat;
    char *text;

    if (*pattern == '\0')       /* null string matches any context */
        return TRUE;

    text = context;
    for (pat = pattern; *pat != '\0'; pat++)
    {
        /* First check for simple text or space */
        if (isalpha((unsigned char) *pat) || *pat == '\'' || *pat == ' ')
        {
            if (*pat != *text)
                return FALSE;
            text++;
            continue;
        }

        switch (*pat)
        {
        case '#':       /* One or more vowels */
            if (!isvowel(*text))
                return FALSE;
            text++;
            while (isvowel(*text))
                text++;
            break;

        case ':':       /* Zero or more consonants */
            while (isconsonant(*text))
                text++;
            break;

        case '^':       /* One consonant */
            if (!isconsonant(*text))
                return FALSE;
            text++;
            break;

        case '.':       /* B, D, V, G, J, L, M, N, R, W, Z */
            /* strchr would also find the terminating NUL, so exclude it */
            if (*text == '\0' || strchr("BDVGJLMNRWZ", *text) == NULL)
                return FALSE;
            text++;
            break;

        case '+':       /* E, I or Y (front vowel) */
            if (*text != 'E' && *text != 'I' && *text != 'Y')
                return FALSE;
            text++;
            break;

        case '%':       /* ER, E, ES, ED, ING, ELY (a suffix) */
            if (*text == 'E')
            {
                text++;
                if (*text == 'L')
                {
                    /* "ELY" is consumed whole; a bare "EL" keeps its L */
                    if (text[1] == 'Y')
                        text += 2;
                }
                else if (*text == 'R' || *text == 'S' || *text == 'D')
                {
                    text++;
                }
                break;
            }
            /* Short-circuit evaluation never reads past a NUL here. */
            if (*text == 'I' && text[1] == 'N' && text[2] == 'G')
            {
                text += 3;
                break;
            }
            return FALSE;

        default:
            fprintf(stderr, "Bad char in right rule:'%c'\n", *pat);
            return FALSE;
        }
    }

    return TRUE;
}