/*
 * partition.c	This program takes a wordlist, specified on the command line,
 *		and creates a list of files, word3...word20, each file
 * containing all and only words with that many letters.  Words of 1 and 2
 * letters are thrown away, as are words of more than 20 letters.
 *
 * Each partition file is stored as a list of words with no intervening
 * characters (like NUL or NL).  This is reasonable (and even desirable),
 * since every word in each file is the same length.  Thus, it is easy to
 * seek directly to any word in a partition file, simply by multiplying the
 * index of that word by the length of the word.
 *
 * Copyright (C) 1990 Rob Mayoff.
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h> /* For toupper, islower */
#include <stdlib.h> /* For exit(), qsort() */

/*
 * Pieces of each partition file's name.  The name is formed as
 *   DICT_PREFIX "word" DICT_INFIX <word length> DICT_SUFFIX
 * Each piece may be predefined before this point (e.g. on the compiler
 * command line); otherwise it defaults to the empty string, which yields
 * the names word3, word4, ...
 */
#ifndef DICT_PREFIX
#define DICT_PREFIX ""
#endif
#ifndef DICT_INFIX
#define DICT_INFIX ""
#endif
#ifndef DICT_SUFFIX
#define DICT_SUFFIX ""
#endif

/* Assuming you can open lots of files at once is not portable.  You should
   note the number of the highest file opened, and re-pass from there onwards,
   after closing all files from the first pass. */
/* Number of slots in the fp[] table, and one more than the longest word
   length that gets a partition file (20). */
#define MAX_LETS 21

/*
 * panicif(c, s): if condition `c` is true, print the source file name, line
 * number, the text of the condition and the message string `s` to stderr,
 * then terminate the program with exit status 1.
 *
 * The body is wrapped in do { ... } while(0) so that an invocation followed
 * by its semicolon is exactly one statement.  That keeps it safe inside an
 * unbraced if/else: a following `else` can neither be orphaned by the
 * semicolon nor captured by the macro's own `if`.
 */
#define panicif(c, s)\
  do { if(c) { fprintf(stderr, "%s, Line %d: FAILURE: %s -- %s\n", __FILE__, __LINE__, #c, (s)); exit(1); } } while(0)

/* Open streams.  fp[0] is the input word list (and is reused later for the
   "histogram" output file); fp[n] for n >= 3 is the partition file that
   receives the n-letter words. */
FILE	*fp[MAX_LETS];

/* Write a one-line summary of the command-line syntax to stderr. */
void usage(void)
{
  fputs("Usage: partition wordlist-file\n", stderr);
}

/* One letter-frequency entry: a letter and the number of times it was
   counted. */
typedef struct
{
  unsigned long count;
  char	letter;
} hist_t;
hist_t	histogram[26]; /* File-scope object, so it starts out all zero */

/*
 * qsort() comparison function ordering hist_t entries by descending count.
 *
 * Returns a positive value if *b has the larger count, a negative value if
 * *a has the larger count, and 0 if the counts are equal.
 *
 * The counts are compared explicitly instead of being subtracted: the
 * difference of two unsigned longs does not fit in an int in general, and
 * truncating it could yield the wrong sign (or 0 for unequal counts).
 */
int histCmp(const void *a, const void *b) /* ANSI says const */
{
  unsigned long count_a = ((const hist_t *)a)->count;
  unsigned long count_b = ((const hist_t *)b)->count;

  return (count_b > count_a) - (count_b < count_a);
}

/*
 * Convert `word` to upper case in place and report whether it is made up
 * of letters only.  Returns 1 if every character is an upper-case letter
 * after conversion, 0 otherwise.  The whole word is converted either way.
 */
static int normalize_word(char *word)
{
  int clean = 1;

  for (; *word != '\0'; word++)
  {
    /* The <ctype.h> functions are only defined for values representable
       as unsigned char (or EOF); a plain char may be negative. */
    unsigned char c = (unsigned char)*word;

    if (islower(c))
      *word = (char)toupper(c);
    if (!isupper((unsigned char)*word))
      clean = 0;
  }
  return clean;
}

/*
 * Entry point.  av[1] names the word list to partition.
 *
 * The word list is read once per pass.  Each pass opens as many partition
 * files as the system allows, starting at length first_let, and writes to
 * them every purely alphabetic word (upper-cased) whose length has a file
 * open in this pass.  If not all files up to MAX_LETS-1 could be opened,
 * another pass continues from the first length that failed.  Letter
 * frequencies of the written words are accumulated in histogram[]; at the
 * end the 26 letters are written to the file "histogram", most frequent
 * first.
 *
 * Returns 0 on success; exits with status 1 on a usage error or when a
 * required file cannot be opened or written.
 */
int main(int ac, char **av)
{
  int	i, j, c;
  char	str[200]; /* Holds a partition file name, then each input line */
  int	first_let, max_lets;

  if(ac<2)
  {
    usage();
    exit(1);
  }

  /* File names are built in str with sprintf: make sure the configured
     name parts fit.  16 covers "word", the decimal length and the NUL. */
  panicif(strlen(DICT_PREFIX)+strlen(DICT_INFIX)+strlen(DICT_SUFFIX)+16 > sizeof(str),
    "partition file name too long");

  first_let = 3;

  for (;;) /******************* Multiple passes */
  {
    /* Open the original word-list. */
    fp[0]=fopen(av[1], "r");
    panicif(fp[0]==NULL, av[1]);

    /* Open the partition files, as many as we are allowed to. */
    max_lets=first_let;
    while (max_lets < MAX_LETS)
    {
      sprintf(str,
        "%sword%s%d%s", DICT_PREFIX, DICT_INFIX, max_lets, DICT_SUFFIX);
      fp[max_lets]=fopen(str, "wb");
      if (fp[max_lets]==NULL) break;
      max_lets++;
    }
    /* If not even one file could be opened, another pass would make no
       progress and the program would loop forever.  str still holds the
       name that failed. */
    panicif(max_lets==first_let, str);

    fprintf(stderr,
      "Processing words from %d characters to %d characters\n",
      first_let, max_lets-1);

    while (fgets(str, sizeof(str), fp[0]) != NULL)
    {
      i=(int)strlen(str);
      if (i>0 && str[i-1]=='\n')
      {
        /* Delete the newline */
        str[--i]='\0';
      }
      else if (i==(int)sizeof(str)-1)
      {
        /* The buffer filled up before the end of the line.  Such a line is
           far too long to be kept; discard the rest of it so that its tail
           is not mistaken for a separate word. */
        while ((c=getc(fp[0]))!=EOF && c!='\n')
          ;
        continue;
      }

      /* Only lengths with a partition file open in this pass. */
      if (i<first_let || i>=max_lets)
        continue;
      if (!normalize_word(str))
        continue;

      fputs(str, fp[i]);
      for (j=0; j<i; j++)
        histogram[str[j]-'A'].count++;
    }

    for (i=first_let; i<max_lets; i++)
    {
      /* fclose flushes buffered output, so this is where a write error
         (e.g. a full disk) shows up. */
      panicif(fclose(fp[i])!=0, "error writing partition file");
    }

    /* Close the word list on every pass, including the last one. */
    fclose(fp[0]);

    if (max_lets == MAX_LETS) break;
    first_let = max_lets;
  }          /******************* Multiple passes */

  /* Label the entries, then sort them so the most frequent letter is first. */
  for (i=0; i<26; i++)
    histogram[i].letter=(char)(i+'A');
  qsort(histogram, 26, sizeof(hist_t), histCmp);

  fp[0]=fopen("histogram", "w");
  panicif(fp[0]==NULL, "histogram");
  for (i=0; i<26; i++)
    putc(histogram[i].letter, fp[0]);
  panicif(fclose(fp[0])!=0, "histogram");

  return(0);
}
