/*
 * partition.c	This program takes a wordlist, specified on the command line,
 *		and creates a list of files, word3...word20, each file
 * containing all and only words with that many letters.  Words of 1 and 2
 * letters are thrown away, as are words of more than 20 letters.
 *
 * Each partition file is stored as a list of words with no intervening
 * characters (like NUL or NL).  This is reasonable (and even desirable),
 * since every word in each file is the same length.  Thus, it is easy to
 * seek directly to any word in a partition file, simply by multiplying the
 * index of that word by the length of the word.
 *
 * Copyright (C) 1990 Rob Mayoff.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * panicif(c, s): if condition c is true, report s together with the current
 * errno text via perror() and terminate the program with exit status 1.
 *
 * The body is wrapped in do { } while(0) so that the macro expands to exactly
 * one statement; this keeps "if(x) panicif(...); else ..." well-formed and
 * stops a following else from binding to the macro's internal if.
 */
#define panicif(c, s)	do { if(c) { perror(s); exit(1); } } while(0)

/*
 * Table of open streams used by main().  fp[0] holds the input word list
 * and is reused afterwards for the "histogram" output file; fp[3]..fp[20]
 * are the partition files, indexed by word length.  fp[1] and fp[2] are
 * never opened.
 */
FILE	*fp[21];

/* Write the one-line command synopsis to stderr. */
void usage(void)
{
  fputs("Usage: partition wordlist-file\n", stderr);
}

/*
 * One entry of the letter-frequency table: `count` is the number of times
 * `letter` occurred in the words that were kept.
 */
typedef struct
{
  unsigned long count;
  char	letter;
} hist_t;

/* Frequency table with one slot per letter; slot index is letter - 'A'. */
hist_t	histogram[26];

/*
 * qsort() comparison callback ordering hist_t entries by descending count.
 *
 * a, b: pointers to the two hist_t elements being compared.
 * Returns a negative value if *a has the larger count (sorts first), a
 * positive value if *b has the larger count, and 0 if the counts are equal.
 *
 * The parameters are const void * because that is the prototype qsort()
 * calls through.  The counts are compared explicitly rather than subtracted:
 * an unsigned long difference narrowed to int can come out with the wrong
 * sign, or as 0, when the counts are far apart.
 */
int histCmp(const void *a, const void *b)
{
  const hist_t *x = a;
  const hist_t *y = b;

  if(x->count < y->count)
    return 1;
  if(x->count > y->count)
    return -1;
  return 0;
}

/*
 * Entry point.
 *
 * Reads the word list named by av[1] one line at a time and appends every
 * word of 3 to 20 characters to the file "word<N>", N being the word's
 * length; words are written back to back with no separator.  Shorter and
 * longer words are dropped.  While partitioning, it tallies how often each
 * letter occurs in the kept words (case-insensitively; characters that are
 * not letters are not counted), then writes the 26 letters, most frequent
 * first, to the file "histogram".
 *
 * Returns 0 on success.  Exits with status 1 after a usage message when no
 * word list is given, or after a perror() message on any I/O failure.
 */
int main(int ac, char **av)
{
  int	i, j, c;
  char	name[32];	/* partition file name, "word3".."word20" */
  char	str[200];	/* current input line */

  if(ac<2)
  {
    usage();
    exit(1);
  }

  /* Open the original word-list. */
  fp[0]=fopen(av[1], "r");
  panicif(fp[0]==NULL, av[1]);

  /* Open the partition files. */
  for(i=3; i<21; i++)
  {
    sprintf(name, "word%d", i);
    fp[i]=fopen(name, "w");
    panicif(fp[i]==NULL, name);
  }

  while(fgets(str, sizeof(str), fp[0])!=NULL)
  {
    i=(int)strlen(str);
    /* Delete the newline.  i can be 0 if the line starts with a NUL byte. */
    if(i>0 && str[i-1]=='\n')
      str[--i]='\0';
    else if(i==(int)sizeof(str)-1)
    {
      /*
       * The buffer filled up before a newline was seen, so this line is far
       * too long to be kept.  Discard the rest of it so that its tail is not
       * mistaken for a separate word on the next iteration.
       */
      while((c=getc(fp[0]))!=EOF && c!='\n')
	;
      continue;
    }
    if(i<3 || i>20)
      continue;
    panicif(fputs(str, fp[i])==EOF, "partition file");
    for(j=0; j<i; j++)
    {
      /* Fold case and skip non-letters so the index stays within 0..25. */
      c=toupper((unsigned char)str[j]);
      if(c>='A' && c<='Z')
	histogram[c-'A'].count++;
    }
  }
  panicif(ferror(fp[0]), av[1]);

  /* fclose() flushes buffered output, so a failure here means lost data. */
  for(i=3; i<21; i++)
  {
    sprintf(name, "word%d", i);
    panicif(fclose(fp[i])!=0, name);
  }
  fclose(fp[0]);

  /* Label the slots before sorting, since sorting moves them around. */
  for(i=0; i<26; i++)
    histogram[i].letter=(char)(i+'A');
  qsort(histogram, 26, sizeof(hist_t), histCmp);
  fp[0]=fopen("histogram", "w");
  panicif(fp[0]==NULL, "histogram");
  for(i=0; i<26; i++)
  {
    panicif(putc(histogram[i].letter, fp[0])==EOF, "histogram");
  }
  panicif(fclose(fp[0])!=0, "histogram");

  return 0;
}
