/* Analyse CSV file, print out most likely files with SSNs

   File format is:
	known9,unknown9,invalid9,known11,unknown11,invalid11,filename
	0,1,0,0,0,0,"c:\DELL\Drivers\R59444\README.HTM"

 */

/* Standard C libs */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>

/* Boolean constants; both are defined here only when FALSE has not
   already been defined. */
#ifndef FALSE
#define FALSE (0!=0)
#define TRUE (0==0)
#endif

/* Size, in bytes, of the static buffer that holds one CSV item. */
#define MAXLINE 1024

/* When true, the CSV reader and the main loop trace their progress on stderr. */
int debug = FALSE;


/* Echo the rest of the current line of f to stderr, for use in diagnostics.

   At most 60 characters are copied.  Reading stops early at end of file or
   at a newline (the newline is consumed but not echoed).  The echoed text is
   always followed by a single newline, and stderr is flushed before and
   after so the excerpt is not interleaved with other buffered output. */
void gobble(FILE *f)
{
  int remaining = 60;   /* maximum number of characters to echo */

  fflush(stderr);
  while (remaining-- > 0) {
    int ch = fgetc(f);
    if (ch == EOF || ch == '\n') {
      break;
    }
    fputc(ch, stderr);
  }
  fputc('\n', stderr);
  fflush(stderr);
}


/* Append c at s, refusing to write past limit (the slot reserved for the
   terminating '\0').  Returns the new write position. */
static char *storeItemChar(char *s, const char *limit, int c)
{
  if (s >= limit) {
    fprintf(stderr, "CSV item too long (limit is %d characters)\n", MAXLINE - 1);
    exit(EXIT_FAILURE);
  }
  *s++ = (char)c;
  return s;
}

/* Read the next comma-separated item from f into a static buffer.

   Leading spaces are skipped.  An item is either a double-quoted string
   (embedded "" escapes are not supported) or bare text running up to the
   next comma.  The terminating comma is consumed; a terminating newline is
   pushed back, so the following call returns NULL to mark the end of the
   line.  End of file terminates an item the same way a newline does, so a
   final line without a trailing newline is accepted.

   Returns a pointer to the static buffer, which is overwritten by the next
   call that returns an item, or NULL when the first character after any
   leading spaces is a newline or end of file.

   Malformed input (unterminated quoted string, text after a closing quote)
   and items that do not fit in MAXLINE - 1 characters print a diagnostic on
   stderr and terminate the program with EXIT_FAILURE. */
char *staticGetCSVItem(FILE *f)
{
  static char item[MAXLINE];
  char *s = item;
  const char *limit = item + MAXLINE - 1;   /* keep room for '\0' */
  int c;

  if (debug) fprintf(stderr, "getcsv:\n");

  /* Skip leading spaces; an immediate newline or EOF means "no more items". */
  for (;;) {
    c = fgetc(f);
    if ((c == '\n') || (c == EOF)) {
      if (debug) fprintf(stderr, "Returning NULL\n");
      return(NULL);
    }
    if (c != ' ') break;
  }

  if (c == '"') {
    /* Quoted item: collect everything up to the closing quote. */
    for (;;) {
      c = fgetc(f);
      if (c == '"') break;
      if (c == EOF) {
        fprintf(stderr, "Unexpected end of file in quoted string\n");
        exit(EXIT_FAILURE);
      }
      if (c == '\n') {
        ungetc(c, f);
        fprintf(stderr, "Warning: end of quoted string missing: \n"); gobble(f);
        exit(EXIT_FAILURE);
      }
      s = storeItemChar(s, limit, c);
    }
    *s = '\0';

    /* Only a comma, a newline or end of file may follow the closing quote. */
    c = fgetc(f);
    if ((c == ',') || (c == EOF)) {
      /* separator consumed, or input ended right after the item */
    } else if (c == '\n') {
      ungetc(c, f);
    } else if (c == '"') {
      fprintf(stderr, "Not implemented: string-delimited CSV items containing two double-quotes\n");
      exit(EXIT_FAILURE);
    } else {
      /* Push the offending character back so the excerpt includes it. */
      ungetc(c, f);
      fprintf(stderr, "Unexpected text at: "); gobble(f);
      exit(EXIT_FAILURE);
    }
  } else {
    /* Bare item: collect text up to the comma, newline or end of file. */
    while ((c != ',') && (c != EOF)) {
      if (c == '\n') {
        ungetc(c, f);
        break;
      }
      s = storeItemChar(s, limit, c);
      c = fgetc(f);
    }
    *s = '\0';
  }

  if (debug) fprintf(stderr, "Returning '%s'\n", item);
  return(item);
}

/* Format assertion for the input file.

   Does nothing when test is non-zero.  When test is zero, describes the
   expected CSV column layout on stderr and terminates the program with
   EXIT_FAILURE. */
void fassert(int test)
{
  if (test) {
    return;
  }
  fputs("* Input csv file is not in expected format:\n", stderr);
  fputs("known9,unknown9,invalid9,known11,unknown11,invalid11,filename\n", stderr);
  exit(EXIT_FAILURE);
}

/* Read one CSV item from stdin and require it to be the given column name. */
static void expectHeader(const char *name)
{
  char *s = staticGetCSVItem(stdin);
  fassert(s != NULL && strcmp(s, name) == 0);
}

/* Read one CSV item from stdin, require it to be present, and convert it
   with atoi (so non-numeric text counts as 0, as before). */
static int readCount(void)
{
  char *s = staticGetCSVItem(stdin);
  fassert(s != NULL);
  return atoi(s);
}

/* Combine the six match counts of one file into a single likelihood score.

   11-character matches (123-45-6789 form) are weighted 1.0 / 0.5 / 0.1 for
   known / unknown / invalid.  For 9-digit strings, known9 counts in full
   when there are no unknown or invalid ones; otherwise it is divided by
   their sum and halved for each of two ratio bands.  The weights and bands
   are heuristics; their rationale is not recorded in this file. */
static float scoreCounts(int known9, int unknown9, int invalid9,
                         int known11, int unknown11, int invalid11)
{
  float score = 0.0;

  // Items in fixed SSN format 123-45-6789
  score += (float)known11;
  score += (float)unknown11 * 0.5;
  score += (float)invalid11 * 0.1;

  // 9-digit strings
  if (unknown9 + invalid9 == 0) {
    score += (float)known9;
  } else if (known9 != 0) {
    // known9 == 0 contributes nothing, and skipping it avoids dividing by zero
    float part = (float)known9 / ((float)unknown9 + (float)invalid9);
    float ratio = (float)unknown9 / (float)known9;
    if ((3.0 <= ratio) && (ratio <= 12.0)) part /= 2.0;
    ratio = (float)invalid9 / (float)known9;
    if ((3.0 <= ratio) && (ratio <= 10.0)) part /= 2.0;
    score += part;
  }
  return score;
}

/* Entry point: analyse < csvfile

   Validates the header line, then reads one row per input line, scores it,
   and prints rows scoring above 4.0 to stdout as
       score*100,known9,unknown9,invalid9,known11,unknown11,invalid11,"filename"
   Blank lines are skipped.  Any row with missing or extra fields ends the
   program through fassert.  Takes no command-line arguments. */
int main(int argc, char **argv) {
  static const char *const columns[] = {
    "known9", "unknown9", "invalid9", "known11", "unknown11", "invalid11",
    "filename"
  };
  int lineno = 0;
  size_t col;

  (void)argv;
  if (argc != 1) {
    fprintf(stderr, "syntax: analyse < csvfile\n");
    exit(EXIT_FAILURE);
  }

  // known9,unknown9,invalid9,known11,unknown11,invalid11,filename
  for (col = 0; col < sizeof columns / sizeof columns[0]; col++) {
    expectHeader(columns[col]);
  }
  fassert(staticGetCSVItem(stdin) == NULL);   // header line must end here

  for (;;) {
    int known9, unknown9, invalid9, known11, unknown11, invalid11;
    char *filename;
    char *s;
    float score;

    lineno += 1;
    s = staticGetCSVItem(stdin);
    if (s == NULL) {
      // NULL means either end of input or an empty line; only stop on the former
      if (feof(stdin) || ferror(stdin)) break;
      continue;
    }
    known9 = atoi(s);
    unknown9 = readCount();
    invalid9 = readCount();
    known11 = readCount();
    unknown11 = readCount();
    invalid11 = readCount();

    // filename points into the reader's static buffer; it stays valid below
    // because the end-of-line call returns NULL without storing anything
    filename = staticGetCSVItem(stdin);
    fassert(filename != NULL);
    s = staticGetCSVItem(stdin); fassert(s == NULL);
    if (debug) fprintf(stderr, "\n");

    // Now we compute the score!
    score = scoreCounts(known9, unknown9, invalid9,
                        known11, unknown11, invalid11);

    if (score > 4.0) fprintf(stdout, "%d,%d,%d,%d,%d,%d,%d,\"%s\"\n",
            (int)(score * 100.0),
            known9, unknown9, invalid9, known11, unknown11, invalid11,
            filename);
  }

  fprintf(stderr, "Exited successfully at line %d\n", lineno);
  return EXIT_SUCCESS;
}