/* Analyse a CSV file and print the files most likely to contain SSNs.
The input is a header line followed by one row per file, for example:
known9,unknown9,invalid9,known11,unknown11,invalid11,filename
0,1,0,0,0,0,"c:\DELL\Drivers\R59444\README.HTM"
*/
/* Standard C libs */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
/* Boolean constants; both are defined here only when FALSE is not already defined. */
#ifndef FALSE
#define FALSE (0!=0)
#define TRUE (0==0)
#endif
/* Size in bytes of the static buffer that staticGetCSVItem() fills with each CSV item. */
#define MAXLINE 1024
/* When true, staticGetCSVItem() and main() write trace output to stderr. */
int debug = FALSE;
/* Copy the rest of the current line of f to stderr, as context for an error
   message. At most 60 characters are read; reading stops early at end of
   file or at a newline (the newline is consumed but not echoed). A newline
   is always written to stderr afterwards, and stderr is flushed before and
   after so the output stays in order. */
void gobble(FILE *f)
{
    int remaining = 60;

    fflush(stderr);
    while (remaining-- > 0) {
        int ch = fgetc(f);

        if ((ch == EOF) || (ch == '\n')) {
            break;
        }
        fputc(ch, stderr);
    }
    fputc('\n', stderr);
    fflush(stderr);
}
/* Store character c at *pos in the item buffer and advance *pos. limit is
   the slot reserved for the terminating '\0'; reaching it means the item
   does not fit, which is reported on stderr before exiting with failure. */
static void csvAppend(char **pos, const char *limit, int c)
{
    if (*pos >= limit) {
        fprintf(stderr, "CSV item is longer than %d characters\n", MAXLINE - 1);
        exit(EXIT_FAILURE);
    }
    *(*pos)++ = (char)c;
}

/* Read the next comma-separated item of the current line from f.

   Leading spaces are skipped. An item that starts with a double quote is
   read up to the closing quote (the quotes are not stored); any other item
   is read up to the next comma. The comma that ends an item is consumed. A
   newline that ends an item is pushed back, so that the following call sees
   it first.

   Returns a pointer to a static buffer holding the item, which is
   overwritten by the next call that reads an item. Returns NULL, without
   touching the buffer, when the first thing found after any leading spaces
   is a newline (which is consumed) or end of file.

   Malformed input (unterminated quoted item, text after a closing quote, an
   embedded doubled quote, or an item that does not fit in MAXLINE bytes) is
   reported on stderr and the program exits with EXIT_FAILURE. */
char *staticGetCSVItem(FILE *f)
{
    static char item[MAXLINE];
    char *s = item;
    const char *limit = item + MAXLINE - 1;   /* keep one byte for '\0' */
    int c;

    if (debug) fprintf(stderr, "getcsv:\n");

    /* Skip leading spaces; newline or EOF here means there is no item. */
    do {
        c = fgetc(f);
        if ((c == '\n') || (c == EOF)) {
            if (debug) fprintf(stderr, "Returning NULL\n");
            return NULL;
        }
    } while (c == ' ');

    if (c == '"') {
        /* Quoted item: collect everything up to the closing quote. */
        for (;;) {
            c = fgetc(f);
            if ((c == '"') || (c == '\n') || (c == EOF)) break;
            csvAppend(&s, limit, c);
        }
        *s = '\0';
        if (c == '\n') {
            /* Show the text collected so far; the rest of the line is empty. */
            fprintf(stderr, "Warning: end of quoted string missing: \n");
            fprintf(stderr, "%s\n", item);
            exit(EXIT_FAILURE);
        }
        if (c == EOF) {
            fprintf(stderr, "Unexpected end of file in quoted string\n");
            exit(EXIT_FAILURE);
        }

        /* Look at what follows the closing quote. */
        c = fgetc(f);
        if (c == ',') {
            /* separator consumed; the item is complete */
        } else if (c == '\n') {
            ungetc(c, f);
        } else if (c == EOF) {
            /* Input ends right after the closing quote (no trailing newline):
               the item is complete and the next call will return NULL. */
        } else if (c == '"') {
            fprintf(stderr, "Not implemented: string-delimited CSV items containing two double-quotes\n");
            exit(EXIT_FAILURE);
        } else {
            /* Push the offending character back so gobble() echoes it too. */
            ungetc(c, f);
            fprintf(stderr, "Unexpected text at: "); gobble(f);
            exit(EXIT_FAILURE);
        }
    } else {
        /* Unquoted item: c already holds its first character. Stop at the
           comma, at a newline (pushed back), or at end of file. */
        while ((c != ',') && (c != EOF)) {
            if (c == '\n') {
                ungetc(c, f);
                break;
            }
            csvAppend(&s, limit, c);
            c = fgetc(f);
        }
        *s = '\0';
    }

    if (debug) fprintf(stderr, "Returning '%s'\n", item);
    return item;
}
/* Format assertion for the input file: returns normally when test is
   non-zero; otherwise describes the expected CSV header on stderr and
   terminates the program with EXIT_FAILURE. */
void fassert(int test)
{
    if (test) {
        return;
    }
    fputs("* Input csv file is not in expected format:\n", stderr);
    fputs("known9,unknown9,invalid9,known11,unknown11,invalid11,filename\n", stderr);
    exit(EXIT_FAILURE);
}
/* Read the next CSV item from stdin and require it to be the header name. */
static void expectHeader(const char *name)
{
    const char *item = staticGetCSVItem(stdin);
    fassert((item != NULL) && (strcmp(item, name) == 0));
}

/* Require that the current input line has no further items. */
static void expectEndOfLine(void)
{
    fassert(staticGetCSVItem(stdin) == NULL);
}

/* Read the next CSV item from stdin, which must exist, and convert it with atoi(). */
static int readCount(void)
{
    const char *item = staticGetCSVItem(stdin);
    fassert(item != NULL);
    return atoi(item);
}

/* Combine the six counts of one row into a single likelihood score. */
static float computeScore(int known9, int unknown9, int invalid9,
                          int known11, int unknown11, int invalid11)
{
    float score = 0.0;

    // Items in fixed SSN format 123-45-6789
    score += (float)known11;
    score += (float)unknown11 * 0.5;
    score += (float)invalid11 * 0.1;

    // 9-digit strings
    if (unknown9 + invalid9 == 0) {
        score += (float)known9;
    } else if (known9 != 0) {
        // With known9 == 0 the contribution is zero, and skipping the block
        // avoids dividing by zero when forming the ratios below.
        float part = (float)known9 / ((float)unknown9 + (float)invalid9);
        float ratio;

        ratio = (float)unknown9 / (float)known9;
        if ((3.0 <= ratio) && (ratio <= 12.0)) part /= 2.0;
        ratio = (float)invalid9 / (float)known9;
        if ((3.0 <= ratio) && (ratio <= 10.0)) part /= 2.0;
        score += part;
    }
    return score;
}

/* Entry point: takes no arguments and reads the CSV from stdin.

   The first line must be exactly the expected header. Each following line
   must hold six counts and a filename; a line with a missing or extra item
   fails the format assertion. For every row whose score exceeds 4.0 a line
   is written to stdout holding the score times 100 (truncated to int), the
   six counts, and the filename in double quotes. Reading stops at the first
   line that yields no item (end of file or an empty line). */
int main(int argc, char **argv) {
    int lineno = 0;

    (void)argv;
    if (argc != 1) {
        fprintf(stderr, "syntax: analyse < csvfile\n");
        exit(EXIT_FAILURE);
    }

    // known9,unknown9,invalid9,known11,unknown11,invalid11,filename
    expectHeader("known9");
    expectHeader("unknown9");
    expectHeader("invalid9");
    expectHeader("known11");
    expectHeader("unknown11");
    expectHeader("invalid11");
    expectHeader("filename");
    expectEndOfLine();

    for (;;) {
        int known9, unknown9, invalid9, known11, unknown11, invalid11;
        const char *filename;
        const char *first;
        float score;

        lineno += 1;
        first = staticGetCSVItem(stdin);
        if (first == NULL) break; // end of file
        known9 = atoi(first);
        unknown9 = readCount();
        invalid9 = readCount();
        known11 = readCount();
        unknown11 = readCount();
        invalid11 = readCount();

        filename = staticGetCSVItem(stdin);
        fassert(filename != NULL);
        // filename points into staticGetCSVItem's static buffer. The check
        // below either exits or gets NULL, and the NULL path stores nothing,
        // so the buffer still holds the filename when it is printed.
        expectEndOfLine();
        if (debug) fprintf(stderr, "\n");

        // Now we compute the score!
        score = computeScore(known9, unknown9, invalid9,
                             known11, unknown11, invalid11);
        if (score > 4.0) fprintf(stdout, "%d,%d,%d,%d,%d,%d,%d,\"%s\"\n",
            (int)(score * 100.0),
            known9, unknown9, invalid9, known11, unknown11, invalid11,
            filename);
    }

    fprintf(stderr, "Exited successfully at line %d\n", lineno);
    return EXIT_SUCCESS;
}