/*
 * Analyse CSV file, print out most likely files with SSNs.
 *
 * Input (read from stdin) starts with a header line, followed by one
 * record per line:
 *
 *   known9,unknown9,invalid9,known11,unknown11,invalid11,filename
 *   0,1,0,0,0,0,"c:\DELL\Drivers\R59444\README.HTM"
 *
 * Output (written to stdout) is one line per record whose score exceeds
 * 4.0:
 *
 *   score*100,known9,unknown9,invalid9,known11,unknown11,invalid11,"filename"
 */

/* Standard C libs */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>

#ifndef FALSE
#define FALSE (0!=0)
#define TRUE (0==0)
#endif

/* Size of the item buffer; an item may hold at most MAXLINE - 1 characters. */
#define MAXLINE 1024

/* When TRUE, the CSV reader traces every item it returns on stderr. */
int debug = FALSE;

/*
 * Copy up to 60 characters of the current input line from f to stderr,
 * stopping early at newline or EOF, then terminate the output with a
 * newline.  Used to show context in error messages.
 */
void gobble(FILE *f)
{
    int c, i;

    fflush(stderr);
    for (i = 0; i < 60; i++) {
        c = fgetc(f);
        if (c == EOF) break;
        if (c == '\n') break;
        fputc(c, stderr);
    }
    fputc('\n', stderr);
    fflush(stderr);
}

/*
 * Append character c at *pos and advance *pos.  limit is the last usable
 * slot of the buffer, which is reserved for the terminating NUL; reaching
 * it means the item does not fit, and the program exits with an error
 * instead of overrunning the buffer.
 */
static void appendChar(char **pos, const char *limit, int c)
{
    if (*pos >= limit) {
        fprintf(stderr, "Error: CSV item longer than %d characters\n",
                MAXLINE - 1);
        exit(EXIT_FAILURE);
    }
    *(*pos)++ = (char)c;
}

/*
 * Read the next CSV item from f.
 *
 * Leading spaces are skipped.  An item is either enclosed in double
 * quotes or runs up to the next comma / end of line.  The separating
 * comma is consumed; a terminating newline is pushed back so that the
 * following call sees it.
 *
 * Returns a pointer to a static buffer holding the item (overwritten by
 * the next call that returns an item), or NULL when the first thing read
 * is a newline (which is consumed) or EOF, i.e. there are no more items
 * on the current line.
 *
 * Malformed input (unterminated quoted string, text after a closing
 * quote, doubled quotes, over-long item) prints a message on stderr and
 * terminates the program with EXIT_FAILURE.
 */
char *staticGetCSVItem(FILE *f)
{
    static char item[MAXLINE];
    char *s = item;
    const char *limit = item + MAXLINE - 1;   /* slot kept for the NUL */
    int c;

    if (debug) fprintf(stderr, "getcsv:\n");

    /* Skip leading spaces; newline or EOF here means "no item". */
    for (;;) {
        c = fgetc(f);
        if ((c == '\n') || (c == EOF)) {
            if (debug) fprintf(stderr, "Returning NULL\n");
            return(NULL);
        }
        if (c != ' ') break;
    }

    if (c == '"') {
        /* Quoted item: read up to the closing quote. */
        for (;;) {
            c = fgetc(f);
            if (c == '"') break;
            if (c == EOF) {
                fprintf(stderr, "Unexpected end of file in quoted string\n");
                exit(EXIT_FAILURE);
            }
            if (c == '\n') {
                ungetc(c, f);
                fprintf(stderr, "Warning: end of quoted string missing: \n");
                gobble(f);
                exit(EXIT_FAILURE);
            }
            appendChar(&s, limit, c);
        }
        *s = '\0';

        /* Whatever follows the closing quote must end the item. */
        c = fgetc(f);
        if ((c == ',') || (c == EOF)) {
            /*
             * Separator consumed, or input ended right after the quote
             * (last line without trailing newline).  The next call will
             * read EOF again and return NULL, ending the line normally.
             */
        } else if (c == '\n') {
            ungetc(c, f);
        } else if (c == '"') {
            fprintf(stderr, "Not implemented: string-delimited CSV items containing two double-quotes\n");
            exit(EXIT_FAILURE);
        } else {
            fprintf(stderr, "Unexpected text at: ");
            gobble(f);
            exit(EXIT_FAILURE);
        }
    } else {
        /* Unquoted item: read all text up to comma, newline or EOF. */
        for (;;) {
            if (c == ',') break;
            if (c == EOF) break;        /* last line without newline */
            if (c == '\n') {
                ungetc(c, f);
                break;
            }
            appendChar(&s, limit, c);
            c = fgetc(f);
        }
        *s = '\0';
    }

    if (debug) fprintf(stderr, "Returning '%s'\n", item);
    return(item);
}

/*
 * Terminate the program with a format error message and EXIT_FAILURE
 * unless test is true.
 */
void fassert(int test)
{
    if (!test) {
        fprintf(stderr, "* Input csv file is not in expected format:\n");
        fprintf(stderr, "known9,unknown9,invalid9,known11,unknown11,invalid11,filename\n");
        exit(EXIT_FAILURE);
    }
}

/*
 * Convert a CSV item to an int.  A missing item (NULL) or a value that
 * does not fit in an int is a format error.  As with atoi, text that does
 * not start with a number converts to 0.
 */
static int parseCount(const char *s)
{
    long v;

    fassert(s != NULL);
    errno = 0;
    v = strtol(s, NULL, 10);
    fassert((errno != ERANGE) && (v >= INT_MIN) && (v <= INT_MAX));
    return((int)v);
}

/*
 * Compute the score of one record from its six counters.
 *
 * The 11-character counters (fixed SSN format 123-45-6789) contribute
 * known11 + 0.5 * unknown11 + 0.1 * invalid11.
 *
 * The 9-digit counters contribute known9 when there are no unknown or
 * invalid 9-digit strings; otherwise known9 / (unknown9 + invalid9),
 * halved when unknown9 / known9 lies in [3, 12] and halved again when
 * invalid9 / known9 lies in [3, 10].
 */
static float computeScore(int known9, int unknown9, int invalid9,
                          int known11, int unknown11, int invalid11)
{
    float score = 0.0;

    // Items in fixed SSN format 123-45-6789
    score += (float)known11;
    score += (float)unknown11 * 0.5;
    score += (float)invalid11 * 0.1;

    // 9-digit strings
    if (unknown9 + invalid9 == 0) {
        score += (float)known9;
    } else {
        float nineScore = 0.0, ratio = 0.0;

        nineScore = (float)known9 / ((float)unknown9 + (float)invalid9);
        /*
         * With known9 == 0 the contribution is already zero and the
         * ratios are undefined, so skip them rather than divide by zero.
         */
        if (known9 != 0) {
            ratio = (float)unknown9 / (float)known9;
            if ((3.0 <= ratio) && (ratio <= 12.0)) nineScore /= 2.0;
            ratio = (float)invalid9 / (float)known9;
            if ((3.0 <= ratio) && (ratio <= 10.0)) nineScore /= 2.0;
        }
        score += nineScore;
    }
    return(score);
}

/*
 * Entry point: takes no arguments, reads the CSV from stdin, verifies
 * the header line, then scores every record and prints those whose score
 * is greater than 4.0.  Processing stops at EOF or at the first empty
 * line.  Returns EXIT_SUCCESS; format errors exit with EXIT_FAILURE.
 */
int
main(int argc, char **argv)
{
    static const char *const header[] = {
        "known9", "unknown9", "invalid9",
        "known11", "unknown11", "invalid11", "filename"
    };
    char *s;
    size_t i;
    int lineno = 0;
    int known9, unknown9, invalid9, known11, unknown11, invalid11;
    char *filename;

    (void)argv;
    if (argc != 1) {
        fprintf(stderr, "syntax: analyse < csvfile\n");
        exit(EXIT_FAILURE);
    }

    // known9,unknown9,invalid9,known11,unknown11,invalid11,filename
    for (i = 0; i < sizeof header / sizeof header[0]; i++) {
        s = staticGetCSVItem(stdin);
        fassert((s != NULL) && (strcmp(s, header[i]) == 0));
    }
    s = staticGetCSVItem(stdin);
    fassert(s == NULL);                 /* header line must end here */

    for (;;) {
        float score;

        lineno += 1;
        s = staticGetCSVItem(stdin);
        if (s == NULL) break; // end of file (or empty line)

        known9 = parseCount(s);
        unknown9 = parseCount(staticGetCSVItem(stdin));
        invalid9 = parseCount(staticGetCSVItem(stdin));
        known11 = parseCount(staticGetCSVItem(stdin));
        unknown11 = parseCount(staticGetCSVItem(stdin));
        invalid11 = parseCount(staticGetCSVItem(stdin));

        filename = staticGetCSVItem(stdin);
        fassert(filename != NULL);

        /*
         * The end-of-line call returns NULL without touching the static
         * item buffer, so filename stays valid for the fprintf below.
         */
        s = staticGetCSVItem(stdin);
        fassert(s == NULL);
        if (debug) fprintf(stderr, "\n");

        // Now we compute the score!
        score = computeScore(known9, unknown9, invalid9,
                             known11, unknown11, invalid11);

        if (score > 4.0)
            fprintf(stdout, "%d,%d,%d,%d,%d,%d,%d,\"%s\"\n",
                    (int)(score * 100.0),
                    known9, unknown9, invalid9,
                    known11, unknown11, invalid11, filename);
    }

    fprintf(stderr, "Exited successfully at line %d\n", lineno);
    return(EXIT_SUCCESS);
}