/*

 File:      findssn.c
 Author:    Graham Toal <gtoal@utpa.edu>
 Copyright: University of Texas Pan American
 Created:   200511150948

 Revision history:

            200511150948 Initial version
            200511151307 First release
            200511181514 Fixed state machine to be more strict
                         (it accepted 99-9999999 and 99999-9999 as
                          valid SSN formats)
            200605110853 Added directory traversal rather than one-shot.
*/          static char *version = "200605110853"; /*

 Description:

   This is a text file scanner which detects strings of exactly
   the forms "999-99-9999" or "999999999", and logs a summary of
   the content and count of SSNs in each file it scans.  It
   searches a disk tree hierarchically.

   The program checks that the target is in a valid SSN range,
   to help reduce false positives.  The ratio of valid to invalid
   9 digit numbers is noted, in order to eliminate those
   files with many numbers where some of them would be
   SSN-compatible by chance.  (Still some work to be done on
   automating this better.)

 Limitations:

   Because of the directory traversal, this version is Windows-only.
   A later version will be modified to use <dirent.h> (not available
   under LCC) for Unix systems.  Not sure how to do this on VMS yet.

----------------------------------------------------------
*/

/* Standard C libs */
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* non-standard libs for findfirst etc */
#include <io.h>
#include <direct.h>

// Boolean constants used throughout this file.  Both are defined here
// only when FALSE has not already been defined (the guard tests FALSE
// alone, so TRUE is assumed to come from the same place as FALSE).
#ifndef FALSE
#define FALSE (0!=0)
#define TRUE (0==0)
#endif

// This code is a bit hacky in that it uses a lot of
// globals for communication, which ideally would be
// passed as parameters...

static FILE *ssnfile;          // stream for the file currently being scanned (opened in DoOneFile)
static char *progname;         // basename of argv[0], used as a prefix in error messages
static char *filename;         // path of the file being scanned; ForAllFiles points it at 'current'
static int ssncount = 0;       // SSN-like strings seen so far in the current file (reset per file)
static int boguscount = 0;     // how many of those validate() rejected (reset per file)
static int lineno = 1;         // 1-based line number within the current file, advanced on '\n'
static char current[256];      // path scratch buffer: filled by _getcwd, then extended with a file name
static int debug = FALSE;      // enables diagnostic output; nothing in this file sets it to TRUE
static int maxgroup[1000];     // indexed by area number: highest issued group, or -1 if none listed


// see http://www.codecomments.com/archive266-2005-3-410229.html,
// http://www.hri.org/info/help/ssninfo.txt,
// and http://www.ssa.gov/employer/highgroup.txt

void init_groups(void) {
  // Populate maxgroup[]: for every area number (first three SSN digits)
  // store the highest group number listed for it, or -1 when the area
  // has no entry.
  //
  // The data (extracted from http://www.ssa.gov/employer/highgroup.txt)
  // is kept in run-length form: every area in [first, last] shares the
  // same high group.  Areas that appear in no run (0, 588, 666, 734-749,
  // 752-755, 763 and everything from 773 up) keep the -1 marker.
  static const struct { int first, last, high; } runs[] = {
    {   1,   2,  4 }, {   3,   3,  2 }, {   4,   7,  6 }, {   8,   8, 90 },
    {   9,   9, 88 }, {  10,  11, 90 }, {  12,  34, 88 }, {  35,  35, 72 },
    {  36,  39, 70 }, {  40,  49,  8 }, {  50,  57, 96 }, {  58, 134, 94 },
    { 135, 142, 17 }, { 143, 158, 15 }, { 159, 207, 82 }, { 208, 211, 80 },
    { 212, 216, 75 }, { 217, 220, 73 }, { 221, 221,  4 }, { 222, 222,  2 },
    { 223, 231, 99 }, { 232, 233, 53 }, { 234, 236, 51 }, { 237, 267, 99 },
    { 268, 296, 11 }, { 297, 302,  8 }, { 303, 304, 31 }, { 305, 317, 29 },
    { 318, 318,  6 }, { 319, 361,  4 }, { 362, 368, 33 }, { 369, 386, 31 },
    { 387, 394, 27 }, { 395, 399, 25 }, { 400, 405, 65 }, { 406, 407, 63 },
    { 408, 415, 99 }, { 416, 423, 59 }, { 424, 424, 57 }, { 425, 426, 99 },
    { 427, 428, 97 }, { 429, 439, 99 }, { 440, 446, 21 }, { 447, 448, 19 },
    { 449, 467, 99 }, { 468, 477, 47 }, { 478, 478, 37 }, { 479, 485, 35 },
    { 486, 498, 23 }, { 499, 500, 21 }, { 501, 502, 31 }, { 503, 503, 39 },
    { 504, 504, 37 }, { 505, 505, 51 }, { 506, 508, 49 }, { 509, 515, 25 },
    { 516, 516, 43 }, { 517, 517, 41 }, { 518, 519, 73 }, { 520, 520, 51 },
    { 521, 530, 99 }, { 531, 533, 59 }, { 534, 539, 57 }, { 540, 541, 71 },
    { 542, 544, 69 }, { 545, 573, 99 }, { 574, 574, 45 }, { 575, 576, 99 },
    { 577, 579, 41 }, { 580, 580, 37 }, { 581, 585, 99 }, { 586, 586, 57 },
    { 587, 587, 97 }, { 589, 595, 99 }, { 596, 599, 80 }, { 600, 601, 99 },
    { 602, 608, 57 }, { 609, 626, 55 }, { 627, 644,  2 }, { 645, 645, 98 },
    { 646, 646, 88 }, { 647, 647, 86 }, { 648, 651, 40 }, { 652, 653, 38 },
    { 654, 658, 22 }, { 659, 662, 12 }, { 663, 665, 10 }, { 667, 668, 30 },
    { 669, 675, 28 }, { 676, 678, 10 }, { 679, 679,  9 }, { 680, 680, 76 },
    { 681, 681, 10 }, { 682, 690,  9 }, { 691, 699,  3 }, { 700, 723, 18 },
    { 724, 724, 28 }, { 725, 726, 18 }, { 727, 727, 10 }, { 728, 728, 14 },
    { 729, 730,  7 }, { 731, 733,  5 }, { 750, 750,  5 }, { 751, 751,  3 },
    { 756, 762,  1 }, { 764, 764, 64 }, { 765, 765, 62 }, { 766, 772, 48 }
  };
  size_t run;
  int area;

  // Start with every area marked "not listed" ...
  for (area = 0; area < 1000; area++) maxgroup[area] = -1;

  // ... then expand each run into the per-area table.
  for (run = 0; run < sizeof runs / sizeof runs[0]; run++) {
    for (area = runs[run].first; area <= runs[run].last; area++) {
      maxgroup[area] = runs[run].high;
    }
  }
}

int illegal_ssn(char *ssn)
{
  // Placeholder for a check against numbers the SSA has explicitly
  // declared invalid.  That would need a database lookup plus data from
  // the SSA, so for now no number is ever reported as illegal.
  (void)ssn;  // argument intentionally unused until the lookup exists
  return FALSE;
}

// SSN vetting does not have to be 100%.  It is OK to flag some
// invalid SSNs as legal, as long as we don't flag legal SSNs as illegal.

// Decide whether 'group' (middle two SSN digits) could have been issued
// for 'area' (first three digits), given the highest group recorded for
// that area in maxgroup[].
//
// The branches below encode the order in which groups are handed out
// within an area: odd groups below 10, then even groups from 10 up,
// then even groups below 10, then odd groups from 10 up.  The parity and
// size of the recorded high group tell us which phase the area is in.
//
// Returns TRUE when the group is plausible, FALSE when it is not or when
// the area is out of range / has no entry in the table.
int validgroup(int area, int group)
{
  int cur, even, under10;

  // Never index outside the table, whatever the caller hands us.
  if (area < 0 || area >= (int)(sizeof maxgroup / sizeof maxgroup[0])) return FALSE;
  if (maxgroup[area] < 0) return FALSE;  // area not listed at all

  cur = maxgroup[area];
  even = ((cur&1) == 0);
  under10 = (cur < 10);

  if (debug) fprintf(stderr, "Our SSN's area is %d and group is %d. "
                             " max group for %d is %d\n",
                             area, group, area, cur);

  if (!even && under10) {
    if (debug) fprintf(stderr, "group is odd and < 10\n");
    // our group must therefore also be odd and < 10
    if (group > cur) return FALSE; // range check
    return ((group&1) != 0) && (group < 10);
  }

  if (even && !under10) {
    if (debug) fprintf(stderr, "group is even and >= 10, "
                               "which also allows odd and < 10\n");
    // our group may be odd and < 10, or even and >= 10
    // first range check:
    if (group > cur) return FALSE; // range check
    return (((group&1) != 0) && (group < 10)) 
        || (((group&1) == 0) && (group >= 10));
  }

  if (even && under10) {
    if (debug) fprintf(stderr, "group is even and < 10, "
                               "which also allows even and >= 10, "
                               "plus odd and < 10\n");
    // Only odd groups >= 10 are rejected here (note reversed logic).
    // Even groups below 10 are accepted without a range check against
    // 'cur', which errs on the side of calling a number valid.
    return (!(((group&1) != 0) && (group >= 10)));
  }

  // High group must be odd and >= 10.
  // All groups now allowed, modulo range check if odd && >= 10.
  if (debug) fprintf(stderr, "group is odd and >= 10, which means "
                             "anything goes (but can be range checked "
                             "if our group is also odd)\n");
  if (((group&1) != 0) && (group >= 10) && (group > cur)) return FALSE;
  return TRUE;
}

// Vet one candidate SSN string.
//
// orig: NUL-terminated text of the form "nnn-nn-nnnn" or "nnnnnnnnn".
//
// Returns 0 when the number looks like it could be a real SSN and 1 when
// it is bogus.  (ssn_stats() adds the result straight into boguscount,
// so the value must stay 0 or 1.)  Anything that does not reduce to
// exactly nine digits is reported as bogus rather than being parsed.
int validate(char *orig)
{
  // Room for 9 digits, 2 dashes and the NUL.  Zero-filled so that the
  // fixed-size memmove()s below only ever touch initialised bytes.
  char ssn[12] = { 0 };
  size_t len, i;
  int iarea, igroup, iserial;

  // Copy only what the caller actually supplied; refuse oversize input
  // instead of reading or writing past either buffer.
  len = strlen(orig);
  if (len >= sizeof ssn) return 1;
  memcpy(ssn, orig, len + 1);

  // Remove '-' and make canonical: close up over the dash after the
  // area, then over the dash after the group.
  if (ssn[3] == '-') memmove(ssn+3, ssn+4, 8);
  if (ssn[5] == '-') memmove(ssn+5, ssn+6, 6);

  // The canonical form must be exactly nine decimal digits.
  if (strlen(ssn) != 9) return 1;
  for (i = 0; i < 9; i++) {
    if (!isdigit((unsigned char)ssn[i])) return 1;
  }

  if (illegal_ssn(ssn)) return 1;

  // Split into area (3 digits), group (2 digits) and serial (4 digits).
  iarea   = (ssn[0]-'0')*100 + (ssn[1]-'0')*10 + (ssn[2]-'0');
  igroup  = (ssn[3]-'0')*10 + (ssn[4]-'0');
  iserial = (ssn[5]-'0')*1000 + (ssn[6]-'0')*100
          + (ssn[7]-'0')*10 + (ssn[8]-'0');

  if (iserial == 0 || igroup == 0) return 1; /* invalid */

  // Explicit tests for area 0, 666, 729-749 and 764-999 used to live
  // here; the maxgroup[] table consulted by validgroup() replaced them.
  if (!validgroup(iarea, igroup)) return 1;

  // Probably more I can do here, but this is working well enough.

  return 0; /* valid */
}

// Record one SSN-like string found in the current file.
//
// ssn:    the matched text exactly as it appeared (with or without dashes)
// lineno: line number to report for the match
//
// Updates the per-file counters ssncount and boguscount, and echoes the
// match to stdout while ssncount is still below 10.  Because ssncount is
// bumped before that test, at most nine matches per file are printed;
// some files may have 100,000+ of them.
void ssn_stats(char *ssn, int lineno)
{
  int bogus;

  ssncount++;
  bogus = validate(ssn);   // 0 = plausible, 1 = bogus
  boguscount += bogus;

  if (ssncount < 10) {
    const char *suffix = (bogus == 0) ? "" : " (Bogus?)";
    fprintf(stdout, "\"%s\", line %d: %s%s\n", filename, lineno, ssn, suffix);
  }

  // Still to do:
  //  - Better summarizing.  Probably output to a buffer and throw the
  //    buffer away entirely if we decide the file didn't contain SSNs.
  //  - Command-line parameters to choose between reporting single SSNs
  //    and reporting only files with more than some threshold (eg 1000),
  //    to find proper databases rather than odd emails etc.
  //  - Look at the ratio of good to bogus when the numbers are large.
}

/* Main procedure to scan input and identify sequences of digits which
   look like SSNs, i.e. nnn-nn-nnnn or nnnnnnnnn *without* any extra
   digits immediately before or after.

   Call once per input character, in order, and once more with EOF at the
   end of each file.  The EOF call reports a match that ends exactly at
   end-of-file and resets the machine for the next file.

   Each match is passed to ssn_stats() together with the number of the
   line it appeared on.  The global lineno is advanced on every '\n'. */

void state_machine(int c)
{

#define STATE_ANY 0
#define STATE_DIGIT1 1
#define STATE_DIGIT2 2
#define STATE_DIGIT3 3
#define STATE_DASH1  4
#define STATE_DIGIT4 5
#define STATE_DIGIT5 6
#define STATE_DASHDIGIT4 15
#define STATE_DASHDIGIT5 16
#define STATE_DASH2  7
#define STATE_DIGIT6 8
#define STATE_DIGIT7 9
#define STATE_DIGIT8 10
#define STATE_DIGIT9 11
#define STATE_GOBBLE_DIGITS 12

  static int state = STATE_ANY;

  static char buffer[12] = { 0 }; // 9 digits, 2 dashes, and trailing NUL.
  static char *ssn = buffer;      // next free slot in buffer

  // Line the characters collected so far belong to.  Captured before the
  // newline is counted, so that a match terminated by '\n' is reported on
  // its own line rather than on the following one.
  int match_line = lineno;

  if (c == '\n') lineno++;

  if (c == EOF) {
    // A complete candidate that runs right up to end-of-file has no
    // terminating character to trigger the normal report, so log it here.
    if (state == STATE_DIGIT9) {
      *ssn = '\0';
      ssn_stats(buffer, match_line);
    }
    // Drop through to the common reset below.
  } else switch (state) {
  case STATE_ANY:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT1; return;}
    return;
  case STATE_DIGIT1:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT2; return;}
    break;
  case STATE_DIGIT2:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT3; return;}
    break;
  case STATE_DIGIT3:
    // Third digit seen: a dash selects the nnn-nn-nnnn form, a digit the
    // undashed form.  The two forms use separate states so that mixed
    // shapes such as 99999-9999 are not accepted.
    if (c == '-') {*ssn++ = c; state = STATE_DASH1; return;}
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT4; return;}
    break;
  case STATE_DASH1:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DASHDIGIT4; return;}
    break;
  case STATE_DASHDIGIT4:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DASHDIGIT5; return;}
    break;
  case STATE_DIGIT4:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT5; return;}
    break;
  case STATE_DASHDIGIT5:
    if (c == '-') {*ssn++ = c; state = STATE_DASH2; return;}
    if (isdigit(c)) {
      // "nnn-nnn...": not an SSN.  Swallow the rest of this digit run;
      // restarting in STATE_ANY would let its tail begin a new match
      // that has digits directly in front of it.
      *ssn = '\0'; ssn = buffer;
      state = STATE_GOBBLE_DIGITS;
      return;
    }
    break;
  case STATE_DIGIT5:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT6; return;}
    break;
  case STATE_DASH2:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT6; return;}
    break;
  case STATE_DIGIT6:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT7; return;}
    break;
  case STATE_DIGIT7:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT8; return;}
    break;
  case STATE_DIGIT8:
    if (isdigit(c)) {*ssn++ = c; state = STATE_DIGIT9; return;}
    break;
  case STATE_DIGIT9:
    if (isdigit(c)) {
      // if we have another digit, it was not an SSN
      state = STATE_GOBBLE_DIGITS;
      *ssn = '\0'; ssn = buffer; fflush(stdout);
    } else {
      // otherwise it was an SSN, so log it.
      state = STATE_ANY;
      *ssn = '\0';
      ssn_stats(buffer, match_line);
      ssn = buffer; fflush(stdout);
    }
    // Fall through
  case STATE_GOBBLE_DIGITS:
    // we want to explicitly discard trailing digits after a 9-digit sequence
    // rather than switch to STATE_ANY because otherwise an 18-digit sequence would
    // discard the first 9 digits then accept the next 9 as an SSN.
    if (!isdigit(c)) state = STATE_ANY; // end of a number sequence
    return;
  }
  // Reached on EOF and from every 'break' above: abandon the partial
  // candidate and start afresh.
  *ssn = '\0'; ssn = buffer; fflush(stdout);
  state = STATE_ANY;
}

// Scan the file named by the global 'filename' and print a per-file
// summary to stdout if anything SSN-like was found.
//
// Every file is searched, whatever its type (maximal paranoia mode);
// extensions such as .dll, .bmp or .exe could be filtered out here.
// Files that cannot be opened are skipped: that is not fatal, since they
// may simply be locked.  EACCES is skipped silently (directories also
// give 'permission denied'); any other error is reported on stderr.
void DoOneFile(void) {
  int ch;

  ssnfile = fopen(filename, "rb");
  if (ssnfile == NULL) {
    if (errno != EACCES)
      fprintf(stderr, "%s: %s - %s\n", progname, filename, strerror(errno));
    return;
  }

  // Fresh counters for this file.
  lineno = 1;
  boguscount = 0;
  ssncount = 0;
  if (debug) fprintf(stdout, "Processing %s\n", filename);

  // Feed every character except '\r' to the scanner.  EOF is passed
  // through as well, as the final call for this file.
  do {
    ch = fgetc(ssnfile);
    if (ch != '\r') state_machine(ch);
  } while (ch != EOF);

  // See the notes in ssn_stats() on better summarizing.
  if (ssncount != 0) {
    fprintf(stdout, "\"%s\": %d ssn-like strings detected of which %d may be bogus\n\n",
      filename, ssncount, boguscount);
  }
  fclose(ssnfile);
}

void ForAllFiles(void) {
  struct _finddata_t ft;
  long int lf;
  int bMore = FALSE;

  lf = _findfirst("*.*", &ft);
  bMore = (lf != -1L);

  while (bMore) {
    if  (( ft.name[0] != '.') && (ft.attrib & _A_SUBDIR)) {
      _chdir(ft.name);
      ForAllFiles();
      _chdir("..");
    }
    bMore = !_findnext(lf, &ft);
  }

  lf = _findfirst( "*.*", &ft);
  bMore = (lf != -1L);
  while (bMore) {
    if  ( ft.name[0] != '.')  {
      _getcwd(current, sizeof(current)-1);
      strcat(current, "\\");
      strcat(current, ft.name);
      filename = current; // another nasty global
      DoOneFile(); // BUT WOULD PREFER NOT TO IF IT IS A DIRECTORY!!!
                   // (relying on dir giving EACCES in WinXX)
    }
    bMore = !_findnext(lf, &ft) ;
  }
}

// Entry point.  Usage: <progname> rootdir
//
// Scans every file under rootdir and reports SSN-like strings on stdout.
// Exits with EXIT_FAILURE on a usage error or when rootdir cannot be
// entered, EXIT_SUCCESS otherwise.
int main (int argc, char **argv)
{
   // Private buffer for the starting directory: the global 'current' is
   // reused by ForAllFiles() and would not survive the scan.
   char startdir[FILENAME_MAX];
   int have_startdir;
   char *s;

   // Strip any leading path from the program name, whichever separator
   // was used to invoke it.
   if (argc < 1 || argv[0] == NULL) {
     progname = "findssn";
   } else {
     progname = (((s=strrchr(argv[0], '/')) != NULL ? s+1:
                 (s=strrchr(argv[0], '\\')) != NULL ? s+1:
                 argv[0]));
   }

   if (argc != 2) {
     fprintf(stderr, "syntax: %s rootdir\n", progname);
     exit(EXIT_FAILURE);
   }

   init_groups();
   /****************** LOOP OVER ALL FILES ****************/
   have_startdir = (_getcwd(startdir, sizeof(startdir)) != NULL);
   if (_chdir(argv[1]) != 0) {
     // Without this check a mistyped rootdir would silently scan the
     // current directory instead.
     fprintf(stderr, "%s: %s - %s\n", progname, argv[1], strerror(errno));
     exit(EXIT_FAILURE);
   }
   ForAllFiles();
   if (have_startdir) _chdir(startdir);  // best effort: put the cwd back
   /************   END OF LOOP OVER ALL FILES   ************/

   return EXIT_SUCCESS;
}
