/* BEGIN: CHECKCOMPAT */ /* ** CHECKCOMPAT -- check for From and To person compatible. ** ** This routine can be supplied on a per-installation basis ** to determine whether a person is allowed to send a message. ** This allows restriction of certain types of internet ** forwarding or registration of users. ** ** If the hosts are found to be incompatible, an error ** message should be given using "usrerr" and an EX_ code ** should be returned. You can also set to->q_status to ** a DSN-style status code. ** ** EF_NO_BODY_RETN can be set in e->e_flags to suppress the ** body during the return-to-sender function; this should be done ** on huge messages. This bit may already be set by the ESMTP ** protocol. ** ** Parameters: ** to -- the person being sent to. ** ** Returns: ** an exit status ** ** Side Effects: ** none (unless you include the usrerr stuff) */ /* NOTES: 0) I am well aware that putting all this code in one file is bad software engineering. I wouldn't normally do so; however I have had a little experience trying to get people to work on sendmail and the general concensus is people who have not hacked it before are very reluctant to start, so I've made it extra easy for them by allowing a single insertion (or swap of one file) and no other changes. Also a *lot* of this code was lifted wholesale out of other projects, and has some graunchy interfaces that should probably be rewritten. Since the whole thing took me about 4 days from start to finish, I don't feel too guilty over this :-) If anyone wants to pay me to rewrite this cleanly and integrate it better with sendmail, so I can afford to take a month off work to do it properly rather than the Memorial day long weekend, then I'll welcome them with open wallet :-) 1) all the places where I am picking values out of thin air here (eg score += 2 for all caps subject lines) could be replaced by something like "score += SCORE("ALLCAPS", 2)" which would look up "ALLCAPS" as a token in a SCORE.db file and allow these to be parameterised cleanly. 2) Currently the same site will count against a score multiple times if it appears in several headers. Should modify the code so that it only counts once. This way headers that went through juno, compuserve, uunet and earthlink will get four scores added, but mail that went through 4 earthlink machines will only receive demerits once. Easiest (hacky) way to do this is to keep a list of every site successfully looked up in DOMAINS.db, and next time that site is enquired of, return 0. This has the advantage that we also get a list of the number of different domains in the headers, which we can use as a further scoring technique, ie if a mail goes through 5 different domains - even if they're not in the spamming domain list - that's got to be a bad sign. (Except for the poor sods in UUCP land) 3) There's a Received: line forgery that is valuable to note which I don't think I'm catching yet: 123.123.123.123 (some.name [234.234.234.234]) where the received from host is a numeric IP and the real numeric IP doesn't match, i.e. someone has done HELO 123.123.123.123 ... */ #ifndef TRUE #define TRUE (0==0) #define FALSE (0!=0) #endif /* Nothing bad happens if a header line is longer than this; we just miss some */ #ifndef MAX_LINE #define MAX_LINE 4097 #endif static STAB *DOMAINS = NULL; static STAB *EMAILADDRS = NULL; static STAB *USERS = NULL; /* BEGIN: file debugf.c */ int suppress_debug = (0!=0); int checked = 0; #ifndef LOGFILE #define LOGFILE "/var/log/sendmail-filter.log" #endif void debugf(char *s, ...) { /* OK, I'm going to nail this damn bug for once and all... */ /* Time to size the string by vfprint'ing it to /dev/null... */ FILE *errfile; int string_length; static char *buff = NULL; va_list ap; if (checked == 0) { checked = 1; errfile = fopen(LOGFILE, "r"); if (errfile == NULL) { /* Only want to log if file exists already... */ suppress_debug = (0==0); } else { fclose(errfile); } } va_start(ap, s); { FILE *nullfile; nullfile = fopen("/dev/null", "w"); if (nullfile == NULL) { errfile = fopen(LOGFILE, "a"); if (errfile != NULL) { fprintf(errfile, "Major error - cannot open /dev/null\n"); fflush(errfile); fclose(errfile); } exit(1); } string_length = vfprintf(nullfile, s, ap); fclose(nullfile); buff = malloc(string_length+1); if (buff != NULL) vsprintf(buff, s, ap); } va_end(ap); /* Suppress logging to file, but still allow in window if present */ if (suppress_debug) { return; } /*fprintf(stderr, "%s", buff);*/ errfile = fopen(LOGFILE, "a"); if (errfile != NULL) { if (buff != NULL) fprintf(errfile, "%s", buff); if (buff == NULL) fprintf(errfile, "debugf: could not allocate %d byte buffer\n", string_length); fflush(errfile); fclose(errfile); } if (buff != NULL) free(buff); buff = NULL; } /* END: file debugf.c */ #ifdef NEVER #define CRUDE_LOGGING #ifdef CRUDE_LOGGING static void privlog(char *s) { /* Cheap & nasty for quick debugging */ FILE *logfile = fopen("/var/log/sendmail-filter.trace", "a"); if ((logfile != NULL) && (s != NULL)) fprintf(logfile, "%s\n", s); if (logfile != NULL) fclose(logfile); } #else #define privlog(s) (void)(tTd(49, 1) ? printf("%s\n", s) : 0) #endif #endif /* BEGIN: file sscanr.c */ #include #define MAX_MATCHES 10 static regmatch_t Matches[MAX_MATCHES+1]; static int myregsub(r, data, Matches, extractpattern, result) regex_t *r; char *data; regmatch_t *Matches; char *extractpattern; char *result; { char *from, *to; int c; from = extractpattern; to = result; for (;;) { c = *from++; if (c == '\0') break; if (c == '\\') { c = *from++; if (c == '\0') return(FALSE); if (isdigit(c)) { c -= '0'; if (Matches[c].rm_so == -1) return(FALSE); strncpy(to, data+Matches[c].rm_so, Matches[c].rm_eo-Matches[c].rm_so); to += Matches[c].rm_eo-Matches[c].rm_so; } else if (c == '\\') { *to++ = '\\'; } else { return(FALSE); } } else *to++ = c; } *to = '\0'; return(TRUE); } /* Returns TRUE or FALSE */ static int sscanr( char *pattern, char *data, char *extractpattern, char *result) { regex_t reg; regex_t *r = ® int i; if (pattern == NULL && data == NULL && extractpattern == NULL && result == NULL) return(FALSE); for (i = 0; i < MAX_MATCHES; i++) { Matches[i].rm_so = -1; Matches[i].rm_eo = -1; } if (*pattern == '\0') { if (*data == '\0') { if ((extractpattern == NULL) || (*extractpattern == '\0')) { return(TRUE); /* I guess we can allow "" to match "" */ } } debugf("Invalid empty pattern argument\n"); return(FALSE); } regcomp(r, pattern, REG_ICASE); i = (regexec(r, data, MAX_MATCHES, Matches, 0 /* Needs case equiv here */) == 0); if (i && extractpattern && result) { i = myregsub(r, data, Matches, extractpattern, result); } return(i); } /* END: file sscanr.c */ /* BEGIN: received.c */ #define MAX_PATTERN 128 /* Crude first hack at extracting info from Received: lines */ static char *RecRules[] = { "(from \\(.*\\)) by \\(.*\\) id \\(.*\\) for \\(.*\\);\\(.*\\)", "\\1", "\\2", "", "\\3", "\\4", "\\5", "(from \\(.*\\)) by \\(.*\\) id \\(.*\\);\\(.*\\)", "\\1", "\\2", "", "\\3", "", "\\4", "from \\(.*\\) by \\(.*\\) with \\(.*\\) id \\(.*\\) for \\(.*\\);\\(.*\\)", "\\1", "\\2", "\\3", "\\4", "\\5", "\\6", "from \\(.*\\) by \\(.*\\) with \\(.*\\) id \\(.*\\);\\(.*\\)", "\\1", "\\2", "\\3", "\\4", "", "\\5", "from \\(.*\\) by \\(.*\\) via \\(.*\\) id \\(.*\\);\\(.*\\)", "\\1", "\\2", "\\3", "\\4", "", "\\5", "from \\(.*\\) by \\(.*\\) id \\(.*\\) for \\(.*\\);\\(.*\\)", "\\1", "\\2", "", "\\3", "\\4", "\\5", "from \\(.*\\) by \\(.*\\) id \\(.*\\);\\(.*\\)", "\\1", "\\2", "", "\\3", "", "\\4", "from \\(.*\\) by \\(.*\\) with \\(.*\\);\\(.*\\)", "\\1", "\\2", "\\3", "", "", "\\4", "from \\(.*\\) by \\(.*\\) via \\(.*\\);\\(.*\\)", "\\1", "\\2", "\\3", "", "", "\\4", "from \\(.*\\) by \\(.*\\) with \\(.*\\) id \\(.*\\)", "\\1", "\\2", "\\3", "\\4", "", "", "by \\(.*\\) via \\(.*\\) from \\(.*\\);\\(.*\\)", "\\3", "\\1", "\\2", "", "", "\\3", "by \\(.*\\) from \\(.*\\);\\(.*\\)", "\\2", "\\1", "", "", "", "\\3", "by \\(.*\\) with \\(.*\\) id \\(.*\\);\\(.*\\)", "", "\\1", "\\2", "\\3", "", "\\4", "by \\(.*\\) via \\(.*\\);\\(.*\\)", "\\2", "\\1", "", "", "", "\\3", "by \\(.*\\) id \\(.*\\);\\(.*\\)", "", "\\1", "", "\\2", "", "\\3", "from \\(\".*\" \\- .*\\)", "", "", "\\1", "", "", "", "from \\-\\-\\- \\(.*\\) \\-\\-\\-", "", "", "\\1", "", "", "", NULL, NULL, NULL, NULL, NULL, NULL, NULL /* from by with id for date */ }; static void trim(char *s) { char *from, *to; int c; from = s; to = s; while (*from == ' ') from += 1; for (;;) { c = *from++; if (c == '\0') break; if (c == '\t') c = ' '; if (c == '\n') c = ' '; if ((c == ' ') && (*(to-1) == ' ')) { /* Fold multiple spaces */ } else { *to++ = c; } } *to = '\0'; /* trim trailing spaces */ for (;;) { if (to == from) break; to -= 1; if (*to != ' ') break; *to = '\0'; } } int checkhelloip(char *claimed, char *actual) { if ((sscanr("[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*", claimed, NULL, NULL)) && (sscanr("[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*", actual, NULL, NULL))) { trim(claimed); trim(actual); if (strcmp(claimed, actual) != 0) { debugf("Score: HELO '%s' from site '%s' -> 1\n", claimed, actual); return(1); } } return(0); } int checkhello(char *claimed, char *actual) { char *cldot, *acdot; char *cl, *ac; trim(claimed); trim(actual); if ((sscanr("[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*", claimed, NULL, NULL)) || (sscanr("[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*", actual, NULL, NULL))) { /* Oops. One of them was an IP and should have been a name. */ return(0); } if ((cldot = strrchr(claimed, '.')) == NULL) return(0); if ((acdot = strrchr(actual, '.')) == NULL) return(0); if (strncasecmp(claimed, "localhost", 9) == 0) return(0); if (strncasecmp(actual, "localhost", 9) == 0) return(0); *cldot = '\0'; *acdot = '\0'; cl = strrchr(claimed, '.'); ac = strrchr(actual, '.'); if (cl == NULL) cl = claimed; else cl += 1; if (ac == NULL) ac = actual; else ac += 1; *cldot = '.'; *acdot = '.'; trim(cl); trim(ac); /* Easier than finding the error in the regexps :-( */ if (strcasecmp(cl, ac) != 0) { trim(claimed); trim(actual); debugf("Score: HELO '%s' from site '%s' -> 1\n", claimed, actual); return(1); } } int parsereceived(char *line, char *ResultFrom, char *ResultBy, char *ResultWith, char *ResultId, char *ResultFor, char *ResultDate, int maxline) { int score = 0; char *rule; static char ALL[MAX_LINE]; static char XWITH[MAX_LINE]; static char XID[MAX_LINE]; static char XBY[MAX_LINE]; static char XFOR[MAX_LINE]; static char XDATE[MAX_LINE]; static char XFROM[MAX_LINE]; char ResultVersion[MAX_PATTERN]; char RecClaimed[MAX_PATTERN]; char RecClaimedIP[MAX_PATTERN]; char RecActual[MAX_PATTERN]; char RecIdent[MAX_PATTERN]; char RecIP[MAX_PATTERN]; char pattern[MAX_PATTERN]; char **stringsp; char *s; FILE *Tests; char *xfrom, *xby, *xwith, *xid, *xfor, *xdate; *ResultFrom = '\0'; *ResultBy = '\0'; *ResultWith = '\0'; *ResultId = '\0'; *ResultFor = '\0'; *ResultDate = '\0'; stringsp = RecRules; for (;;) { if (*stringsp == NULL) break; rule = *stringsp++; xfrom = *stringsp++; xby = *stringsp++; xwith = *stringsp++; xid = *stringsp++; xfor = *stringsp++; xdate = *stringsp++; sprintf(pattern, "^[ ]*%s[ ]*$", rule); if (sscanr(pattern, line, "\\0", ALL)) { if ( sscanr(pattern, line, xfrom, XFROM) && sscanr(pattern, line, xby, XBY) && sscanr(pattern, line, xwith, XWITH) && sscanr(pattern, line, xid, XID) && sscanr(pattern, line, xfor, XFOR) && sscanr(pattern, line, xdate, XDATE) ) { strncpy(ResultFrom, XFROM, maxline); strncpy(ResultBy, XBY, maxline); strncpy(ResultWith, XWITH, maxline); strncpy(ResultId, XID, maxline); strncpy(ResultFor, XFOR, maxline); strncpy(ResultDate, XDATE, maxline); if (sscanr("\\(.*\\) (\\(.*\\) \\[\\(.*\\)\\])", XWITH, "\\1", ResultWith)) { (void)sscanr("\\(.*\\) (\\(.*\\) \\[\\(.*\\)\\])", XWITH, "\\2", RecActual); (void)sscanr("\\(.*\\) (\\(.*\\) \\[\\(.*\\)\\])", XWITH, "\\3", RecIP); /*addhead("SPAM-Rec-From-ReverseDNS:", RecActual);*/ /*addhead("SPAM-Rec-From-IP:", RecIP);*/ score += checkhello(RecClaimed, RecActual); score += checkhelloip(RecClaimedIP, RecIP); } if (sscanr("\\(.*\\) (\\(.*\\)\\[\\(.*\\)\\])", XFROM, "\\1", RecClaimed)) { (void)sscanr("\\(.*\\) (\\(.*\\)\\[\\(.*\\)\\])", XFROM, "\\2", RecActual); (void)sscanr("\\(.*\\) (\\(.*\\)\\[\\(.*\\)\\])", XFROM, "\\3", RecIP); if (sscanr("\\[\\(.*\\)\\]", RecClaimed, "\\1", RecClaimedIP)) { /* HELO [1.2.3.4] ??? */ *RecClaimed = '\0'; } else *RecClaimedIP = '\0'; /*addhead("SPAM-Rec-From-HELO:", RecClaimed);*/ if (sscanr("\\(.*\\)@\\(.*\\)", RecActual, "\\1", RecIdent)) { (void)sscanr("\\(.*\\)@\\(.*\\)", RecActual, "\\2", RecActual); /*addhead("SPAM-Rec-From-Ident:", RecIdent);*/ /*addhead("SPAM-Rec-From-ReverseDNS:", RecActual);*/ } else { /*addhead("SPAM-Rec-From-ReverseDNS:", RecActual);*/ } /*addhead("SPAM-Rec-From-IP:", RecIP);*/ score += checkhello(RecClaimed, RecActual); score += checkhelloip(RecClaimedIP, RecIP); } if (sscanr("\\(.*\\) (\\(.*\\))", XBY, "\\1", ResultBy)) { if (sscanr("\\(.*\\) (\\(.*\\))", XBY, "\\2", ResultVersion)) { /*addhead("SPAM-Rec-By:", ResultBy);*/ /*addhead("SPAM-Rec-Version:", ResultVersion);*/ } } debugf("parse_received #1: %d\n", score); return(score); /* Accept first rule that matches */ } } } score += 1; debugf("Score: couldn't parse Received: %s -> 1\n", line); debugf("parse_received #2: %d\n", score); return(score); /* Can't believe *no* matches? - that's what the final ".*" is for! */ } int process_received(char *s) { int score = 0; static char line[MAX_LINE], emailaddr[MAX_LINE], label[MAX_LINE]; static char User[MAX_LINE], Mail[MAX_LINE]; static char RECFROM[MAX_LINE], RECBY[MAX_LINE]; static char RECWITH[MAX_LINE], RECFOR[MAX_LINE]; static char RECID[MAX_LINE], RECDATE[MAX_LINE]; char *from, *to; int c; /* Transfer s to emailaddr, trimming tabs/nl -> space */ strcpy(emailaddr, s); trim(emailaddr); debugf("Tidied Received: %s\n", emailaddr); debugf("process_received before: %d\n", score); score += parsereceived(emailaddr, RECFROM, RECBY, RECWITH, RECID, RECFOR, RECDATE, MAX_LINE); debugf("process_received after: %d\n", score); return(score); } /* END: received.c */ /* BEGIN: file from2fqd.c */ /* IMPORTANT NOTES: This function is lifted wholesale out of another package. For the purposes of looking up a name in a pgp keyring, I don't need to work out a proper pgp path. In fact all I really want is host!user because even if the persons pgp key says host_x!host!user, the host!user part is enough to disambiguate them. However, the local host name is quite important - so I'm hacking this from the way it was in order to pick up the *same* localhost that the mail program uses (in send.c I think) */ /* TODO: local mail comes in as 'user@marisa' - need to add our domain */ /* In fact, sending something to plain user@site will dump core :-( */ /* BUG: I had assumed that in user%site, site would always already be an fqdn. Then I got mail today from bill%solaria@hac2arpa.hac.com May need a whole new algorithm: pling!user%percent@at -> at!pling!rev(percent. '%')!user */ #define MAILHOST "" #define LOCALSITE "" /*########################################################################## This file exports "char *from2fqdn(char *From_line)" which takes a whole line extracted from a Unix mail file (starting "From ") and mangles it to get a reliable email address. This is useful in mail programs and vacation programs etc to provide an auto-reply. (Of course you also have the choice of From:, Reply-To:, Return-Receipt-To:, Sender:, Errors-To:, X400-Originator:, Originator:, Really-From; and Original-, Resent-, and X-. Standards - I love 'em - there's so many to chose from.) It makes the assumption that site!site bang paths have to stay relative to some given fqdn, but that site.uucp is a 'real' registered uucp site that any uucp hub with maps will know about. I've tested this on several years worth of mail files. However I can only run so many mailers myself, and your mailer has no doubt yet another way of mangling headers beyond recognition, so mail me a line with any headers that break this. Preferably uuencoded just in case some gateway wants to piss around inside the mail text... Every time I post something like this to the net I get mail from perl fanatics who tell me some genius has written the same thing in 12 lines of perl. Well, I don't give a damn this time. If you have, keep it to yourself. I enjoyed hacking this and it's useful for the jobs I want it for. I also happen to think it's pretty robust, so look on that as a challenge to find a genuine header that breaks it - I'd appreciate the input. (c) Graham Toal 1993 As usual with all my code I copyright it *only* to stop someone else depriving me of access to my own code. However I place no restrictions on its use; it may be used academically, privately, or commercially. You're welcome to make money out of it if you can (ha ha). If a derivative work is placed under a more restrictive license (such as the GPL) then that license applies only to those derived works; the original code remains free and can be obtained from me directly. Include this notice in such derived works. ##########################################################################*/ /* Should get these two out of environment if possible */ /* static */ char mailhost[128]/* = { '\0' }*/; /* static */ char localhost[128]/* = { '\0' }*/; /* Forward declarations */ static void reverse_ch(char *s, int ch); /** Utility routines to look for things in strings **/ /* returns TRUE if char pointed at is an fqdn (possibly followed by %, " etc */ static int isfqdn(char *s) { int c; int dotseen = FALSE; for (;;) { c = *s++; if (c == '\0') break; if (c == '.') dotseen = TRUE; if (c == '@' || c == '%' || c == '!' || c == ':' || c == ',') break; } return(dotseen); } /* returns first char of a fqdn or NULL if no fqdn found */ static char *findleftfqdn(char *start, char *s) { /* s can be the first char of user in !path!path!user or the last '!' in !path!path! or the last char in path in !path!path!path but it will *not* be a '\0' at the end of a string */ char *f; if (s == NULL) return(NULL); f = s+strlen(s)-1; for (;;) { if (s == start) return(NULL); if (*s == '.') break; s -= 1; } /* s now in middle of an fqdn */ f = s; for (;;) { if (f == start) return(start); if (*f == '!' || *f == '%' || *f == ':' || *f == ',' || *f == '@') return(f+1); f -= 1; } } /* result is exclusive, ie NUL or '@' etc after fqdn */ static char *endoffqdn(char *s) { /* s is the first char of an fqdn or some point in the string before it */ char *p; p = strchr(s, '.'); if (p == NULL) return(p); for (;;) { if (*p == '\0' || *p == '%' || *p == '@' || *p == '!' || *p == ':' || *p == ',') { return(p); } p += 1; } } /* result points to first char in username */ static char *startofusername(char *start, char *endofuser) { /* endofuser is inclusive, ie last char in path!path!user Need special for !site!site!site!user because user can contain dots - hence we have to avoid confusing it with an fqdn */ char *s; s = endofuser; for (;;) { if (s == start) return(s); if (*s == '!' || *s == '%' || *s == ':' || *s == ',') return(s+1); s -= 1; } } /* Returns more-or-less an fqdn, minus a little post-processing to be done */ static int canonfrom(char *name, char *cname) { /* Main workhorse, accepts the space-separated tag immediately after 'From ' (has already been faked to add an extra site if 'From ' line had a 'remote from ' appended to it */ char *p, *ep; char *banguser, *fqdn, *user; /* The interesting cases: handle these and the rest fall into place... leaf1!leaf2!fqdn1!leaf3!leaf4!user%fqdn2%fqdn%fqdn@fqdn -> leaf3!leaf4!user@fqdn1 leaf1!leaf2!user%fqdn1%fqdn@fqdn -> leaf1!leaf2!user@fqdn1 */ fqdn = strchr(name, '%'); if (fqdn != NULL) { user = fqdn; /* set 'user' to user part in uucp!uucp!uucp!user%site%site@blah... */ for (;;) { if (user == name) break; if (*user == '!') { user += 1; break; } user -= 1; } fqdn += 1; if (isfqdn(fqdn)) { /* blah!blah!user%site.fqdn@anywhere */ /* I *think* we poke a \0 into the @ and replace the % with an @ */ /* but not sure if done here or elsewhere */ char *ep = endoffqdn(fqdn); *ep = '\0'; fqdn -= 1; *fqdn = '@'; fqdn = NULL; name = user; } else { /* inch forward to first *real* %fqdn, or @ */ /* in order to replace uu1!uu2!user%site1%site2%site3%fqdn1@fqdn2 | | | +------ or alternative 'ep' | | +------ 'ep' | +------ 'fqdn' +------ 'user' with uu1!uu2!site3!site2!site1!user@fqdn */ char *ep; int ch; ep = fqdn; for (;;) { if (*ep == '\0') { /* Should never happen - uucp!uucp!user%site with no '@' */ break; } if (*ep == '@') { ch = *ep; *ep = '\0'; reverse_ch(user, '%'); *ep = ch; break; } if (*ep == '%') { /* If its an fqdn we're done, otherwise skip it */ if (isfqdn(ep+1)) { ep = endoffqdn(ep+1); *ep = '\0'; reverse_ch(user, '%'); break; } } ep += 1; } } while ((fqdn = strchr(name, '%')) != NULL) *fqdn = '!'; fqdn = NULL; /* having sneakily faked all %'s into !'s, pass the buck */ } if (fqdn == NULL) fqdn = strchr(name, '@'); if (fqdn == NULL) { if ((ep = strrchr(name, '!')) == NULL) { sprintf(cname, "%s@%s", name, localhost); return(TRUE); } /* Must be bang path! */ /* site!user site!site!site!user fqdn!site!site!user fqdn!user */ fqdn = findleftfqdn(name, ep); if (fqdn == NULL) { /* pure uucp path or local user */ sprintf(cname, "%s@%s", name, mailhost); return(TRUE); } banguser = endoffqdn(fqdn); *banguser++ = '\0'; sprintf(cname, "%s@%s", banguser, fqdn); return(TRUE); } banguser = findleftfqdn(name, startofusername(name, fqdn-1)); *fqdn++ = '\0'; if (banguser == NULL) { banguser = name; ep = endoffqdn(fqdn); if (ep != NULL) *ep = '\0'; } else { ep = endoffqdn(banguser); if (ep != NULL) { fqdn = banguser; *ep++ = '\0'; banguser = ep; } } sprintf(cname, "%s@%s", banguser, fqdn); return(TRUE); /* Note there is no longer a 'return(FALSE)' since I think I now handle all cases. */ } /* Simple reverse all chars between start (inclusive) and end (exclusive) */ static void reverse(char *start, char *end) { int c; end -= 1; for (;;) { if (start == end) break; c = *start; *start = *end; *end = c; end -= 1; if (start == end) break; start += 1; } } /* Reverse the dot-separated components of an fqdn - a hack for the braindead JANET network. Rather sneaky code :-) (If you don't know this algorithm, you may have to draw the data out on paper to follow it) */ static void reverse_janet(char *s) { reverse_ch(s, '.'); } static void reverse_ch(char *s, int ch) { char *cp; reverse(s, s+strlen(s)); for (;;) { cp = strchr(s, ch); if (cp == NULL) break; reverse(s, cp); s = cp+1; } reverse(s, s+strlen(s)); } /** Main external entry-point **/ /* Returns user@fqdn or site!site!site!user@fqdn */ /* Result is in temporary workspace and must be copied immediately if this function is to be called again before it is used, otherwise it will be overwritten */ char *from2fqdn(char *origline) { /* Input is complete 'From ...' line as extracted from mail file */ static char raw[1024]; static char xxline[1024]; char *line = xxline; static char full[1024]; char *p, *fqdn; /*int deb = 0;*/ /*fprintf(stderr, "p%d\n", ++deb);*/ strcpy(line, origline); /* Should get these from: 1) command line if given overrides other values 2) SNEWS/KA9Q's environment files if running on DOS 3) gethostname() or whatever it's called function if running on Unix However for the moment, just remember to change the #defines at the top... */ if (*mailhost == '\0') { strcpy(mailhost, MAILHOST); } if (*localhost == '\0') { strcpy(localhost, LOCALSITE); } /* After spending the evening writing this (and bits of it started off as another program) I'm too zonked to tidy up the formatting properly */ if (p = strchr(line, '\n')) *p = '\0'; /* Oops - had index here before! */ if (p = strchr(line, '\r')) *p = '\0'; /* In case DOS "rb" mode files */ #ifdef DEBUG_FQDN fprintf(stdout, "INPUT: %s -> ", line); #endif /* 'remote from' needs special handling */ if (strncmp(line, "From ", 5) == 0) line += 5; else if (strncmp(line, "FROM:", 5) == 0) line += 5; for (p = line; *p && *p != ' '; ++p); if ((fqdn = strstr(p, "remote from ")) != NULL) { *p = '\0'; fqdn += strlen("remote from "); while (*fqdn == ' ' || *fqdn == '\t') fqdn += 1; p = strchr(fqdn, ' '); if (p != NULL) *p = '\0'; if (strchr(line, '!') != NULL) { /* From site!user%blah%blah@blah remote from site */ sprintf(full, "%s!%s", fqdn, line); } else if (strchr(line, '%') != NULL) { /* From user%site%site@fqdn remote from blah */ /* Only need to handle specially if none of the sites are fqdns */ sprintf(full, "%s", line); } else if (strchr(line, '@') != NULL) { /* From user@site remote from blah */ /* Only need to handle specially if site is not fqdn but blah is */ sprintf(full, "%s", line); } else { /* From user remote from site */ sprintf(full, "%s!%s", fqdn, line); } } else { *p = '\0'; (void)strcpy(full, line); } /* Nasty MMDF header */ if (*full == '@') { p = strrchr(line, ':'); /* I *think* you can get @fqdn,@fqdn,user@site but I've never seen it */ if (p != NULL) { strcpy(full, p+1); } else { p = strrchr(line, ','); if (p != NULL) strcpy(full, p+1); } } strcpy(line, full); p = line; strcpy(raw, line); if (canonfrom(p, raw)) { /* lowercase fqdn */ p = strchr(raw, '@')+1; for (;;) { if (*p == '\0') break; if (isalpha(*p) && isupper(*p)) *p = tolower(*p); p += 1; } /* if came from faulty janet site, bodge a repair */ fqdn = strchr(raw, '@')+1; if (strncmp(fqdn, "uk.ac.", 6) == 0) reverse_janet(fqdn); /* if routed to a uucp site, send to smtp->uucp gateway */ /* (pseudo-domain .uucp sites are assumed to be known globally; ...!site!site!site!user sites aren't) */ p = strchr(fqdn, '.'); if (p == NULL) { /* ADD LOCAL DOMAIN HERE! (TODO) */ } else if (strcmp(p, ".uucp") == 0) { *p = '\0'; *(fqdn-1) = '\0'; sprintf(line, "%s!%s@%s", fqdn, raw, mailhost); strcpy(raw, line); } #ifdef DEBUG_FQDN fprintf(stdout, "%s\n", raw); #endif } else { fprintf(stdout, "from2fqdn: Not a From ...' line - %s\n", line); strcpy(raw, "root@localhost"); } return(raw); } int resolve_fqdn(char *line, char *user, char *addr) { char *start = strchr(line, '"'); char *end = strrchr(line, '"'); char *estart = strrchr(line, '<'); char *eend = strrchr(line, '>'); if (start == NULL || end == NULL || estart == NULL || eend == NULL) { fprintf(stderr, "Bad fqdn: %s\n", line); return(FALSE); } /* Adjust to be inclusive */ start += 1; end -= 1; estart += 1; eend -= 1; memmove(user, start, end-start); user[end-start] = '\0'; fprintf(stderr, "user: %s\n", user); memmove(addr, estart, eend-estart); addr[eend-estart] = '\0'; fprintf(stderr, "addr: %s\n", addr); return(TRUE); } /* END: file from2fqd.c */ void init_quick_maps(void) { static int initialised = 0; if (initialised == 2) return; if (initialised == 1) { /* BUG. Re-entrant or failed */ debugf("Bug in init_quick_maps()\n"); return; } initialised = 1; /* I'd rather pick this up from the file location but don't know how to open a map from a filename in sendmail */ DOMAINS = stab("SpamDomains", ST_MAP, ST_FIND); if (DOMAINS == (STAB *)NULL) { /* No map file, we just don't do fast hash tests */ debugf("Warning: no KSpamDomains hash /etc/sendmail-config/scores/DOMAINS.db fin /etc/sendmail.cf\n"); } EMAILADDRS = stab("badusers", ST_MAP, ST_FIND); if (EMAILADDRS == (STAB *)NULL) { /* No map file, we just don't do fast hash tests */ debugf("Warning: no Kbadusers hash /etc/sendmail-config/scores/USERS.db fin /etc/sendmail.cf\n"); } USERS = stab("UserNames", ST_MAP, ST_FIND); if (USERS == (STAB *)NULL) { /* No map file, we just don't do fast hash tests */ debugf("Warning: no KUserNames hash /etc/sendmail-config/scores/USERS.db fin /etc/sendmail.cf\n"); } /* Add any other fast map test files here */ initialised = 2; return; } /* BEGIN: file scanfrom.c */ int check_email_addr(char *email) { int score = 0, thisscore = 0; /* Can safely call this several times */ init_quick_maps(); debugf("email addr map test:\n"); debugf("%s\n", email); if (inmap(EMAILADDRS, email, &thisscore)) { debugf("Score: %s in EMAILADDRS -> %d\n", email, thisscore); score += thisscore; } return(score); } int check_personal_name(char *s) { return(spamscore("Fullname", s)); } int check_username(char *email) { int score = 0, newscore = 0, thisscore = 0; /* Can safely call this several times */ init_quick_maps(); debugf("check_username:\n"); debugf("%s\n", email); if (strchr(email, '$') != NULL) { debugf("Score: $ in %s -> 3\n", email); score += 3; } if (strchr(email, '!') != NULL) { debugf("Score: ! in %s -> 3\n", email); score += 3; } if (strchr(email, '&') != NULL) { debugf("Score: & in %s -> 1\n", email); score += 1; /* often couples in sex spams */ } debugf("username map test:\n"); debugf("%s\n", email); if (inmap(USERS, email, &thisscore)) { debugf("Score: %s in USERS -> %d\n", email, thisscore); score += thisscore; } /* Now add scores for partial matches from USERNAME file */ /* USERNAME file takes precedence over USERS file if larger or negative */ newscore = spamscore("USERNAME", email); if (newscore != 0) debugf("Score: %s in USERNAME -> %d\n", email, newscore); if ((newscore > score) || (newscore < 0)) { debugf("Score: using %d rather than %d\n", newscore, score); score = newscore; } return(score); } int bad_address(char *s) { /* Bad address, could be spam or badly configured software */ debugf("Score: bad address '%s' -> 1\n", s); return(1); } #define MAX_PATTERN 128 static char *Rules[] = { "via[ ][ ]*the[ ][ ]*vacation[ ][ ]*program[ ][ ]*<\\(\\(.*\\)@.*\\)>", "\\2", "\\1", "\"[ ]*(\\(.*\\)[ ][ ]*\\-[ ][ ]*.*[ ][ ]*\\-[ ][ ]*.*)\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*(\\(.*\\)[ ][ ]*\\-[ ][ ]*.*)\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*(\\(.*\\))\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\)\"@.*,[ ][ ]*\".*\".*[ ][ ]*<\\(.*\\)>\"@.*\"@.*", "\\1", "\\2", "\"[ ]*\\(.*\\)[ ][ ]*\\|[ ][ ]*.*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*[ ][ ]*\\)(.*)[ ][ ]*\\(.*\\)\"[ ][ ]*<\\(.*\\)>", "\\1\\2", "\\3", "\"[ ]*\\(.*\\)[ ][ ]*[0-9][0-9][0-9].*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*by[ ][ ]*way[ ][ ]*of[ ][ ]*\\(.*\\)[ ][ ]*(\\(.*\\))\"[ ][ ]*<.*>", "\\2", "\\1", "\"[ ]*\\(.*\\)[ ][ ]*(.*)\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\)[ ][ ]*\\[.*\\]\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\)[ ][ ]*aka[ ][ ]*.*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\)\"[ ][ ]*(.*)[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\)\"@.*,[ ][ ]*\".*[ ][ ]*<\".*[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\)'s[ ][ ]*answering[ ][ ]*.*\"@\\(.*\\)", "\\1", "\\1@\\2", "\"[ ]*\\(.*\\)'s[ ][ ]*electro.*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\),[ ]*.*,[ ]*.*,[ ]*.*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\),[ ]*.*,[ ]*.*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\),[ ]*.*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*.*(.*\"[ ][ ]*<\\(\\(.*\\)@.*\\)>,[ ][ ]*\".*)\"@.*", "\\2", "\\1", "\"[ ]*\\(.*\\)[ ][ ]*(.*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\)[ ][ ]*on[ ][ ]*.*\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\"[ ]*\\(.*\\)\"[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)[ ][ ]*\\-[ ][ ]*.*[ ][ ]*\\-[ ][ ]*.*[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)[ ][ ]*\\-[ ][ ]*.*[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)[ ][ ]*'.*'[ ][ ]*\\(.*\\)[ ][ ]*<\\(.*\\)>", "\\1 \\2", "\\3", "\\(.*\\)[ ][ ]*\\-\\-[ ][ ]*.*[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)[ ][ ]*[0-9]\\-.*[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)[ ][ ]*(\\(.*\\)[ ][ ]*\\-\\-[ ][ ]*.*)", "\\2", "\\1", "\\(.*\\)[ ][ ]*(\\(.*\\)[ ][ ]*(.*))", "\\2", "\\1", "\\(.*\\)[ ][ ]*(.*)[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)[ ][ ]*from[ ][ ]*.*[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)[ ][ ]*x[0-9][0-9\\-].*[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)'s[ ][ ]*vac.*[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*\\)[ ][ ]*<\\(.*\\)>", "\\1", "\\2", "\\(.*/S=\\(.*\\)/.*@.*\\)", "\\2", "\\1", "\\(.*@.*\\)[ ][ ]*(\\(.*\\)[ ][ ]*aka[ ][ ]*.*)", "\\2", "\\1", "\\(.*@.*\\)[ ][ ]*(\"\\(.*\\)\")", "\\2", "\\1", "\\(.*@.*\\)[ ][ ]*(\\(.*[ ][ ]*.*\\)[ ][ ]*[A-Z][A-Z][A-Z][0-9])", "\\2", "\\1", "\\(.*@.*\\)[ ][ ]*(\\(.*\\)[ ][ ]*X[0-9][0-9].*)", "\\2", "\\1", "\\(.*@f[0-9]*\\.n[0-9]*\\.z[0-9]*\\).*[ ][ ]*(\\(.*\\))", "\\2", "\\1.fidonet.org", "\\(.*@.*\\)[ ][ ]*(\\(.*\\))", "\\2", "\\1", "<\\(\\(.*\\)@.*\\)>", "\\2", "\\1", "\\(.*\\)<\\(.*\\)>", "\\1", "\\2", "\\(\\(.*\\)@.*\\)", "\\2", "\\1", "\\(.*\\) (\\(.*\\))", "\\2", "\\1", "\\(.*\\)", "\\1", "\\1", NULL, NULL, NULL }; extern char *from2fqdn(char *email); int addr2std(char *line, char *ResultUser, char *ResultEmail, int maxline) { char *rule; static char ALL[MAX_LINE], USERNAME[MAX_LINE], EMAIL[MAX_LINE]; char pattern[MAX_PATTERN]; char **stringsp; char *s; FILE *Tests; char *Username, *Email, *fqdn; *ResultUser = '\0'; *ResultEmail = '\0'; stringsp = Rules; for (;;) { if (*stringsp == NULL) break; rule = *stringsp++; Username = *stringsp++; Email = *stringsp++; sprintf(pattern, "^[ ]*%s[ ]*$", rule); if (sscanr(pattern, line, "\\0", ALL)) { if (sscanr(pattern, line, Username, USERNAME) && sscanr(pattern, line, Email, EMAIL) ) { if (strchr(USERNAME, ' ') == NULL) { /* Probably just an email address - couldn't find username */ /* so fix stuff like user%site and uucp!user */ if ((s = strchr(USERNAME, '%')) != NULL) *s = '\0'; s = strrchr(USERNAME, '!'); if ((s != NULL) && (strlen(s) > 1)) { /* overlapping strcpy */ s += 1; memmove(USERNAME, s, strlen(s)+1); } } while ((s = strchr(USERNAME, '_')) != NULL) *s = ' '; while ((s = strchr(USERNAME, '"')) != NULL) *s = '\''; fqdn = from2fqdn(EMAIL); if (fqdn == NULL) fqdn = EMAIL; strncpy(ResultUser, USERNAME, maxline); strncpy(ResultEmail, fqdn, maxline); return(TRUE); /* Accept first rule that matches */ } } } return(FALSE); /* Can't believe *no* matches? - that's what the final ".*" is for! */ } int isaddress(char *label) { if (strcasecmp(label, "From") == 0) return(TRUE); if (strcasecmp(label, "To") == 0) return(TRUE); if (strcasecmp(label, "Cc") == 0) return(TRUE); if (strcasecmp(label, "Reply-To") == 0) return(TRUE); if (strcasecmp(label, "Errors-To") == 0) return(TRUE); return(FALSE); } int isfromaddress(char *label) { if (strcasecmp(label, "From") == 0) return(TRUE); if (strcasecmp(label, "Reply-To") == 0) return(TRUE); if (strcasecmp(label, "Errors-To") == 0) return(TRUE); return(FALSE); } int istoaddress(char *label) { if (strcasecmp(label, "To") == 0) return(TRUE); if (strcasecmp(label, "Cc") == 0) return(TRUE); return(FALSE); } int emailaddrscore(char *Label, char *addr) { static char line[MAX_LINE], emailaddr[MAX_LINE], label[MAX_LINE]; static char User[MAX_LINE], Mail[MAX_LINE]; char *s; int score = 0; debugf("EmailAddrScore: '%s' '%s'\n", Label, addr); strcpy(line, addr); strcpy(emailaddr, addr); if (strcmp(Label, "FROM") == 0) { char *Host, *Mail; Mail = from2fqdn(line); debugf("from2fqdn: '%s' -> '%s'\n", line, Mail); score += check_email_addr(Mail); Host = strchr(Mail, '@'); if (Host == NULL) Host="bad.site"; else *Host++ = '\0'; score += check_domain(Host); score += check_username(Mail); debugf("Checked From line.\n"); } else { char *p, *addr; int quoted = FALSE; int mangled = FALSE; p = addr = emailaddr; for (;;) { if (*p == '"') { quoted = !quoted; } else if ((*p == '\0') || ((!quoted) && (*p == ','))) { int ch = *p; *p = '\0'; if ((ch == ',') && (p[1] == '\0')) { /* Oops - a trailing comma - let's just fix it... */ ch = '\0'; } if (addr2std(addr, User, Mail, MAX_LINE)) { char *Host; if (strchr(Mail, '"') != NULL) { mangled = TRUE; } else { debugf("Addr2std: '%s' -> '%s' '%s'\n", addr, User, Mail); score += check_email_addr(Mail); Host = strchr(Mail, '@'); if (Host == NULL) Host="bad.site"; else *Host++ = '\0'; score += check_domain(Host); score += check_username(Mail); if (strcmp(Label, "From") == 0) { if (strcmp(User, Mail) == 0) { debugf("Score: no personal name in From: -> 1\n"); } else { score += check_personal_name(User); } } else { /* Don't expect a name in Errors-To: or Reply-To: */ score += check_personal_name(User); } debugf("Done addr2std\n"); } } else { score += bad_address(addr); return(score); } if (ch == '\0') break; *p = ch; p += 1; addr = p; p = addr-1; } p += 1; } if (mangled) { /* feed in the whole line this time */ if (addr2std(emailaddr, User, Mail, MAX_LINE)) { if (strchr(Mail, '"') != NULL) { /* No improvement - leave as is */ score += bad_address(emailaddr); return(score); } else { debugf("Last resort: check '%s' and '%s'\n", User, Mail); score += check_personal_name(User); score += check_email_addr(Mail); } } } } return(score); } /* END: file scanfrom.c */ #define SENDMAIL_CONFIG_DIR "/etc/sendmail-config" #define SENDMAIL_SCORES_DIR "/etc/sendmail-config/scores" #define SENDMAIL_BLOCK_FLAG "blocking-YES" /* This checkcompat() function is based on the file http://www.informatik.uni-kiel.de/~ca/email/checkcompat.txt from kyle_jones@wonderworks.com (Kyle Jones) Message-ID: <57dk9c$k8o@crystal.WonderWorks.COM> I (gtoal@gtoal.com) have modified it to allow spam detection by looking in the headers and body for typical signs of spam; and for sites on a site-wide block list. My wife Anne suggested that the spam filtering can be selectively enabled by testing for the presence of a ".spamfilter" file in the home directory of each user. I've implemented it, although the code to find the .spamfilter file is a bit hacky - I need a sendmail guru to show me how to do it properly. Eventually I would like the .spamfilter file to contain the user's customizations that can override the system defaults. Originally this code used a mailer class for the block list; I changed it to use an external database. You can get the original behavior by compiling -DUSE_CLASS. The class code blocked everything by default if not properly initialised; the map code leaves everything open by default if its files are missing. It can either bounce the mail directly or tag it with a header that allows the users to filter it off into a spam mailbox or delete it themselves if site-wide deletion is not acceptable. Perhaps we also need a way to drop the mail silently on the floor, while saving *ONE* copy for the records. Could do this with a hash of the mail body; storing the saved mail under the hash. (See sendmail docs for dropenvelope) Either way, we need to keep a copy of bounced mail for debugging purposes. In fact, it would be even more useful to keep a copy of mail we *would* bounce but haven't because bouncing is not enabled. Mail is only bounced if ${CONFIG_DIR}/${SENDMAIL_BLOCK_FLAG} exists. This test is made at the last minute just before the mail would be bounced, to allow us to independently filter mail for spam and to log third-party mail even if we're not blocking it. If the .db files are not present, it behaves like normal sendmail with no blocking; this allows it to be installed safely and invoked in an emergency. The code below could use my IP map file stuff that understands bitmasks; however it wouldn't fall as cleanly into the 'makemap' structure then, and I don't have time to convert the code into a proper BSD map object. And keeping it as a hash file is undoubtedly faster, even if it means we're restricted to blocking whole Class Cs or individual IPs. I guess I can preprocess if need be. Makes the list of IPs bigger but not any slower. */ /* Kyle wrote: Here's the checkcompat() function I promised. You can use it to stop others from using your site as a relay. To install it, replace the checkcompat() function in src/conf.c with it and recompile sendmail. These are the rules that it enforces. a. locally generated mail is allowed to go anywhere. b. mail from networks considered local is allowed to go anywhere. c. mail from non-local networks is allowed to go to local users and aliases. d. mail from non-local networks is allowed to go to domains for which the local host is a legitimate relay. [GT: It should work this out from DNS lookups! Cacheing would make it efficient. If DNS for requested domain is not available, bounce it with a 'try later' 4XX return code] Everything else is bounced. The message body is not returned. The class AuthorizedNetworks should contain the networks that are considered local in (b). The class RelayedDomains should contain the legitimately relayed domains in (d). Subdomains of these domains are also considered legitimate. Let me know of any bugs or enhancements and I'll try to keep the thing up-to-date. */ int illegal_header(char *s) { if ((s == NULL) || (*s == '\0')) return(TRUE); for (;;) { if (isalpha(*s)) { } else if (isdigit(*s)) { } else if (*s == '-') { } else return(TRUE); s += 1; if (*s == '\0') return(FALSE); } } char *stristr(const char *s, const char *substr) { int slen = strlen(s), sublen = strlen(substr), i; for (i = 0; i <= slen-sublen; i++) { if (strncasecmp(substr, &s[i], sublen) == 0) return((char *)&s[i]); } return(0); } int spamscore(char *header, char *data) { char pat[MAX_LINE]; FILE *patterns; char *fname; int defscore = 1; int score = 0; if (strcmp(header, "BODY") != 0) debugf("Header %s: %s\n", header, data); fname = malloc(strlen(SENDMAIL_SCORES_DIR)+strlen(header)+2); if (fname == NULL) return(0); if (illegal_header(header)) { debugf("Score: Illegal header %s -> 1\n", header); return(1); /* Might as well infer that something spammy is going on */ } /* First, special handling of certain fields */ /* From: "user name" */ if (isfromaddress(header)) { score += emailaddrscore(header, data); } else if (istoaddress(header)) { /* Don't want to check every address in a To or Cc, but if there's just one (eg to you@bogus.domain) we do want to give it the once-over */ if (strchr(data, ',') == NULL) score += emailaddrscore(header, data); } else if (strcasecmp(header, "Comments") == 0) { char Sender[MAX_LINE]; if (sscanr("Authenticated sender is <\\(.*\\)>", data, "\\1", Sender)) { score += emailaddrscore("Sender", Sender); } } else if (strcasecmp(header, "Received") == 0) { debugf("Special handling of received line... local score before = %d\n", score); score += process_received(data); debugf("local score after = %d\n", score); } else if (strcasecmp(header, "Subject") == 0) { if (uppercasehdr(data)) { debugf("Score: All uppercase header %s -> 2\n", data); score += 2; /* ALL CAPS HEADERS ARE SUSPICIOUS!!! */ } } /* else debugf("No special handling\n"); */ /* TO ADD: All uppercase in subject. -> +2 */ sprintf(fname, "%s/%s", SENDMAIL_SCORES_DIR, header); if (strcmp(header, "BODY") != 0) debugf("%s\n", fname); patterns = fopen(fname, "r"); free(fname); if (patterns != NULL) { /* Use the info in this file to test if this header is spam */ for (;;) { int early_exit = FALSE; int len; pat[MAX_LINE-1] = '\0'; if (fgets(pat, MAX_LINE-1, patterns) == NULL) break; len = strlen(pat); if ((len > 0) && (pat[len-1] == '\n')) pat[len-1] = '\0'; if (*pat == '\0') continue; if (pat[1] == ' ') { switch (*pat) { case '#': continue; case '*': defscore = (int)atol(pat+2); break; case '~': /* no more tests from the data in this file */ early_exit = TRUE; break; case '!': /* regexp */ /* TO BE WRITTEN */ { int rc; regex_t r; rc = regcomp(&r, pat+2, REG_NOSUB | REG_ICASE); /* Now case-insensitive */ if (rc != 0) break; rc = regexec(&r, data, 0, NULL, REG_NOTBOL|REG_NOTEOL); regfree(&r); if (rc == 0) { debugf("Score: %s in %s -> %d\n", pat+2, data, defscore); score += defscore; } } break; case '=': /* strstr */ if (strstr(data, pat+2) != NULL) { debugf("Score: %s in %s -> %d\n", data, pat+2, defscore); score += defscore; } /*score += ((strstr(data, pat+2) != NULL) ? defscore : 0);*/ break; /* stop at first match; use continue if more wanted */ case '?': /* stristr */ if (stristr(data, pat+2) != NULL) { debugf("Score: %s in %s -> %d\n", data, pat+2, defscore); score += defscore; } break; /* stop at first match; use continue if more wanted */ default: /* Error in config file */ continue; } if (early_exit) break; } } } if (patterns != NULL) fclose(patterns); patterns = NULL; return(score); } int extracthost(char *line, char *s) { char web[MAX_LINE]; char *end = s; for (;;) { if (isalnum(*end)) { } else if (*end == '-') { } else if (*end == '.') { } else { end -= 1; if (end == s) return(0); break; } end += 1; } /* inclusive pointers */ strncpy(web, s, end-s+1); web[end-s+1] = '\0'; debugf("Host:'%s'\n", web); return(check_domain(web)); } int extractemailaddr(char *line, char *s) { char *start, *end; char email[MAX_LINE]; if (start == line) return(0); start = s-1; for (;;) { if (isalnum(*start)) { } else if (*start == '-') { } else if (*start == '.') { } else if (*start == ':') { /* Mailto: */ start += 1; if (*start == '@') return(0); break; } else { start += 1; if (*start == '@') return(0); break; } if (start == line) break; start -= 1; } end = s+1; for (;;) { if (isalnum(*end)) { } else if (*end == '-') { } else if (*end == '.') { } else { end -= 1; if (*end == '@') return(0); break; } end += 1; } /* inclusive pointers */ strncpy(email, start, end-start+1); email[end-start+1] = '\0'; debugf("Email:'%s'\n", email); return(check_email_addr(email)); } int bodyline(char *line) { char *s = line; int score = 0; while ((s = strchr(s, '@')) != NULL) { score += extractemailaddr(line, s); s += 1; } s = strstr(line, "http://"); if (s != NULL) { score += extracthost(line, s+strlen("http://")); } return(score); } int bodyjunklinescore(char *line) { /* don't assign score to quoted body junk */ if ((*line == '>') || (*line == ':')) return(0); return(bodyline(line)+spamscore("BODY", line)); } /* isjunk returns TRUE if and only if the junk is guaranteed spam and can be deleted before it even gets to the users box, *and* the user has asked for spam to be deleted at source. Otherwise it just adds a 'Received:' header. Guaranteed spam is usually mail with a spamscore of 10000; we're petty damn sure its spam though when the score is 10 or up. Less than 10 should be checked by hand. It is not advisable to give scores >= 10000 for text in the body but it can be done by the user if they really insist. I was going to make it impossible for body junk to cause a bounce but I've changed my mind to allow the user to make that decision. I'm just afraid that sending a spam site list with > 10000 lines each worth 1 point will bounce! */ #include void mydebug_dumpheads(score, s, e, f) int score; char *s; ENVELOPE *e; FILE *f; { time_t tim; time(&tim); fprintf(f, "From %s %s", e->e_sender, ctime(&tim)); if ((e != NULL) && (e->e_header != NULL)) { HDR *h; char scorestr[16]; sprintf(scorestr, "%d", score); /* Header gets added to email as a side-effect of this call. Not the proper way to do it. In fact, I didn't intend to put the spamscore header in the email, but the test users liked it so much I've decided to leave it for now. */ /*addheader("X-Spamscore", scorestr, &e->e_header);*/ h = e->e_header; for (;;) { if (h == NULL) { break; } if ((h != NULL) && (h->h_field != NULL) && (h->h_value != NULL)) { fprintf(f, "%s: %s\n", h->h_field, h->h_value); } h = h->h_link; } } else { debugf("Unexpected empty envelope in mydebug_dumpheads\n"); /* Shouldn't happen */ } fprintf(f, "\n"); } void mydebug_dumpmail(score, s, e, to) int score; char *s; ENVELOPE *e; ADDRESS *to; { char *datafilename; FILE *datafile = NULL; char *mailname; FILE *mailcopy; int c; datafilename = queuename(e, 'd'); if (datafilename != NULL) datafile = fopen(datafilename, "r"); mailname = malloc(strlen("/var/log/-junk.mai") + strlen(s) + 1); sprintf(mailname, "/var/log/%s-junk.mai", s); mailcopy = fopen(mailname, "a"); if (mailcopy == NULL) { if (datafile != NULL) fclose(datafile); return; } mydebug_dumpheads(score, s, e, mailcopy); if (datafile != NULL) { for (;;) { c = fgetc(datafile); if (c == EOF) break; if (feof(datafile)) break; if (ferror(datafile)) break; if (fputc(c, mailcopy) == EOF) break; if (ferror(mailcopy)) break; } } if (c != '\n') fputc('\n', mailcopy); fputc('\n', mailcopy); fclose(datafile); fclose(mailcopy); } #define DELETE_ON_SIGHT 10000 #define PROBABLY_SPAM 10 int bodyjunkscore(e, to) ENVELOPE *e; ADDRESS *to; { FILE *datafile; char *datafilename; int score = 0; debugf("bodyjunkscore\n"); datafilename = queuename(e, 'd'); datafile = fopen(datafilename, "r"); if (datafile != NULL) { /* Handle body text */ static char s[MAX_LINE], *rc; debugf("body tests\n"); /* Need to read bodyrules file */ for (;;) { s[MAX_LINE-1] = '\0'; if ((rc = fgets(s, MAX_LINE-1, datafile)) == NULL) break; if (ferror(datafile)) break; score += bodyjunklinescore(s); } } return(score); } char *top_level[] = { "ad","ae","af","ag","ai","al","am","an","ao","aq","ar", "as","at","au","aw","az","ba","bb","bd","be","bf","bg", "bh","bi","bj","bm","bn","bo","br","bs","bt","bv","bw", "by","bz","ca","cc","cf","cg","ch","ci","ck","cl","cm", "cn","co","cr","cs","cu","cv","cx","cy","cz","de","dj", "dk","dm","do","dz","ec","ee","eg","eh","er","es","et", "fi","fj","fk","fm","fo","fr","fx","ga","gb","gd","ge", "gf","gh","gi","gl","gm","gn","gp","gq","gr","gs","gt", "gu","gw","gy","hk","hm","hn","hr","ht","hu","id","ie", "il","in","io","iq","ir","is","it","jm","jo","jp","ke", "kg","kh","ki","km","kn","kp","kr","kw","ky","kz","la", "lb","lc","li","lk","lr","ls","lt","lu","lv","ly","ma", "mc","md","mg","mh","mk","ml","mm","mn","mo","mp","mq", "mr","ms","mt","mu","mv","mw","mx","my","mz","na","nc", "ne","nf","ng","ni","nl","no","np","nr","nu","nz","om", "pa","pe","pf","pg","ph","pk","pl","pm","pn","pr","pt", "pw","py","qa","re","ro","ru","rw","sa","sb","sc","sd", "se","sg","sh","si","sj","sk","sl","sm","sn","so","sr", "st","su","sv","sy","sz","tc","td","tf","tg","th","tj", "tk","tm","tn","to","tp","tr","tt","tv","tw","tz","ua", "ug","uk","um","us","uy","uz","va","vc","ve","vg","vi", "vn","vu","wf","ws","ye","yt","yu","za","zm","zr","zw", "arpa","com","edu","gov","int","mil","nato","net","org", NULL }; /* A cheap test before we had DNS support added */ int istop(char *domain) { int which = 0; char *end; end = strrchr(domain, '.'); if (end == NULL) return(FALSE); end += 1; for (;;) { if (top_level[which] == NULL) return(FALSE); if (strcasecmp(end, top_level[which]) == 0) return(TRUE); which += 1; } return(FALSE); } int inmap(STAB *map, char *key, int *score) { char *value; int r = 0; if (map == NULL) return(FALSE); if ( (value = ((*map->s_map.map_class->map_lookup)(&map->s_map, key, NULL, &r))) != NULL ) { *score = (int)atol(value); debugf("SpamDomain:\n"); debugf("%s\n", value); return(TRUE); } *score = 0; return(FALSE); } #define MAX_MEMO 20 static int next_free_memo = 0; static int memorized(char *domain) { static char *remembered[MAX_MEMO]; /* Yes, should be flex array or linked list, but for the purposes of this program, more than 20 different domains in one header is so damn spammy that I don't care about remembering all their names. */ int i; for (i = 0; i < next_free_memo; i++) { if (strcasecmp(domain, remembered[i]) == 0) return(TRUE); } if (next_free_memo < MAX_MEMO)remembered[next_free_memo++] = strdup(domain); return(FALSE); } int check_domain(char *domain) { int score = 0; int rc; int i1, i2, i3, i4; char numdomain[MAX_LINE]; if (*domain == '\0') { debugf("Score: no domain in username -> 9\n"); return(9); /* External mail with no domain is suspicious */ } /* Note this is wrong if filtering internal mail */ if (sscanr("\\[\\([0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\)\\]", domain, "\\1", numdomain)) { rc = sscanf(numdomain, "%d.%d.%d.%d", &i1, &i2, &i3, &i4); if (rc != 4) { debugf("Score: sscanf error in %s -> 1\n", numdomain); return(1); /* COULD VET IP HERE AGAINST KNOWN SPAMMER DOMAINS */ } if ((i1 > 255) || (i2 > 255) || (i3 > 255) || (i4 > 255)) { debugf("Score: illegal values in domain %s -> 9\n", numdomain); return(9); } if ((i1 == 0) && (i2 == 0) && (i3 == 0) && (i4 == 0)) { debugf("Score: illegal values in domain %s -> 9\n", numdomain); return(9); } debugf("Score: numeric IP %s -> 1\n", domain); score += 1; } else if (sscanr("\\([0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\)", domain, "\\1", numdomain)) { rc = sscanf(numdomain, "%d.%d.%d.%d", &i1, &i2, &i3, &i4); if (rc != 4) { debugf("Score: sscanf error in %s -> 1\n", numdomain); return(1); /* COULD VET IP HERE AGAINST KNOWN SPAMMER DOMAINS */ } if ((i1 > 255) || (i2 > 255) || (i3 > 255) || (i4 > 255)) { debugf("Score: illegal values in domain %s -> 9\n", numdomain); return(9); } if ((i1 == 0) && (i2 == 0) && (i3 == 0) && (i4 == 0)) { debugf("Score: illegal values in domain %s -> 9\n", numdomain); return(9); } debugf("Score: numeric IP %s -> 1\n", domain); score += 1; } else if (sscanr("\\([0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\)", domain, "\\1", numdomain)) { rc = sscanf(numdomain, "%d.%d.%d.%d", &i1, &i2, &i3, &i4); if (rc != 4) { return(1); /* COULD VET IP HERE AGAINST KNOWN SPAMMER DOMAINS */ } if ((i1 > 255) || (i2 > 255) || (i3 > 255) || (i4 > 255)) return(9); if ((i1 == 0) && (i2 == 0) && (i3 == 0) && (i4 == 0)) return(9); score += 1; } if (sscanr("^[0-9][0-9]*\\.com$", domain, NULL, NULL)) { debugf("Score: numeric .com domain %s -> 5\n", domain); score += 5; /* Would be nice to do this in the USER.db file, but no regexps allowed there, and too tricky to do well in the "From" file. */ } if (!istop(domain)) { /* This test also gets domains that have no TLD, eg From: */ /* Actually this *is* suspicious if the Received: headers so any other sites involved. But it's more likely a misconfigured mailer */ if (strcasecmp(domain, "localhost") == 0) return(0); if (strchr(domain, '.') == NULL) { debugf("Score: unqualified domain %s -> 3\n", domain); return(3); /* No TLD */ } else { debugf("Score: Invalid TLD %s -> 6\n", domain); return(6); /* Bogus TLD */ } } /* Does the domain contain illegal characters? */ { char *s = domain; for (;;) { int c = *s++; if (c == '\0') break; if (isalnum(c) || (c == '-') || (c == '.')) { /* the only valid domain chars */ } else if ((c == '!') || (c == '$')) { /* Known spammer giveaways */ debugf("Score: %c in %s -> 9\n", c, domain); score += 9; } else { debugf("Score: %c in %s -> 1\n", c, domain); score += 1; /* '_' is common but wrong */ } } } /* Can safely call this several times */ init_quick_maps(); /* Now see if it is a known spammer domain, from map file... */ /* NOTE: A legitimate posting from someone at, say bigfoot (5 pts) ends up being scored 3 times ("From ", "Return-Path:" and "From:" Similarly with Received: lines. We can get round this by temporarily removing a name from the DB once it has been returned once, Also could probably avoid Return-Path test or "From " test since one is redundant. */ for (;;) { char *s; int thisscore = 0; if (*domain == '\0') break; s = strchr(domain, '.'); if (s == NULL) break; /* TLD done elsewhere */ debugf("domain map test:\n"); debugf("%s\n", domain); if (inmap(DOMAINS, domain, &thisscore)) { debugf("Score: %s in DOMAINS -> %d\n", domain, thisscore); if (memorized(domain)) { /* As a side-effect, adds if it not known */ debugf("Domain test for %s - will not score against this twice.\n", domain); } else { score += thisscore; } break; } domain = s+1; if (*domain == '\0') break; } return(score); } int uppercasehdr(char *s) { int c; int alphaseen = FALSE; if (s == NULL) return(FALSE); for (;;) { c = *s++; if (c == '\0') break; if (isalpha(c)) { alphaseen = TRUE; if (islower(c)) return(FALSE); } } if (alphaseen) return(TRUE); return(FALSE); /* No lower case is not the same as all uppercase */ } void print_spamscore( char *hdr, char *rating, int score, int bodyscore) { time_t tim; char *cdate; char *t ; time(&tim); cdate = ctime(&tim); t = strchr(cdate, '\n'); if (t != NULL) *t = '\0'; sprintf(hdr, "from %s by spam%s-%d-%d; %s", "localhost", rating, score, bodyscore, cdate); } int isjunk(to, e) ADDRESS *to; ENVELOPE *e; { FILE *datafile; char *datafilename; int score = 0; /* FIRST, TAKE EACH HEADER AND PROCESS IT */ debugf("Check headers\n"); if ((e != NULL) && (e->e_header != NULL)) { HDR *h; /* First, do some tests on specific headers that aren't possible using the external rulesets */ #ifdef NEVER /* ** Envelope structure. ** This structure defines the message itself. There is usually ** only one of these -- for the message that we originally read ** and which is our primary interest -- but other envelopes can ** be generated during processing. For example, error messages ** will have their own envelope. */ struct envelope { HDR *e_header; /* head of header list */ long e_msgpriority; /* adjusted priority of this message */ time_t e_ctime; /* time message appeared in the queue */ char *e_to; /* the target person */ ADDRESS e_from; /* the person it is from */ char *e_sender; /* e_from.q_paddr w comments stripped */ char **e_fromdomain; /* the domain part of the sender */ ADDRESS *e_sendqueue; /* list of message recipients */ ADDRESS *e_errorqueue; /* the queue for error responses */ long e_msgsize; /* size of the message in bytes */ long e_flags; /* flags, see below */ int e_nrcpts; /* number of recipients */ short e_class; /* msg class (priority, junk, etc.) */ short e_hopcount; /* number of times processed */ short e_nsent; /* number of sends since checkpoint */ short e_sendmode; /* message send mode */ short e_errormode; /* error return mode */ short e_timeoutclass; /* message timeout class */ void (*e_puthdr)__P((MCI *, HDR *, ENVELOPE *)); /* function to put header of message */ void (*e_putbody)__P((MCI *, ENVELOPE *, char *)); /* function to put body of message */ struct envelope *e_parent; /* the message this one encloses */ struct envelope *e_sibling; /* the next envelope of interest */ char *e_bodytype; /* type of message body */ FILE *e_dfp; /* temporary file */ char *e_id; /* code for this entry in queue */ FILE *e_xfp; /* transcript file */ FILE *e_lockfp; /* the lock file for this message */ char *e_message; /* error message */ char *e_statmsg; /* stat msg (changes per delivery) */ char *e_msgboundary; /* MIME-style message part boundary */ char *e_origrcpt; /* original recipient (one only) */ char *e_envid; /* envelope id from MAIL FROM: line */ char *e_status; /* DSN status for this message */ time_t e_dtime; /* time of last delivery attempt */ int e_ntries; /* number of delivery attempts */ dev_t e_dfdev; /* df file's device, for crash recov */ ino_t e_dfino; /* df file's ino, for crash recovery */ char *e_macro[256]; /* macro definitions */ }; #endif if (e->e_sender != NULL) { char *domain; char *fromdomain = e->e_sender; debugf("FROM: %s\n", fromdomain); if ((domain = strchr(fromdomain, '@')) == NULL) { /* Because we don't checklocally-generated mail, and From address without an "@" is either a misconfigured mailer or a fakemail spam */ score += 5; debugf("Score: Sender's domain is not obvious (#1) -> 5\n"); debugf("%s\n", fromdomain); } else { *domain++ = '\0'; debugf("check sender domain?\n"); score += check_domain(domain); score += check_username(fromdomain); *--domain = '@'; } } else { /* Not sure how this would happen but it has to be bogus, right? */ score += 4; debugf("Score: Sender's domain is not obvious (#2) -> 4\n"); } { char *To = hvalue("to", e->e_header); if (To == NULL) { debugf("Score: No To: header! -> 4\n"); score += 4; } else { char envto[MAX_LINE]; /* If the envelope recipient is present in the "To:" header, we have an individually addressed mail. It may still be spam, but it is less likely to be because the sender has to send these out one at a time, which costs him more. Most spammers aren't willing to eat that cost in order to look like personal mail */ envto[MAX_LINE-1] = '\0'; /* stopper */ if (e->e_to == NULL) { } else if (*e->e_to == '<') { char *s; strncpy(envto, e->e_to+1, MAX_LINE-2); s = strchr(envto, '>'); if (s != NULL) *s = '\0'; } else if (*e->e_to == '\\') { strncpy(envto, e->e_to+1, MAX_LINE-2); } debugf("Look for string '%s' in To: '%s'\n", envto, To); if ((e->e_to != NULL) && (stristr(To, envto) != NULL)) { debugf("Score: '%s' in To: '%s' -> -3\n", envto, To); score -= 10; } } } /* Now check the headers using the patterns */ h = e->e_header; for (;;) { if (h == NULL) { break; } if ((h != NULL) && (h->h_field != NULL) && (h->h_value != NULL)) { score += spamscore(h->h_field, h->h_value); /* INSERT CODE FROM BELOW HERE */ } h = h->h_link; } /* Now do tests that rely on having examined all the headers: */ if (next_free_memo >= 4) { debugf("Score: this mail referenced %d domains! -> 1\n", next_free_memo); score += 1; } /* MOVE BACK UP INSIDE LOOP WHEN DONE TESTING SCORING */ if (score >= DELETE_ON_SIGHT) { char hdr[MAX_LINE]; int bodyscore = bodyjunkscore(e, to); /* FOR DEBUGGING ONLY: */ /* We don't NEED to know the body score and it will be faster if we omit this test. But what the hell, we're having fun. */ print_spamscore(hdr, "delete", score, bodyscore); addheader("Received", hdr, &e->e_header); mydebug_dumpmail(score, "deleted", e, to); return(FALSE); /* TRUE IF DELETING */ } if (score >= PROBABLY_SPAM) { char hdr[MAX_LINE]; int bodyscore = bodyjunkscore(e, to); /* FOR DEBUGGING ONLY: */ /* We don't NEED to know the body score and it will be faster if we omit this test. But what the hell, we're having fun. */ debugf("probably spam - adding header ...\n"); print_spamscore(hdr, "filter", score, bodyscore); addheader("Received", hdr, &e->e_header); mydebug_dumpmail(score, "probable", e, to); return(FALSE); /* No need to do expensive body test now */ } } if (score >= 1) { /* Do NOT do a body score if header score < 0, ie known safe */ /* Actually, don't do it either for header score = 0 or 1 */ debugf("body\n"); datafilename = queuename(e, 'd'); datafile = fopen(datafilename, "r"); if (datafile != NULL) { /* Handle body text */ int headerscore = score; static char s[MAX_LINE], *rc; debugf("body tests\n"); /* Need to read bodyrules file */ for (;;) { s[MAX_LINE-1] = '\0'; if ((rc = fgets(s, MAX_LINE-1, datafile)) == NULL) break; if (ferror(datafile)) break; score += bodyjunklinescore(s); /* INSERT CODE FROM BELOW HERE */ } /* MOVE THIS BACK INTO FOR LOOP WHEN DONE TESTING SCORES */ if (score >= PROBABLY_SPAM) { char hdr[MAX_LINE]; int bodyscore = score - headerscore; score = headerscore; fclose(datafile); datafile = NULL; debugf("probably spam - adding header ...\n"); if (score >= DELETE_ON_SIGHT) { debugf("probably spam - adding header ...\n"); print_spamscore(hdr, "delete", score, bodyscore); addheader("Received", hdr, &e->e_header); mydebug_dumpmail(score, "deleted", e, to); return(FALSE); /* TRUE IF DELETING */ /* Should be used wisely */ } print_spamscore(hdr, "filter", score, bodyscore); addheader("Received", hdr, &e->e_header); mydebug_dumpmail(score, "probable", e, to); return(FALSE); /* No need to scan any more */ } debugf("body done.\n"); fclose(datafile); datafile = NULL; /* return(FALSE);*/ /* Mail is never deleted unread for body junk */ } if (score > 0) { /*debugf("maybe spam - adding header ...\n");*/ /*addheader("Received", "by spamtest", &e->e_header);*/ mydebug_dumpmail(score, "maybe", e, to); } } /* header score < 0 */ debugf("Done. score is %d.\n", score); return(FALSE); } int checkcompat(to, e) register ADDRESS *to; register ENVELOPE *e; { ADDRESS *a; struct mailer *m; int classid; STAB *map; static char ip_addr[100]; char workbuf[MAXNAME * 2 + 25], *p, *end = 0; char *bestguess_home; /* * Allow all "return to sender" messages. */ debugf("Entering checkcompat()\n"); debugf("Testing email in checkcompat() function\n"); if (bitset(EF_RESPONSE, e->e_flags)){ debugf("Return-to-sender type email. No checks.\n"); return EX_OK; } /* * We need to find the IP address of the sender's host, if any. * (You're on your own if you're not using IP.) We can't use * RealHostAddr because we might be in a queue run and * RealHostAddr doesn't contain valid data during queue runs. * $_ contains the information we need and is valid during queue * runs. * * We use cached results of a previous parse of $_ if we can. * We can do this if we're processing only one non-internally * generated message per process. That condition is met if * we're not in a queue run or we're in a queue run and the user * has ForkEachJob set true. */ if (! *ip_addr || (bitset(e->e_flags, EF_QUEUERUN) && ! ForkQueueRuns)) { /* * Since $_ may contain data besides the IP address we must * carefully parse out the IP addr. eeeeyuck. Sure would * be nice to have a macro that contains just the remote IP * addr and that is also stored in the qf file. * * The end of $_ is expected to be of the form * * [xxx.xxx.xxx.xxx] * or * [!@aaa.aaa.aaa.aaa@bbb.bbb.bbb.bbb<...>:xxx.xxx.xxx.xxx] * */ strncpy(workbuf, macvalue('_', e), sizeof workbuf); workbuf[sizeof workbuf - 1] = 0; p = &workbuf[strlen(workbuf)]; while (--p != workbuf && *p != ':' && *p != '[') { if (*p == ']') end = p; else if (! isdigit(*p) && *p != '.') break; } /* * Test whether we found the addresses start and end markers. * If not, then the parse failed, which means $_ contains no * recognizable IP address. * * If there is no remote host address, then the message must * have originated locally. (UUCP? Never heard of it.) */ if ((! end) || (*p != ':' && *p != '[')) { strcpy(ip_addr, "none"); debugf("no ip addrs???\n"); return EX_OK; } p++; /* copy out the IP addr */ strncpy(ip_addr, p, min(sizeof ip_addr, end - p)); ip_addr[sizeof ip_addr - 1] = 0; /* * If ip_addr == none, then the message originated locally. */ debugf("Source IP addr:\n"); debugf("%s\n", ip_addr); } else if (strcmp(ip_addr, "none") == 0){ debugf("Source IP addr not known - no check\n"); return EX_OK; } /* * Check remote hosts's IP address against a list of hosts and * networks authorized to use this host as a relay. The class * name is AuthorizedNetworks. You can put full IP addresses * into it or you can use net prefixes like * * 26 * 137.39 * 192.203.206 * * for class A, B and C nets. Sorry, no fancy subnet masks. * * Note that this class is empty by default, so no addresses are * authorized, not even the loopback net. Put 127 into the * class to enable the loopback net for those mailers that like * to submit mail via SMTP and don't use sendmail -bs. */ /* Note a class and an external database are not the same. The class finds entries in a "F" file and the database looks up the "K" file. Totally different mechanisms and code. */ /* I use: KAuthorizedNetworks hash /etc/sendmail-config/localips.db KRelayedDomains hash /etc/sendmail-config/secondarymx.db This should be placed in sendmail.cf (or preferably the macro file; in our case, bsdi.mc, after LOCAL_CONFIG) The hash files are created in /etc/sendmail-config by the following commands: makemap hash localips.db < localips makemap hash secondarymx.db < secondarymx If you don't have makemap installed, the source is in the makemap subdirectory of the sendmail source distribution I'm not sure if I perhaps shouldn't also add "-o" after the word "hash" above. */ #ifdef USE_CLASS classid = macid("{AuthorizedNetworks}", (char **) 0); #else map = stab("AuthorizedNetworks", ST_MAP, ST_FIND); if (map == (STAB *)NULL) { /* No map file, so all networks are considered local and we stop now. */ debugf("no map file????\n"); return(EX_OK); } #endif /* AuthorizedNetworks may usefully include the IPs of mail relays that are allowed to forward us mail which they were holding when we or a lower-numbered MX were down -if they are trustworthy; but if omitted, the code below will allow legitimately relayed mail through anyway, whereever it came from. */ strncpy(workbuf, ip_addr, sizeof workbuf); workbuf[sizeof workbuf - 1] = 0; p = workbuf + strlen(workbuf); do { int ret = 0; char *net_type; *p = 0; debugf("%s\n", workbuf); #ifdef USE_CLASS if (wordinclass(workbuf, classid)) { #else if ((net_type = ((*map->s_map.map_class->map_lookup)(&map->s_map, workbuf, NULL, &ret))) != NULL) { #endif debugf("Sender found in {AuthorizedNetworks}\n"); /* AT THIS POINT WE CAN IF WE WISH TEST TO SEE IF THE OUTGOING MAIL HAS A FROM WITH A DOMAIN THAT IS NOT LEGALLY ONE OF OURS, IE SEE IF OUR USERS ARE FORGING MAIL HEADERS. IF SO, ADD A X- HEADER TO POINT IT OUT, AND SAVE IT TO CONFRONT THE USER */ /* ALSO, WE CAN CHECK OUTGOING MAIL FOR SPAM SIGNS AND ALERT AN OPERATOR IF WE DETECT ANY. A NICE REFINEMENT WOULD BE TO PUT IT IN A 30-MINUTE-DELAYED QUEUE TO HAVE TIME TO LOOK INTO IT */ return EX_OK; } while (p != workbuf && *p != '.') /* check wider IP range */ p--; } while (p != workbuf); /* Try IP, classes C, B, A (usually only 127) */ debugf("Sender not local - now check recipient\n"); /* * At this point we're convinced the sender is not local and not * authorized to use this host as a relay. So now we check the * recipient address to see if it is local, or that is it for a * domain for which we are a legitimate relay. * * Aliases are local addresses but they may point to non-local * addresses; mailing to these aliases should be allowed. * Therefore we can't check the 'to' address for locality. We * must backtrack through the alias chain until we reach the * root address. If this address is local then all the * addresses that expanded from it should also be considered * local for the purpose of this test. */ bestguess_home = NULL; for (a = to; a->q_alias; a = a->q_alias) { char *s; /* We have a major problem here with mail that is punted straight out again or fed to procmail. Until I can discover the right way to find out who the owner of the .forward file is and decide to spamfilter accordingly, this grody hack will have to suffice. Note it relies on the "#username" hack for procmail */ if ((a != NULL) && (a->q_user != NULL) && ((s = strstr(a->q_user, "procmail #")) != NULL)) { bestguess_home = s; bestguess_home = strchr(s, '#')+1; s = strchr(bestguess_home, '"'); if (s != NULL) *s = '\0'; s = strchr(bestguess_home, ' '); if (s != NULL) *s = '\0'; s = strchr(bestguess_home, ','); if (s != NULL) *s = '\0'; s = strchr(bestguess_home, ' '); if (s != NULL) *s = '\0'; debugf("procmail hack\n"); debugf("%s\n", bestguess_home); } if ((a != NULL) && (a->q_home != NULL)) { bestguess_home = a->q_home; } } m = a->q_mailer; /* * Now check to see if the root of the alias chain is local. */ if (bitnset(M_LOCALMAILER, m->m_flags)) { /* NOTE: we only filter mail coming in from offsite! - we don't filter local mail or outgoing mail */ char *filter; FILE *test; debugf("Root of alias chain is local\n"); /* AT THIS POINT WE SHOULD DO JUNKMAIL FILTERING IF THE USER HAS A .spamfilter FILE IN HIS HOME DIRECTORY */ if ((to == NULL) || (to->q_home == NULL)) { struct passwd *pw; debugf("No .spamfilter home - best guess was %s\n", (bestguess_home == NULL ? "NONE" : bestguess_home)); if (bestguess_home == NULL) return EX_OK; pw = getpwnam(bestguess_home); if (pw != NULL && pw->pw_dir != NULL) { bestguess_home = pw->pw_dir; } else return EX_OK; } else bestguess_home = to->q_home; /* Should pick up ".spamfilter" from some parameter */ filter = malloc(strlen(bestguess_home) + 1 + strlen(".spamfilter") + 1); if (filter == NULL) { debugf("Can't malloc ram for .spamfilter string: %s\n", bestguess_home); return EX_OK; /* Hmmm... */ } sprintf(filter, "%s/%s", bestguess_home, ".spamfilter"); debugf("%s\n", filter); test = fopen(filter, "r"); free(filter); if (test != NULL) { fclose(test); /* Note: we'll later use the contents of this file to set specific options for this user */ /* This user wants spam to be filtered! */ debugf("filtering for spam\n"); /* DO THE FILTERING HERE! */ if (isjunk(to, e)) { debugf("spam found - rejecting\n"); usrerr("554 This user is rejecting your mail. See http://www.tispa.org/spam/reject-notice/"); e->e_flags |= EF_NO_BODY_RETN; return EX_UNAVAILABLE; } debugf("Won't delete unread.\n"); } return EX_OK; } /* * Check destination host to see if it is a domain we relay or a * subdomain thereof. */ #ifdef USE_CLASS classid = macid("{RelayedDomains}", (char **) 0); #else map = stab("RelayedDomains", ST_MAP, ST_FIND); #endif if (map == (STAB *)NULL) { /* No map file, anything goes - better than class where nothing goes! */ /* Relay everything untouched; no spam tests */ debugf("Cannot find .db file for {RelayedDomains}\n"); return(EX_OK); } strncpy(workbuf, to->q_host ? to->q_host : "", sizeof workbuf); workbuf[sizeof workbuf - 1] = 0; end = workbuf + strlen(workbuf); if (*workbuf != '\0') p = workbuf+strlen(workbuf)-1; else p = workbuf; /* Strip trailing "." because we don't store it in the hash file */ if (*p == '.') *p = '\0'; p = workbuf; while (*p) { int ret; char *net_type; #ifdef USE_CLASS if (wordinclass(p, classid)) { #else debugf("%s\n", p); if ((net_type = ((*map->s_map.map_class->map_lookup)(&map->s_map, p, NULL, &ret))) != NULL) { #endif debugf("Destination is in {RelayedDomains} so is OK.\n"); return EX_OK; } while (*p && *p != '.') p++; if (*p) p++; } /* everything else is bad */ debugf("Relay traffic prohibited\n"); /* AT THIS POINT WE CAN LOG THE THIRD-PARTY MAIL */ /* IF SPACE IS AT A PREMIUM WE CAN HASH THE BODY OF THE MAIL AND ONLY KEEP ONE COPY OF EACH SPAM */ { /* Should get this name from proper config info - this is a quick hack */ char *fname; FILE *test; /* BUG: fname = malloc(strlen(SENDMAIL_CONFIG_DIR) + strlen(SENDMAIL_BLOCK_FLAG)) + 2;*/ /* Bug fix from leni@ais.com.au */ fname = malloc(strlen(SENDMAIL_CONFIG_DIR) + strlen(SENDMAIL_BLOCK_FLAG) + 2); if (fname == NULL) return EX_OK; sprintf(fname, "%s/%s", SENDMAIL_CONFIG_DIR, SENDMAIL_BLOCK_FLAG); test = fopen(fname, "r"); free(fname); if (test == NULL) return EX_OK; else fclose(test); } usrerr("554 relay traffic prohibited"); e->e_flags |= EF_NO_BODY_RETN; return EX_UNAVAILABLE; } /* END: CHECKCOMPAT */