// If you are reading this source, it is what should have been an
// internal release and consequently has a few comments at the head
// of the file which would not normally be here.  Most likely I
// must have released this in a hurry because someone needed some
// recent feature that has been added, before I had time to do a
// periodic code review and tidy-up.  If this is your first time
// looking at or installing this code, you should probably get in
// touch with the author (gtoal@gtoal.com) and ask for a cleaner
// version before you start.

// The descriptive comments which explain what this program does,
// and how, have slowly migrated from being at the top of the
// file to being lost half way down :-)  That is something which
// will be addressed in the next maintenance release.

// AS FAR AS THE WORLD IS CONCERNED, THIS IS ALPHA-TEST CODE
// AND YOU *MUST* BE A PROGRAMMER TO USE IT.  IT IS ***NOT***
// BEING RELEASED FOR PRODUCTION USE ANYWHERE BUT AT MY OWN SITE.

// But this version *does* work, and works very well now.  It is in
// service at the University of Texas Pan American, with 2000 staff
// and 15,000 students (although not all of those are active emailers -
// maybe only 2000-3000 are in heavy use.  It sometimes looks like
// AOL and MSN and Yahoo are the official campus mailers :-/ )


//--------------------------------------------------------------------

// Making test change ("Importance:" header) to this copy;
// still need to update spamfilter2 source

// TODO: should I refuse to mark as spam anything which originated
// locally?  Or even more aggressively, whitewash it?  That would
// solve the problem of local people objecting to their mail being
// tagged as spam.  Think about it before making a decision...

// HUH??? why am I using 'spamc' without having something set up
// to start 'spamd'.  Thought I had decided to use 'spamassassin -L'
// to run the non-daemon version with local tests only???? CHECK THIS!
// fortunately the 'fail safe' code works great, so the only consequence
// was that self-training was not taking place.

// spamd at home is hanging frequently.  I *hope* that's because it's
// an older version than at UTPA, and I'm not just waiting for a big
// surprise at work some morning :-/  If it does break, it can be
// safely disabled and things still work - but preferably at the
// source level (USE_ASSASSIN) rather than just killing the daemon,
// which fails safe but does generate lots of log entries.

// The Oracle mailer is rejecting some emails, I think possibly
// due to bad mime encoding but not 100% sure.  The error returned
// from the data command is "451 4.3.0 Data command failed."
// but then some of the actual data is still sent to the server
// as if it were commands.  That is the part which needs to be
// fixed.  I wonder if it is another c=255 -> c=-1 problem?
// It is minor as long as all it affects is spam, but that isn't
// guaranteed. [Later: seems Oracle is deliberately rejecting mail
// with bad MIME formatting]

// last edit: 23:45  15 Jul 04 MASTER COPY NOW ON spamfilter1.utpa.edu
// use ci -l to check in to rcs ...
#define VERSION "0.7.2f"
// 0.7b is a bug fix so that 8-bit character 0xff is not handled like EOF
// 0.7c is a bug fix for failure to edit out old X-Priority: header
//      spamtraps went in here somewhere for LOCAL USE on spamfilter2 ONLY
// 0.7d is an internal maintenance release with a couple of spamtraps removed
// 0.7e is a backport to spamfilter1.utpa.edu
// 0.7.2f is production, first release

// *** TO DO: "man xinetd" and modify params in conf file for D.o.S. protection
//            - done, now document it...

#define DEBUG 1
#define DEBUG_SPAMTRAP 1
#define DEBUG_CLAMAV 1
#define DEBUG_UVSCAN 1
#define DEBUG_ASSASSIN 1

// panam.edu test is not doing wrapping

#define USE_ANOMY 1
/*
   NOTE: while debugging, you ought to put something like
   this into /etc/cron.daily/local-cleanuptmp to prune the
   large number of debug files created.

#!/bin/sh
# wipe the big records entirely, daily
/bin/rm -f /tmp/file*.ASSASSIN
/bin/rm -f /tmp/file*.CLAMAV
/bin/rm -rf /tmp/smtpfilter
# wipe up old stuff, may not be only from smtpfilter
/usr/bin/find /tmp -name 'file??????' -mmin +5000 -maxdepth 1 -exec /bin/rm {} \; > /dev/null 2>&1

 */


// remove this if you do not have Clam Antivirus installed.
// (or better yet, go install it.  It's easy, free, and good)
// [TO DO] Code does not YET check that the program was present.
// if it is missing, the shell stderr is sent to the client and
// the transfer fails, and later retries many times...
#define USE_CLAMAV 1

// I am adding "avscan" to this code, so need to add a define
// here in order to enable it.  You can define either of the AV scanners,
// or both (as a QA check) - or neither.
#define USE_UVSCAN 1

// remove this if you do not have SpamAssassin installed.
// (or better yet, go install it too.)
#define USE_ASSASSIN 1

// NOTE: this code is written in a very open and deliberately
// naive style quite unlike my usual code.  This is a deliberate
// decision done in order to be able to review the code more easily
// for security issues.  In particular I have been very conscious of
// avoiding potential buffer overflow problems.

// BEING DONE: add support for spamassassin as a first-pass test to
// self-train, rather than my personal filter which was the original
// plan.  If you run spamd -L for local tests only and test on
// a threshhold of >= 8.0, you'll get about a 40% detection rate
// for spam and virtually no false positives, while avoiding the
// significant run-time overhead of spamassassin's body tests
// (- this is not yet done as far as I remember.  We're still
//  feeding the whole body through, and I think with net-enabled
//  tests turned on too.  As one guy pointed out, it makes better
//  decisions with more information, so why cripple it deliberately?
//  However I suspect that these tests are quite expensive)

// ALSO BEING DONE: adding call to clamav to check for viruses...
// again, a fairly expensive test; but I should be able to reduce
// the number of times it is invoked by only running it when there
// is a mime header with a file attachment (does it detect html
// viruses?)

// ... but don't fear, there is a plan B (and a plan C!):
// I have some lightweight code for doing equivalent tests on
// the headers, but currently the signs they are looking for
// are hard-coded.  I have been experimenting with a new
// algorithm (trie-based, much like compression) for identifying
// common strings, and it seems ideally suited for header tests
// - cheap, and dynamically adjustable.
// TO DO: add links to my wiki which contains the prototype code
// and rewrite the above paragraph.
// (intro is here: http://www.gtoal.com/mt/archives/000010.html
//  - code still to add)

/* Current known problems:
    Very large mails were causing a problem.  After spooling them to this
   server, at the point where we pass them on to the real server, that
   transfer was taking so long (with nothing going back to the client
   in the meantime) that the dead-man's-switch timeout was causing me
   to abort.  This was easy to fix, by resetting the timeout every 10K
   characters as I sent data to the final server.  HOWEVER the potential
   still remains that the client will wait longer than 10 minutes for
   a reply from us and he will time out.  (And unfortunately the other
   end will think the mail got through OK so we'll see duplicate copies
   of large mail turning up...)
   This hasn't happened yet here.  Even the largest mails have been
   accepted in less than 10 minutes.  But the potential remains.
   Three possible solutions, all poor:
   1) Fake the 'accepted' response.  This is bad if the other end
      doesn't accept it and breaks the clean 'transparent' design goal.
      This is basically a store-and-forward implementation rather
      than a filter, which I had been trying to avoid.
   2) Send back NOOP lines - I don't think that is valid when the client
      is waiting for a numeric response code
   3) slowly send back continuation lines with a bogus return code, BUT send
      back the real code as the last line once we finally get it, with the
      correct return code - and pray that all MTAs look only at the last
      value they receive when they test the return code.
   [4) hope that clients don't timeout, only servers ...]
   5) Should we assume virus files are less than some critical
      size (eg 5Mb) and buffer and check only that much, then pass the
      rest through?  (5mb is about the size you can send to our servers
      within the 300 sec timeout period...)
 */

/*
   This program is a semi-transparent "man-in-the-middle" SMTP
   server which accepts incoming connections and forwards them
   in real-time to another mail server.

   It passes all other commands through transparently except for
   the "DATA" command, which it handles by buffering the entire
   data packet, inserting a spam score, and then sending the data.
   It also has the option of sending a RSET to kill the mail
   should the spam score be above a critical threshhold.

   [TO DO] It *should* send NOOP commands while gathering the DATA.
   This will not be too hard to do.  We have a timer available already.

   [TO DO] Need to check for an empty mail body and tag those mails
   as spam.  Although spamassassin *might* get those for us, now
   that I'm using it.

   [TO DO] Since spams are put in the spam folder, do we need to virus
   check spams?  (Probably yes, for people who are not using the client-side
   rule.  However might be better to virus check first and then skip the
   spam check if positive).  Need to kill virus mails dead, silently+logged.

   Related programs:
   http://www.aplawrence.com/KevinSmith/scanmail/smtprelay/smtprelay.c
   - does something similar for virus scanning.  Maybe we can pick up
   some good bits from each other's code.

   Because all connections to the 'real' mail server will now
   be coming from this code rather than directly, this server has
   to do the first-level "third-party-relay" checks.  It could
   also do dns-based blacklisting if you want to go to those lengths.
   Personally I don't.  Everything I've read says they're not helpful.

   See http://lists.debian.org/debian-devel/2000/debian-devel-200003/msg01325.html

	"Conclusions

	I have been unable to conclusively show that any of the RBLs are actually
	reducing spam, but I have positively confirmed that they *all* (save RBL
	which I cannot check since we block on it) would result in legitimate
	messages being blocked. 

	ORBS deserves special mention because of their insane hit count, I don't
	know what that is about but ORBS would block 10% of the mails we get. I
	think it is without question that the majority of those blocks are
	legitimate mails. ORBS is also almost completely inclusive of the RSS and
	RBL."

	[ Later note: I was given a pointer to cbl.abuseat.org which
	I've tested on all the 'last hop' IPs in the received lines of
	my baseline spam corpus, and it gave *no* false positives!  I'm
	very tempted to use it as a front-end test here...
	Look up IPs as BYTE4.BYTE3.BYTE2.BYTE1.cbl.abuseat.org ]

   In order to do third-party relay blocking, we need to know all the
   domains that our real mail server will accept mail for if coming from
   outside.  (A simpler solution would be if the real mail server does
   not treat this server as a local mailer.  This was my personal config
   so I did not originally implement 3rd-party relay blocking, but I
   have added it now.)

   Note: spamassassin can use BLs as a contributing factor rather than
   a firm decision.  However you don't get the benefit of early rejection.

   I found a neat idea, "Greylisting", which I think is definitely
   possible.  http://projects.puremagic.com/greylisting/  It will be
   added here as an option as soon as the basic product is sound.

   Rather than have complex configuration files etc, anything that
   needs to be customised will follow this comment and you should
   edit this source file and recompile.  There is no complex build
   system, it's just one simple .c file.

   cc -o /usr/local/bin/smtpfilter -DPORT=nnnn smtpfilter.c

   Add something like this to inetd.conf:
   smtp stream tcp nowait nobody /usr/local/bin/smtpfilter smtpfilter

   (For xinetd sites, read the accompanying document)

   This code doesn't require any privileges and can easily be run in a
   chroot jail (or a uml) as 'nobody' - all it needs is to be able to create
   and later delete temporary files for the spam filter, and invoke
   the spam filter.  Currently the Bayesian tests are implemented as
   an external command - you have to change the code in function 'spam()'
   if you want to use a different filter.

   We deliberately don't implement very much of the SMTP protocol -
   we generally don't care what commands are issued, whether you start
   with HELO or EHLO etc.

   [TO DO: note "SEND FROM" can send interactive messages to the mailer,
    and if we don't insert a "Received:" header, they appear to originate
    from the spam filter - which could be used to DoS or anonymously
    harass people, eg with pop-up spams.]

   This program is meant to be very simple and it should be compatible
   with anyone's mail server as long as it is line buffered and issues
   standard RFC821-format return or continuation codes.  NOTE: currently
   we do *not* generate extended return codes.  This appears to be breaking
   some non-conformant mailers.

 */
//#include <sys/time.h>
//#include <sys/types.h>
//#include <unistd.h>
#include <sys/wait.h>

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <time.h>
#include <sysexits.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#ifndef FALSE
#define FALSE (0!=0)
#define TRUE (0==0)
#endif

/* MUST NOT include trailing slash */
#define LOGDIR "/tmp/smtpfilter"
/* customise here for other sites */
#if ((PORT == 1003) || (PORT == 1004))
#undef LOGDIR
#define LOGDIR "/tmp/panam"
#endif

/* Per-process debug logs; NULL until setup_debug() has opened them
   (and still NULL afterwards if they could not be opened). */
static FILE *debugclient = NULL, *debugserver = NULL;
/* starttime is set by setup_debug(); nowtime/deltatime are scratch values
   used by the debug code when it timestamps log entries. */
static time_t starttime, nowtime, deltatime;

/*
 * Open LOGDIR/<subdir>/<pid>.log for writing.
 *
 * If the first attempt fails, LOGDIR and LOGDIR/<subdir> are created
 * (mode 0700; failures such as "already exists" are ignored) and the
 * open is retried once.  Returns the stream, or NULL if it still cannot
 * be opened.
 */
static FILE *open_debug_log(const char *subdir, int pid)
{
  char file[1024];
  FILE *log;

  snprintf(file, sizeof file, "%s/%s/%d.log", LOGDIR, subdir, pid);
  log = fopen(file, "w");
  if (log == NULL) {
    char dir[1024];

    mkdir(LOGDIR, 0700); /* May exist already.  Who cares. */
    snprintf(dir, sizeof dir, "%s/%s", LOGDIR, subdir);
    mkdir(dir, 0700);
    log = fopen(file, "w");
  }
  return log;
}

/*
 * Record the start time and open the two per-process debug logs,
 * LOGDIR/client/<pid>.log and LOGDIR/server/<pid>.log.
 *
 * Either stream may be left NULL if its file cannot be created; the
 * logging code checks for that before writing.
 */
void setup_debug(void)
{
  int pid = (int)getpid();

  starttime = time(NULL);
  debugclient = open_debug_log("client", pid);
  debugserver = open_debug_log("server", pid);
}

#ifndef PORT
// This is the main customisation parameter.  Determines which system
// is being filtered.  Note you do not need to run this code on this
// port - it *can* run on port 25 always and redirect to another box.
// However even if you do that, the value of PORT should be something
// else, just as a convenient way of selecting the appropriate
// customisation below.
#define PORT 25
#endif

// The 'ifdef's have been restructured so that they are ordered by
// target system, thus reducing the number of tests and shortening the
// code.  This does give some duplication.  I have commented only
// the first block.

// Per-site configuration, selected at compile time by the value of PORT.
// Each branch defines:
//   DOMAINS        colon-separated list of domains we accept mail for;
//                  anyone else will be bounced as a third-party relay.
//   DEFAULT_DOMAIN domain which may be appended when mail is sent to a
//                  plain username with no "@domain.com".
//   GOOD_TAG / SPAM_TAG  the spam tags inserted in the fake Received: header.
//   LOCALIPS       colon-separated list of address prefixes treated as local.
//   RELAY_HOST     the real host that we pass our mail on to.
//   CUSTOMHEADER   (optional) extra X- header(s) added when tagging.
// Only the first branch is commented in detail.

#if PORT == 1000  /* Kirtley's CS server */
 /* --------------------------------------------------------------- PORT 1000 */

// We should not accept any form of "@site," "!site" etc routing that
// may be passed on to the real mailserver [possibly TO DO]

// the more I look at this, the more I'm beginning to favour a store-and-forward
// approach that selects the destination server after the domain is known,
// and allows one spamfilter box to handle multiple real servers behind it...
// Should also reduce the load on the server.
#define DOMAINS "cs.panam.edu:zappa.cs.panam.edu:bahia.cs.panam.edu:mesquite.cs.panam.edu"

// Might rewrite this just to take the first domain off the DOMAINS string
// instead.  Less to remember.
#define DEFAULT_DOMAIN "cs.panam.edu"

// THIS IS A CHANGE IN DESIGN.  Previously I was offering the choice
// between a faked 'received' field and a regular X- header in order
// to tag spam.  However if you use the X- header, you lose the
// information about where the mail was injected, so you really
// do need the 'received' line for its genuine purpose, not just
// as a conveniently innocuous place to put the spam tag.  Therefore
// I am changing the code so that the Received is always added, but
// the X- header is added as an option.
#define GOOD_TAG "utpagood"
#define SPAM_TAG "utpaspam"

// The cs mailer shouldn't need to accept mail from the island or ITT as local
#define LOCALIPS "127.:129.113."

// The original model of this code is that it sits outside your
// local zone of trust, and accepts connections from outside which
// it passes to the real mail server.  So the mail server's 3rd-party
// relay blocking will work OK because we're as much an outsider as
// the spammer is.  (The same is true if this is implemented as a
// transparent bridge)   HOWEVER if through circumstances we are forced
// to run this filter on a system which is considered a trusted host
// by the real mail server, and to compound the issue our local users
// are likely to connect to this system in order to send mail out
// (eg because the real mailer is immediately behind this system),
// then we MUST distinguish local senders so that *we* do not bounce
// their outgoing mail under the misapprehension that it is a 3rd-party
// spam...

// (apologies for not using a proper netmask eg 129.113/16
// but this is currently a quick hack... - I have some code that
// handles CIDR addressing nicely (as a bit-trie) which I'll put
// on the web and add here as a link later.)

// RELAY_HOST is converted to an address by 'gethostbyname'; however using a
// number appears to work with that interface too, so we won't bother
// with the hassle of testing the string and calling gethostbyaddr whenever
// it looks like a dotted quad...
#define RELAY_HOST "zappa.cs.panam.edu"

#elif PORT == 1001 /* Main panam.edu VMS system, lots of domains accepted */
 /* --------------------------------------------------------------- PORT 1001 */

// Do not wrap attachments for whole of panam during early test
#undef USE_ANOMY

#define DOMAINS "panam.edu:panam1.panam.edu:panam2.panam.edu:panam3.panam.edu:smtp.panam.edu:pop3.panam.edu:utpa.edu:utpanam.edu:husky.panam.edu:listserv.panam.edu:webmail.panam.edu"

#define DEFAULT_DOMAIN "panam.edu"

#define GOOD_TAG "utpagood"
#define SPAM_TAG "utpaspam"
#define CUSTOMHEADER "X-Priority: 5\nX-UTPA-Spam: %s\n"

// (previously just "127.:129.113.")
#define LOCALIPS "127.:129.113.:204.158.57:206.254.217:198.213.49:198.213.144"
//          localhost panam     island     eli         coserve    annexe

// (previously "129.113.1.2")
#define RELAY_HOST "smtp.panam.edu"

#elif PORT == 1002 /* My home system, where most of testing goes on */
 /* --------------------------------------------------------------- PORT 1002 */

// This should come from the Cw line in sendmail.cf - might automate
// some day in the Makefile???  Another good check would be to walk
// the dns files looking for MX records that point to us.  (Tricky -
// a run-time check would be better, but that relies on the DNS being up)
#define DOMAINS "gtoal.com:vt.com:ns1.gtoal.com:admin.vt.com:history.dcs.ed.ac.uk:pizzabox.gtoal.com:feldtman.com:localhost"

#define DEFAULT_DOMAIN "gtoal.com"

#define GOOD_TAG "bayeschecker"
#define SPAM_TAG "spamfilter"

#define LOCALIPS "127.:192.168."

// (previously "24.173.85.38")
// now running on same host as the actual mailer.  With about 3000 spamtrap
// addresses active on my home box (800MHz) it runs with a load average
// of about 1. :-/  Hopefully the big multiprocessors at work will take
// the higher load, plus we get the direct scaling benefit of multiple
// MX hosts (ie two spamfilter boxes)

// Later: yes, UTPA is working well with beefier computers but my homebox is
// getting overloaded.  Time to add a box at home dedicated to only spam filtering.
#define RELAY_HOST "127.0.0.1"

// debug_spamtrap flag is a little hacky - should really also have a USE_SPAMTRAP flag
// ... also, there is a lot of spamtrap code that is turned on only for UTPA, which
// needs to be cleaned up and made general for everyone to use.  Currently spamtraps
// on home system are done externally to this code with lots of email aliases and
// a procmail filter.
#undef DEBUG_SPAMTRAP
#undef USE_ANOMY
#undef USE_UVSCAN
#undef DEBUG_UVSCAN

#elif PORT == 1003 /* My work test system (formal QA) */
 /* --------------------------------------------------------------- PORT 1003 */
#define DOMAINS "infos.panam.edu"

#define DEFAULT_DOMAIN "infos.panam.edu"

#define GOOD_TAG "utpagood"
#define SPAM_TAG "utpaspam"

#define LOCALIPS "127.:129.113."

#define RELAY_HOST "129.113.28.249"
#define CUSTOMHEADER "X-UTPA-Spam: %s\n"

#elif PORT == 1004 /* Our first 'live' domain, new Oracle-based mailer */
 /* --------------------------------------------------------------- PORT 1004 */
// minimum change for testing.  Mod the MX for panam.edu only
// We will add this host as an equal-numbered MX on the first day...
#define DOMAINS "utpa.edu:testappl.panam.edu:panam.edu:testappl.utpa.edu"
#define DEFAULT_DOMAIN "utpa.edu"

#define GOOD_TAG "utpagood"
#define SPAM_TAG "utpaspam"

// TO DO: Add the island, and ITT building which are outside 129.113
#define LOCALIPS "127.:129.113.:204.158.57:206.254.217:198.213.49:198.213.144"
//          localhost panam     island     eli         coserve    annexe

// TEST SERVER.  NOT FINAL HOST.
#define RELAY_HOST "129.113.37.13"
#define CUSTOMHEADER "X-Priority: 5\nX-UTPA-Spam: %s\n"

#elif PORT == 999 /* PINBOARD */
 /* --------------------------------------------------------------- PORT 999 */
#define DOMAINS "pinboard.com:sysstats.com:kuru.to:mita.co.jp:freenet.ch:raiffeisen.ch"
#define DEFAULT_DOMAIN "pinboard.com"
#define GOOD_TAG "goodmail"
#define SPAM_TAG "spammail"
#define LOCALIPS "127.:194.209.195.8."
#define RELAY_HOST "mail.pinboard.com"
#define CUSTOMHEADER "X-SpamFilter: %s\n"

#else
// PORT did not match any of the configurations above.
#error No port given
#endif

// If we are inside the real server's zone of trust we must do
// third-party relay checking; if we're outside it, can let the
// real server do it itself...  Hence we need to be able to find
// the callers IP.  (also useful for optional RBL tests)

/*#includes needed for determining IP of caller, and handling timeouts: */
#include <netinet/in.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <setjmp.h>
#include <signal.h>

// The stream descriptors are global in order to aid debugging: the logging
// code in put() and get() compares the fd it was given against them to
// decide which log file an entry belongs in.  If the debugging is removed,
// this declaration can be moved back into main.
static int from_client, from_server, to_client, to_server; /* streams */

/*
 * put - printf-like procedure which writes to a file descriptor rather
 * than a FILE.
 *
 *   fd   descriptor to write to
 *   s    printf-style format string
 *   ...  arguments for the format
 *
 * The formatted text is written one byte at a time.  Every '\r' in it is
 * dropped and a '\r' is written in front of every '\n', so lines always
 * go out terminated by exactly "\r\n" whatever the caller supplied.
 *
 * Results of write() are not checked (best effort, as before).  If the
 * formatted text cannot be measured nothing is written; if memory for it
 * cannot be allocated the process exits with status 1.
 *
 * If we are debugging, a copy of every byte written is logged to the
 * client or server log along with a timestamp at the start of each line.
 * Timing is possibly a big unexplored opportunity in finding new ways to
 * identify spammers over real mailers.
 */
void put(int fd, char *s, ...)
{
   static const char cr = '\r';
   va_list ap, measure;
   char *buff;
   int string_length, i;

   va_start (ap, s);
   // A va_list may only be walked once, so measure the output on a copy
   // and keep the original for the real formatting pass.
   va_copy (measure, ap);
   string_length = vsnprintf (NULL, 0, s, measure);
   va_end (measure);
   if (string_length < 0) {
      va_end (ap);
      return;
   }

   buff = malloc ((size_t)string_length + 1);
   if (buff == NULL) {
      va_end (ap);
      exit(1);  // [TO DO]
   }
   vsnprintf (buff, (size_t)string_length + 1, s, ap);
   va_end (ap);

   for (i = 0; i < string_length; i++) {
      int c = buff[i];

      if (c == '\r') continue;   // re-inserted below, in front of each '\n'
      if (c == '\n') {
         write(fd, &cr, 1);
      }
      write(fd, buff+i, 1);

#ifdef DEBUG
      // Debugging is deliberately kept visually separate from the real code.
      // Note that the inserted '\r' is not logged, only the byte itself.
      {
         static int startline = TRUE;
         if ((debugclient != NULL) && (debugserver != NULL)) {
            nowtime = time(NULL); deltatime = nowtime-starttime;
            if ((fd == from_client) || (fd == to_client)) {
               // insert timestamp at the start of each line.
               if (startline) (void)fprintf(debugclient, "[t=%ld]", (long)deltatime);
               (void)fprintf(debugclient, "%c", c);
               fflush(debugclient);
            } else if ((fd == from_server) || (fd == to_server)) {
               if (startline) (void)fprintf(debugserver, "[t=%ld]", (long)deltatime);
               (void)fprintf(debugserver, "%c", c);
               fflush(debugserver);
            } else {
               // Not one of the known streams: tag with the fd, log to both.
               (void)fprintf(debugclient, "[%d]%c", fd, c);
               fflush(debugclient);
               (void)fprintf(debugserver, "[%d]%c", fd, c);
               fflush(debugserver);
            }
         }
         startline = (c == '\n');
      }
#endif
   }

   free (buff);
}

// Using a macro here allows us to replace this easily with a diagnostic version;
// it passes the line number of the call site to my_exit().
#define debug_exit(rc) my_exit((rc), __LINE__)

/*
 * my_exit - terminate the process with exit status rc.
 *
 *   rc    exit status handed to exit()
 *   line  source line of the caller (supplied by the debug_exit macro);
 *         only used in the debug log entry
 *
 * When DEBUG is defined, an "exit(rc) at line N" entry with the elapsed
 * time is appended to whichever of the debug logs are open.  Never returns
 * (the int return type is kept only for the benefit of existing callers).
 */
int my_exit(int rc, int line)
{
#ifdef DEBUG
  nowtime = time(NULL); deltatime = nowtime-starttime;
  // time_t is not necessarily an int, so convert explicitly for printing.
  if (debugclient != NULL) {
    fprintf(debugclient, "\n[t=%ld]exit(%d) at line %d\n", (long)deltatime, rc, line);
    fflush(debugclient);
  }
  if (debugserver != NULL) {
    fprintf(debugserver, "\n[t=%ld]exit(%d) at line %d\n", (long)deltatime, rc, line);
    fflush(debugserver);
  }
#else
  (void)line;
#endif
  // This is paranoia.  Keep the connection open a little longer in case
  // the problems we were having with duplicate mails were due to the final
  // packet in the buffer (the return code) not being delivered (ie a race
  // condition).  [It wasn't - it was a firewall bug mentioned elsewhere;
  // however this doesn't hurt!]
  // NOTE(review): usleep() takes microseconds, so this is a 2.5 ms pause,
  // not the "few seconds" the original comment described - confirm which
  // was intended before changing the value.
  usleep(2500);
  exit(rc);
}


/* Jump buffer that dtimer() returns through; whoever arms the timer is
   expected to have called setjmp() on it first. */
static jmp_buf RDTimeout;

/* ---- Timeouts -------------------------------------------------------- */

/* Defensive programming */
int DeadMansTimeout = 6 * 60;

/* timeout in secs for initial connect */
int ConnTimeout = 6;

/* Read timeout in SMTP phase - primarily the 'waiting for command or
   reply' phases.  (It is more convenient during debugging to shorten
   this, eg to 6.) */
int ReadTimeout = 5 * 60;

// If you want to be RFC compatible, set both ReadTimeout and CmdTimeout
// to 60*5 ...
/* A short CmdTimeout turns out to be essential if you want to avoid
   exceeding the max no of daemon children active at once */
int CmdTimeout = 30;

// Note: I'm currently using inetd or xinetd.  It would undoubtedly
// be more efficient to *not* use these, and to pre-fork listeners
// for a quick connection like several other programs do.

/* Much shorter timeout used eg in the middle of reading a line.
   (10 proved too short for manual testing of DATA entry.) */
int ShortTimeout = 45;

/*
 * Timer signal handler: abandons whatever was in progress by jumping
 * back to the setjmp(RDTimeout) site, which then sees the value 1.
 * The signal number is not used.
 */
static void dtimer(int sig)
{
  (void)sig;
  longjmp(RDTimeout, 1);
}

/* Get a single character from either client or server, with
   an appropriate timeout.  Originally I wrote this using a
   simple read and an alarm() timeout.  While debugging a problem
   (which actually turned out to be nothing to do with timeouts)
   I replaced that code with this version, which is pretty much
   straight out the man page for select().  I've now deleted the
   previous code so if you need to see it look in the rcs file.
   I also found a procedure "smtp_read()" on the net in the BMilter
   sources, and tried it but had big problems making it work.
   (http://blue-labs.org/software/Bmilter/code/utilities.c)

   There may be some unused includes that could be removed now that
   I've deleted the old get().
 */

// [TO DO] - should I drop \r here in get and just never pass it
// back up?  Simplifies the rest of the code although it might cause
// timeout issues?

/*
 * get - read one character from fd, waiting at most `timeout` seconds.
 *
 *   fd       descriptor to read from (client or server stream)
 *   timeout  maximum time to wait for data, in seconds
 *
 * Returns the character as a value in the range 0..255, or EOF if nothing
 * could be read: the wait timed out, select() failed (which includes
 * being interrupted by a signal), or read() returned end-of-file or an
 * error.  Byte 0xff is therefore never confused with EOF.
 *
 * Safety net: EOF is only ever returned once per process.  If a second
 * call would also return EOF, the process exits via debug_exit(0) instead,
 * so a caller that forgets to stop after EOF cannot loop for ever.
 *
 * When DEBUG is defined every character (or EOF) is also written to the
 * client or server debug log, with '\r' and '\n' made visible and a
 * timestamp at the start of each line.
 */
int get(int fd, int timeout) {
    static int timedoutalready = FALSE;
    unsigned char byte;
    fd_set rfds;
    struct timeval tv;
    int ready;
    int c = EOF;

    FD_ZERO(&rfds);
    FD_SET(fd, &rfds);

    tv.tv_sec = timeout;
    tv.tv_usec = 0;

    ready = select(fd+1, &rfds, NULL, NULL, &tv);
    /* Don't rely on the value of tv now! */

    if (ready > 0) {
      // Unbuffered, one char at a time
      // but buffered would be OK too.
      // Reading into an unsigned char keeps 0xff distinct from EOF.
      if (read(fd, &byte, 1) > 0) c = byte;
    }

#ifdef DEBUG
    {
      static int startline = TRUE;
      if ((debugclient != NULL) && (debugserver != NULL)) {
        char text[128];

        nowtime = time(NULL); deltatime = nowtime-starttime;
        // time_t is not necessarily an int, so convert explicitly for printing.
        if (c == EOF) {
          snprintf(text, sizeof text, "[eof t=%ld]", (long)deltatime);
        } else if (c == '\r') {
          snprintf(text, sizeof text, "\\r");
        } else if (c == '\n') {
          snprintf(text, sizeof text, "\\n\n");
        } else {
          text[0] = (char)c; text[1] = '\0';
        }

        if ((fd == from_client) || (fd == to_client)) {
          if (startline) fprintf(debugclient, "[t=%ld]", (long)deltatime);
          (void)fprintf(debugclient, "%s", text);
          fflush(debugclient);
        } else if ((fd == from_server) || (fd == to_server)) {
          if (startline) fprintf(debugserver, "[t=%ld]", (long)deltatime);
          (void)fprintf(debugserver, "%s", text);
          fflush(debugserver);
        } else {
          // Not one of the known streams: tag with the fd, log to both.
          (void)fprintf(debugclient, "[%d]%s", fd, text);
          fflush(debugclient);
          (void)fprintf(debugserver, "[%d]%s", fd, text);
          fflush(debugserver);
        }
      }
      startline = (c == '\n'); // next get is at the start of a line.
    }
#endif

    if (c == EOF) {
      if (timedoutalready == TRUE) {
        // This is a belt-and-suspenders safety trick.  If I accidentally
        // fail to exit after returning EOF, the next read will force an exit.
        // (i.e. pascal-style!)
#ifdef DEBUG
        if (debugclient != NULL) {
          (void)fprintf(debugclient, "{EXIT!}");
          fflush(debugclient);
        }
#endif
        debug_exit(0);
      }
      timedoutalready = TRUE;
    }
    return c;
}


// [TO DO] I do not currently pass the virus name back as a string.  It might be
// an idea to put the name in an "X-Virus-Detected: Worm.SomeFool.P" header ...
// If I do this, I need to also run uvscan under popen() and parse the output.
// Scan the spooled mail file 'fname' with whichever virus scanners were
// enabled at compile time (USE_UVSCAN and/or USE_CLAMAV).
//
//   fname   - path of the message file.  It is pasted unquoted into shell
//             command lines, so it must be a name generated by this program.
//   returns - TRUE only when a scanner positively reports an infection.
//             Everything else (scanner missing, command line too long,
//             unrecognised output, no scanner compiled in) returns FALSE,
//             i.e. the mail is let through: fail safe.
//
// With DEBUG_UVSCAN / DEBUG_CLAMAV defined, an infected file is additionally
// hard-linked to <fname>.NAI / <fname>.CLAMAV so a copy is kept.
int is_virus(char *fname)
{
  int virus_found = FALSE; // default is to fail safe.

// clamscan is taking 3 to 4 seconds to scan a trivial email.
// So if uvscan is installed, use it in preference, unless you
// have lots of CPU and are paranoid, in which case running *both*
// programs is definitely preferred.  The code below allows both
// to be active at the same time.  In fact with the _DEBUG options
// turned on, you can compare them for effectiveness just by looking
// at the files in /tmp

// UPDATE: looks like *both* virus scanners take at least 4 seconds to run.
// - this can't be right, I must be mis-timing something.  Running the
// commands externally under 'time' shows a fraction of a second.
// uvscan is still faster than clamscan, though I haven't fine-tuned the
// options of either yet.

#ifdef USE_UVSCAN
  {
    char command[1024];
    int cmdlen = snprintf(command, sizeof(command),
                          "/usr/local/bin/uvscan -u --mailbox --mime %s > /dev/null 2>&1", fname);

    // Never execute a truncated command line; treat it as 'not scanned'.
    if ((cmdlen > 0) && ((size_t)cmdlen < sizeof(command))) {
      int rc = system(command);
#ifdef DEBUG_UVSCAN
      if ((debugclient != NULL) && (debugserver != NULL)) {
        (void)fprintf(debugclient, "UVSCAN: WEXITSTATUS(rc)=%d\n", WEXITSTATUS(rc)); // see "man 2 wait"
        fflush(debugclient);
        (void)fprintf(debugserver, "UVSCAN: WEXITSTATUS(rc)=%d\n", WEXITSTATUS(rc)); // see "man 2 wait"
        fflush(debugserver);
      }
#endif
      // 13 is the return code for 'virus found' - *EVERYTHING* else we treat
      // as 'no virus' (fail safe mode).  rc == -1 means system() itself
      // failed, and WEXITSTATUS is only meaningful after a normal exit.
      if ((rc != -1) && WIFEXITED(rc) && (WEXITSTATUS(rc) == 13)) {
#ifdef DEBUG_UVSCAN
        {
          // VIRUS DETECTED.  SAVE IT!  (best effort: failing to keep a
          // copy must not turn a positive detection into 'clean')
          size_t lnsize = strlen(fname)*2+1024;
          char *tmpbuff = malloc(lnsize);
          if (tmpbuff != NULL) {
            snprintf(tmpbuff, lnsize, "/bin/ln %s %s.NAI", fname, fname);
            (void)system(tmpbuff);
            free(tmpbuff);
          }
        }
#endif
        // Do not need to also scan with clam.  If you want to do both,
        // replace this return with:  virus_found = TRUE;
        return(TRUE);
      }
    }
  }
  // otherwise if apparently not infected, and we are paranoid, also scan it with clamav...
#endif /* USE_UVSCAN */


#ifdef USE_CLAMAV
// WARNING !!!!!!!!!!!!  9 may 04
// Some corrupt mime formatting and other errors cause clamscan to send
// error messages to stderr even when --stdout is given, e.g.
//   LibClamAV Warning: Invalid content-type ' text' received, no subtype specified
// If this text is sent back to the client, it will cause a protocol
// failure, hence the "2>/dev/null" below.  (POSIX specifies that system()
// and popen() run the command through sh, so the Bourne-style redirection
// does not depend on the user's login shell.)

// The exit status from popen/system was not found to be reliable here - a
// value of 256 was seen when scanning known virus files (which is what an
// undecoded wait status for exit code 1 looks like; see WEXITSTATUS).
// So the first line of clamscan's text output is parsed instead.  This
// will fail if clamscan's output format changes, but it should fail safe.
  {
    static const char found_suffix[] = " FOUND";
    const size_t suffix_len = sizeof(found_suffix) - 1;
    FILE *clam = NULL;
    char command[1024];
    char result[1024];   // verdict text from the first output line
    size_t used = 0;     // number of characters stored in result
    int cmdlen;

    cmdlen = snprintf(command, sizeof(command),
                      "/usr/local/bin/clamscan --stdout --mbox %s 2>/dev/null", fname);
                      // The absolute path needs to be moved to headers

    // [TO DO] *** NOTE *** if the program is not present, the shell prints
    // an error message which comes out when the client is expecting to read
    // an SMTP result code.  It then assumes the send failed, and resends
    // later.  So only run the command when the binary can at least be opened.
    if ((cmdlen > 0) && ((size_t)cmdlen < sizeof(command))) {
      // crude sanity check.  (but doesn't check that it is executable)
      FILE *check_executable = fopen("/usr/local/bin/clamscan", "r");
                                     // needs to be moved to headers
      if (check_executable != NULL) {
        fclose(check_executable);
        clam = popen(command, "r");
        // IMPORTANT!  clamscan --mbox *must* have a unix mbox-style "From " at the start
      }
    }

    if (clam != NULL) {
      int c, lastc = -1;
      int suppress_output = 1;  // skip "<filename>: ", keep what follows it
#ifdef DEBUG_CLAMAV
      if ((debugclient != NULL) && (debugserver != NULL)) {
        (void)fprintf(debugclient, "CLAMSCAN:\n");
        fflush(debugclient);
        (void)fprintf(debugserver, "CLAMSCAN:\n");
        fflush(debugserver);
      }
#endif
      for (;;) {
        // return info is on first line
        c = fgetc(clam);
        if (c == EOF) break;
#ifdef DEBUG_CLAMAV
        if ((debugclient != NULL) && (debugserver != NULL)) {
          (void)fprintf(debugclient, "%c", c);
          fflush(debugclient);
          (void)fprintf(debugserver, "%c", c);
          fflush(debugserver);
        }
#endif
        if (c == '\n') break;
        // Bounded store: an over-long line is truncated, never overflowed.
        if (!suppress_output && (used < sizeof(result) - 1)) result[used++] = (char)c;
        if (c == ' ' && lastc == ':') suppress_output = 0;
        lastc = c;
      }
      result[used] = '\0';
#ifdef DEBUG_CLAMAV
      if ((debugclient != NULL) && (debugserver != NULL)) {
        (void)fprintf(debugclient, "DRAIN:\n");
        fflush(debugclient);
        (void)fprintf(debugserver, "DRAIN:\n");
        fflush(debugserver);
      }
#endif
      if (c != EOF) {  // This should help avoid a 'signal 13'...
        // drain the rest of clamscan's output before closing the pipe
        for (;;) {
          c = fgetc(clam); if (c == EOF) break;
#ifdef DEBUG_CLAMAV
          if ((debugclient != NULL) && (debugserver != NULL)) {
            (void)fprintf(debugclient, "%c", c);
            fflush(debugclient);
            (void)fprintf(debugserver, "%c", c);
            fflush(debugserver);
          }
#endif
        }
      }
      (void)pclose(clam);
      fflush(stdout);
      fflush(stderr);

      if (strcmp(result, "OK") == 0) {
        // clean.
      } else if ((used >= suffix_len) &&
                 (strcmp(result + used - suffix_len, found_suffix) == 0)) {
        // infected: the verdict line ends in " FOUND".  The length test
        // keeps the suffix comparison inside the buffer for short/empty output.
        // fprintf(stdout, "X-ClamAV: %s\n", result);
#ifdef DEBUG_CLAMAV
        {
          // VIRUS DETECTED.  SAVE IT!  (best effort, as for uvscan above)
          size_t lnsize = strlen(fname)*2+1024;
          char *tmpbuff = malloc(lnsize);
          if (tmpbuff != NULL) {
            snprintf(tmpbuff, lnsize, "/bin/ln %s %s.CLAMAV", fname, fname);
            (void)system(tmpbuff);
            free(tmpbuff);
          }
        }
#endif
        virus_found = TRUE;
      }
      // anything else (error text, unknown format) is treated as clean.
    }
  }
#endif /* USE_CLAMAV */

  return(virus_found); // system failure, or no antivirus defined, so fail safe...
}


#ifdef USE_ASSASSIN
// Ask SpamAssassin (via the spamc client) to score the mail file 'fname'.
//
//   fname   - path of the spooled message; pasted unquoted into a shell
//             command, so it must be a name generated by this program.
//   returns - non-zero when the reported score exceeds the hard-coded
//             threshold of 7.9; FALSE when spamc is missing, cannot be
//             started, or prints nothing parseable (0/0 is not spam -
//             fail safe).
int assassin_spam(char *fname)
{
  FILE *commandfile = NULL;
  FILE *check_assassin;
  float score = 0.0, reported_thresh = 0.0, thresh;
  size_t cmdsize = strlen(fname)+1024;
  char *tmpbuff = malloc(cmdsize);

  if (tmpbuff == NULL) return(FALSE);

// TO DO: NEED TO HANDLE stderr HERE SAME WAY AS FOR clamav
// also have now found the correct way to decode the return-code
// from system(), so don't really need to parse the text reply.

// HOWEVER!  Be careful:  invoking spamc from the command line
// with stdout redirected does seem to cause it to always
// return 0/0 - not obvious why from the man page...

  // Spam Assassin apparently takes an *exponential* time, not
  // a linear time, proportional to the file length.  The "-s"
  // parameter below limits the amount of data that spamc reads
  // and should guarantee a quicker return.

  // I have had about 1 process per day at home run for hours
  // and build up a contribution of 1.0 to the load average
  // from calls to spamc.  I'm hoping that the -s tweak will
  // stop that from happening, but if it continues I'll have
  // to kill spamd as a short-term measure and write a process
  // monitor hack as a medium term fix.

// spamfilter2 gtoal/spam# spamassassin -e -L < BAD1 > /dev/null ; echo $?
// 5
// spamfilter2 gtoal/spam# spamc -c < BAD1
// 13.0/5.0

  // ... we could fall back to spamassassin if spamc fails due to
  // spamd not running.  the -L flag means 'local tests only'

  snprintf(tmpbuff, cmdsize, "/usr/bin/spamc -s 18000 -c < %s", fname);
                              // needs to be moved to headers

  // Only start the pipe when the binary can at least be opened.
  check_assassin = fopen("/usr/bin/spamc", "r");
                          // needs to be moved to headers
  if (check_assassin != NULL) {
    fclose(check_assassin);
    commandfile = popen(tmpbuff, "r");
  }
  // The command string is not needed once popen has run (or been skipped);
  // release it here so that no return path leaks it.
  free(tmpbuff);
  if (commandfile == NULL) return(FALSE);

  // spamc -c prints "score/threshold".  If nothing parses, keep score at 0.
  if (fscanf(commandfile, "%f/%f", &score, &reported_thresh) < 1) score = 0.0;
  while (fgetc(commandfile) != EOF) { /* Drain to avoid signal 13 */ }
  pclose(commandfile);

  /* Note:  0/0 is not spam - fail safe ... */
  /* Can't find command-line place to tweak thresh, and don't want to
     rely on config file, so the reported threshold is ignored and our
     own value is hard-coded. */
  thresh = 7.9f;
  return (score > thresh);
}
#endif /* USE_ASSASSIN */


// Decide whether the spooled mail file 'fname' is spam.
//
// If SpamAssassin support is compiled in (USE_ASSASSIN) and it flags the
// mail, the mail is first fed to "spamprobe train-spam" (the self-training
// part of the system).  The verdict itself always comes from
// "spamprobe score": TRUE is returned when its output starts with 'S'
// ("SPAM ...").
//
//   fname   - path of the message file; pasted unquoted into shell
//             commands, so it must be a name generated by this program.
//   returns - non-zero if spamprobe says SPAM; FALSE otherwise, including
//             when memory is short or a command cannot be started.
int spam(char *fname)
{
// needs to be moved to headers
#if PORT == 999
// Tweak for Kurt.  TEMPORARY.
#define SPAMPROBE_COMMAND "/tmp/spamprobe/bin/spamprobe -d /tmp/spamprobe/share/spamprobe -8 score %s"
#define SPAMPROBE_TRAIN_COMMAND "/tmp/spamprobe/bin/spamprobe -d /tmp/spamprobe/share/spamprobe -8 train-spam %s"
#else
#define SPAMPROBE_COMMAND "/usr/local/bin/spamprobe -d /usr/local/share/spamprobe score %s"
#define SPAMPROBE_TRAIN_COMMAND "/usr/local/bin/spamprobe -d /usr/local/share/spamprobe train-spam %s"
#endif
  int verdict;          // first character of spamprobe's reply, or EOF
  size_t cmdsize;
  char *tmpbuff;
  FILE *commandfile;

  // Room for the longest command template plus two copies of the file
  // name (the DEBUG_ASSASSIN "ln" command formats fname twice).
  cmdsize = strlen(fname)*2+1024;
  tmpbuff = malloc(cmdsize);
  if (tmpbuff == NULL) return(FALSE);

  // File has been written and is already closed.
  // Note: the file is either empty and consists of just "."
  // or contains text followed by \n and a single "." and EOF.

  // (a later bugfix will remove this trailing "." I promise,
  // but for now it doesn't really mess anything up)

#ifdef USE_ASSASSIN
  if (assassin_spam(fname)) {
    // Several things to do here:
    // 1) LOG IT.
#ifdef DEBUG_ASSASSIN
    snprintf(tmpbuff, cmdsize, "/bin/ln %s %s.ASSASSIN", fname, fname);
    system(tmpbuff);
#endif
    // 2) feed mail to spamprobe train-spam
    /* NOTE: currently using spamassassin's built-in threshold, *and*
             the whole mail body, plus any other tests SA might give us,
             such as DNSBL tests.  Worse, it might even be doing a bayesian
             test :-/  --- so, the moral of this story is 'check the tools'
         (The original plan was to bump the threshold way up and only check
         the headers, as a really fast test.  Got forgotten about somewhere
         along the line...)
     */
    snprintf(tmpbuff, cmdsize, SPAMPROBE_TRAIN_COMMAND, fname);
    commandfile = popen(tmpbuff, "r");
    if (commandfile == NULL) {
      free(tmpbuff);   // do not leak the command buffer on the error path
      return(FALSE);
    }
    // don't care about output from the train command, but drain it
    // to avoid signal 13.
    while (fgetc(commandfile) != EOF) { }
    pclose(commandfile);
  }
#endif

  // The external program may if it wishes reinforce the spam
  // parameters with this data, as well as rating it.  However
  // if it does, it should only make small changes because we
  // do not want to reinforce an error so strongly that any more
  // similar mails are wrongly classified the same way.

  // Originally I ran 'spamprobe score' then 'spamprobe good' or
  // 'spamprobe spam', but I later discovered that 'spamprobe receive'
  // did the job for me.  Actually an even later release has added
  // 'spamprobe train' which only adds the data if it wouldn't have
  // been correctly classified.  This should keep the size of the
  // database down.  Also Brian suggested using PBL instead of BerkeleyDB

  // (Talking of which, at the time of writing, I haven't done anything
  // to trim the database.  That's something that's external to this
  // program, but needs to be documented (and done in cron) )

  // Later note: the PBL database is *much* better than the BerkeleyDB
  // one for both size and robustness.  I had to retrain from scratch
  // after a corruption in my .db file.

  // NOTA BENE! keep *all* your spam and ham after classification
  // and manual sorting.  That way you *can* retrain completely.
  // I had made the mistake of not keeping my spamtrap address emails :-(

  // Question: is it the case that I currently only train-spam and
  // never train-good?  If so, need to rethink that.  What I'm doing
  // now *does* seem to work but it worries me!  One thing I could
  // easily add (well, maybe not so easily) is to whitewash *all*
  // *outgoing* email.  The intention being to help customise ham to
  // the local site, and pre-bias the Bayesian code for any legitimate
  // replies.  Hopefully our users won't be sending out spam.  (BUT
  // what do we do for accounts with mail forwarding set up, who *are*
  // sending out spam?  Can we check the headers to see if it was
  // already filtered on the way in, and ignore it?  Count the Received
  // lines perhaps?)

  snprintf(tmpbuff, cmdsize, SPAMPROBE_COMMAND, fname);
  commandfile = popen(tmpbuff, "r");
  free(tmpbuff);        // command string no longer needed on any path below
  if (commandfile == NULL) return(FALSE);

  // Only the first character matters; the rest is drained so that
  // spamprobe is not killed by a closed pipe.
  verdict = fgetc(commandfile);
  if (verdict != EOF) {
    while (fgetc(commandfile) != EOF) { }
  }
  pclose(commandfile);

  // Real cheap-assed test!  Spamprobe's output is "SPAM ..."
  return(verdict == 'S');
}

void this_is_thirdparty_spam(char *fname)
{
  // Placeholder - intentionally does nothing at present.

  // [TO DO!] - feed this spam to the filter to reinforce the
  // badness scores.  Mail that reaches this point definitely came
  // from someone trying to third-party relay through us.

  // Alternative to training here: tweak the destination in the
  // RCPT TO command and redirect the mail to a spamtrap.

  // What happens today: the mail is bounced with a reject message
  // in response to the DATA command and an RSET is sent to the
  // server, so the body is never read.

  // There are several dummy receive-only mailers on the net that
  // we could call or redirect to, but at this point it is probably
  // easier to do it all ourselves in the DATA command.

  (void)fname;  // unused until the training step is written
}

// Forward declaration; the definition is not in this part of the file.
// main() calls it as makeconnection(RELAY_HOST, &to_server, &from_server)
// and treats any result other than EX_OK as "could not reach the server".
static int makeconnection(
  char *host,				/* host name to connect to */
  int *outfile,				/* smtp output channel */
  int *infile 				/* smtp input channel */
);

// Jump target for the dead-man's switch; main() arms it with
// setjmp(RDTimeout) and exits when that call returns non-zero.
static jmp_buf RDTimeout;

// Handler-shaped function: abandons whatever was running and resumes
// at the setjmp(RDTimeout) point, which then sees the value 1.
// NOTE(review): main() installs a handler named 'dtimer' for SIGALRM -
// confirm how that name relates to this function.
static void dead_mans_timer(int sig)
{
  (void)sig;  // the signal number is not needed
  longjmp(RDTimeout, 1);
}


#define MAX_STRING 256
int main(int argc, char **argv)
{
//static int from_client, from_server, to_client, to_server; /* streams */
  static char comm[4], code[4]; /* command, and return code */
  char connecting_host[MAX_STRING+1]; /* [TO DO] Check for buffer overflow */

  // See comment in "RCPT TO" code: these strings may need to be
  // arrays, in order to accommodate multiple recipients.
  // As long as we pass each one through transparently, this
  // code should still work; the only problem is one that I
  // think sendmail has suffered from in the past too, which is
  // that you can send a third party spam by sending multiple
  // rcpt to's, with the first (and maybe in our case the last)
  // being local users, but the rest being off campus.  This
  // code needs to allow through the local ones but not the remote
  // ones - while allowing the mail to go through.  This is different
  // to what it does for a single recipient.  "TO DO"...

  // PostScript: it would appear that our VMS mailer at least
  // "does the right thing".  It accepts the valid local destination
  // users and rejects the remote ones, while accepting the mail.

  // The problem here is not coding, it's knowing what to do.  I have
  // seen mail sent to my server which was legitimate mail for multiple
  // recipients, only one of which was on my host.  This is the awkward
  // case which I need to decide if it is worth handling.

/* Something I just read on the net... may be an idea here I can steal...
     MessageWall allows administrators to customize filtering based on
     the recipient of a mail. This presents a logical problem when we
     have multiple recipients with different mail filtering settings:
     do we accept mail to all of them when some would block it, reject
     it to all when some would accept it, or silently drop the recipients
     that would block it without a bounce? None of these are acceptable
     solutions, so MessageWall takes another route: when a host that
     isn't authenticated or in relay_ips attempts to send to multiple
     recipients, MessageWall returns temporary errors for all recipients
     after the first. This causes all sane MTAs to retry in a few minutes
     (sendmail, postfix, exim, Exchange 5.5+). qmail never sends to more
     than one recipient at a time. However, some MTAs (notably IMail and
     MS Exchange 5.0) bounce the mail on such temporary errors. While
     MessageWall is in RFC violation on this point (and this is the only
     known RFC violation in the program), there is no other solution to
     this problem

     (Code-share with greylisting?)
 */

  char domain[MAX_STRING+1], username[MAX_STRING+1];

  int caller_is_local, domain_is_local; // third-party tests
  int suppress_returncode; // if true, reply from real server is dropped
  int this_is_definitely_spam = FALSE; // currently used for spamtrap only
  int thirdparty = FALSE; // set in "rcpt to" command.
  int i, c, rc;

#ifdef DEBUG
setup_debug();
#endif

  // Initially this code used stdin and stdout for communication
  // with the client, and fdopen'ed FILE*'s for communication with
  // the server.  While debugging a nasty problem (turned out to
  // be a firewall bug - the code here was fine) I had tried both
  // unbuffering stdin & stdout, and converting from FILE* to fd.

  // Rather than revert to the earlier FILE*-based code, I am leaving
  // both of these changes in place as they should make any tweaks
  // to the I/O in future easier.  But if it gives a problem for
  // you, you can delete the setvbuf stuff below entirely with
  // no worries about side-effects.  SMTP is line-buffered anyway.

  // Using fd's instead of FILEs *should* make it easier to set up
  // a fake sender's IP in the packet when I later migrate this code
  // into implementing a relay under a transparent bridge.  (Be careful
  // to make that code a parameter for people who can't hook it up that way)

  setvbuf(stdin,  NULL, _IONBF, BUFSIZ);
  setvbuf(stdout, NULL, _IONBF, BUFSIZ);
  setvbuf(stderr, NULL, _IONBF, BUFSIZ);

  // I could hard-wire these numbers (ie '0') but this is probably safer...
  from_client = fileno(stdin); // fd of stdin
  from_server = fileno(stdout); // fd of stdout

  username[0] = domain[0] = '\0';  // Initialisation.

  { // Determine IP of caller...
    // I *really* do NOT want the name, just the IP address.
    // - I hope that's what this code does...
    // If anyone wants to help me improve this code I'd
    // be very grateful!
    struct sockaddr_storage name;
    int namelen = sizeof(name);
    struct in_addr addr;

    strcpy(connecting_host, "0.0.0.0"); /* Default is an invalid address */
    // test a friendly address by telneting to localhost
    // test a hostile address by running this code from the command-line

    memset(&name, 0, sizeof(name));

    if (getpeername(from_client, (struct sockaddr *)&name, &namelen) >= 0) {
      addr = ((struct sockaddr_in *)&name)->sin_addr;
      // Had some portability issues with the IPV6 code.  Removed it.
//    if (name.ss_family == AF_INET) {
        (void) inet_ntop(AF_INET, &addr, connecting_host, MAX_STRING);
//    }
    }
    connecting_host[MAX_STRING-1] = '\0'; /* just in case */
  }

if ((debugclient != NULL) && (debugserver != NULL)) {
nowtime = time(NULL); deltatime = nowtime-starttime;
(void)fprintf(debugclient, "[t=%d] *** Connecting IP = %s ***\n", deltatime, connecting_host);fflush(debugclient);
(void)fprintf(debugserver, "[t=%d] *** Connecting IP = %s ***\n", deltatime, connecting_host);fflush(debugserver);
}

  // If the connection is from an IP that is in a blacklist
  // we could look at it at this point and reject the connection
  // without ever opening a connection to the real smtp server.
  // in fact all the ip-based tricks can be done at this point.

  // *However* note comments above about unreliability of blacklists

  // Might be better to set the 'tarpit/teergrube' flag (or do a
  // temporary rejection? - see the reference above to greylisting)

  caller_is_local = FALSE;
  {
    char *s, *t;
    s = strdup(LOCALIPS); // Ensure writable
    for (;;) {
      t = strchr(s, ':'); if (t != NULL) *t = '\0';
      if (strncmp(connecting_host, s, strlen(s)) == 0) {
        caller_is_local = TRUE;  // Note this for later.
        break;
      }
      if (t != NULL) *t = ':'; else /* last item */ break;
      s = t+1; /* next string to compare */
    }
  }

  // Currently the model is that each real mail server (MX record)
  // is front-ended by a separate instance of this code.  We *could*
  // tweak the RELAY_HOST code below to back off to the backup MX
  // hosts if our own relay_host is unreachable.  But for now that's
  // not how it works - it's a 1:1 correspondence between spam filters
  // and actual mail hosts.

  rc = makeconnection(RELAY_HOST, &to_server, &from_server);

  if (rc != EX_OK) {
    // Should back off to next server.  For now just exit.
    put(to_client, "421 Remote site down - forwarding service not available.\n");
    debug_exit(0);
  }


  if (setjmp(RDTimeout) != 0) {
    // Dead-man's switch.  This should suicide an smtpfilter process which
    // has run away with the CPU.  Set it long enough that it doesn't
    // interfere with real connections, and enable/disable it only around
    // code areas where we may be seeing the problem.  If we suicide during
    // a pipe connection to spamprobe it may be left running.  Also the
    // sendmail we're talking to...

    // WARNING: first real bug found in deployment... when receiving a
    // really long mail (eg 4mb, 12mb etc) - after it has been received
    // by this code, and is being sent to the other end, then this timeout
    // is invoked.  Solution: reset the alarm call during the send to the
    // real server.  (Previously it was only reset immediately before reads)
 
    debug_exit(0);
  }

  (void) signal(SIGALRM, dtimer);
  (void) alarm((unsigned)DeadMansTimeout);
  {
    int last = FALSE, first = TRUE; i = 0;
    for (;;) { // transparently pass through the initial welcome banner
               // which looks like a command response
      c = get(from_server, ReadTimeout);
      if (c == '\r') { i = 0 /* belt and suspenders */; continue;}
      if (c == EOF) debug_exit(1);  /* And tweak it so we know we're active. */
      if (c == '\n') {
        if (first == TRUE) put(to_client, " +SPAMFILTER %s\n", VERSION);
        else put(to_client, "\n"); // OOPS - forgot the \n in multiline responses!
        // (Note: put inserts the \r before the \n ...)
        if (last == TRUE) break;
        first = FALSE;
        i = 0;
      } else {
        put(to_client, "%c", c);
        i += 1;
        if ((i == 4) && (c != '-')) last = TRUE;
      }
    }
  }
  (void) alarm((unsigned)0);  // Cancel the alarm call

  this_is_definitely_spam = FALSE; // currently per-session...
  // If *any* mail is flagged as definitely spam during the session,
  // all subsequent mails will be too.  This *may* not be what is
  // wanted.  However for now, it's only a hack in order to
  // intercept spamtrap@local.domain mails.  And it is ifdef'd where
  // it is used.  Needs to be cleaned up a LOT before being used
  // in any deployment.

  /*
     OK, this is a problem :-(  When receiving a batch of mail
     from a relay (such as a backup MX server) and there is one
     definite spam in a batch, it taints the rest of the batch.

     I need to reset the flag (this_is_definitely_spam = FALSE;)
     on either a RSET or perhaps after the end of a DATA command.
  */

  for (;;) { // Loop on each command from sender
    i = 0;
    suppress_returncode = FALSE;  // Sometimes when we steal a command, we
                                  // don't want to pass on the real return code

    // get first four non-space chars into a buffer to examine the
    // command.  We do *not* buffer the entire line, because this
    // way we can avoid buffer overflow errors.  Everything after
    // those first 4 chars is copied verbatim.

    // We use a non-standard timeout while waiting for a command.
    // We use the regular timeout in all other places (at the moment)

    (void) signal(SIGALRM, dtimer);
    (void) alarm((unsigned)DeadMansTimeout);
    c = get(from_client, CmdTimeout);
    for (;;) {
      if (!((c == '\r') || (c == ' ') || (c == '\t'))) {
        if (c == EOF) debug_exit(1);
        if (isalpha(c) && isupper(c)) c = tolower(c); // for strncmp later
        comm[i++] = c;
        if (c == '\n') break;
        if (i == 4) break;
      }
      // Once we've started reading a command, we make the timeout even shorter
      c = get(from_client, ShortTimeout);
    }
    if (c == '\n') {
      // Command is too short.  Probably an error.  So let the real server handle it.
      int j; for (j = 0; j < i; j++) put(to_server, "%c", comm[j]);
      // and now read reply...
    } else {
      // We have a 4-char command.  If it is "data", handle it specially,
      // otherwise just send the 4 chars plus any following text.
      // This avoids buffer overflows.  Most other commands are transparent
      // - for now.  Later we might intercept RCPT TO: commands which
      // the server rejects as an invalid user, so that we can pass it
      // to the spam filter for training data (a honeypot function)
      // if we do, that won't be done here but in the reply gathering code
      // The drawback of this would be that legitimate but misaddressed
      // mail will not cause the sender to receive any kind of error.
      // For the moment we just monitor the "rcpt to" command to extract
      // the target username and domain for the purposes of third-party
      // relay detection - but that's not really good enough.

      // We could also intercept 'rcpt to' in order to do external redirection
      // of mail - but that's an opportunistic local hack that is not going
      // to be included in any version of code that's released to the public!
      // (It's in order to help us consolidate half a dozen locally managed
      //  departmental servers under one centralized server)

      // (other commands: HELO/EHLO - tweak the return string just for
      //   diagnostic purposes, to add the callers IP and reverse DNS???
      //   EHLO may be problematic because the extended commands which
      //   are enabled may break the clean request/response line format)

      if (strncmp(comm, "data", 4) == 0) {
        FILE *spamfile;
        char *spamname = tmpnam(NULL); // or mktemp("/tmp/smtp-XXXXXXXX.txt");
        // assumption here is that no users log on to this system so
        // the 'unsafe' calls tmpnam() and mktemp() are really not an issue...
        int state;
        for (;;) {
          // drain rest of line.  Shouldn't be any text...
          c = get(from_client, ReadTimeout);
          if (c == '\r') continue;
          if (c == EOF) debug_exit(1);
          if (c == '\n') break;
        }
        // fake this because we don't want to send the 'data' command
        // until we know for sure we're passing this mail to the server
        // (because once we issue the DATA command, we're committed -
        //  there's no 'abort' command - the final "." terminates the
        // mail... We could just exit this code but just in case a timeout
        // causes any pending data to be delivered, we won't even let it
        // get that far...)
        put(to_client, "354 Enter mail, end with \".\" on a line by itself\n");
       
        // buffer all the data before sending.  If it's spam, we have the
        // option (not yet implemented) of issuing a RSET rather than the
        // DATA command, and never sending the spam to the real mailer
        // in the first place.  However at this point in development we
        // are nowhere near confident enough to reject mail outright.
  
        spamfile = fopen(spamname, "w");
        if (spamfile == NULL) debug_exit(1); // more graceful failover needed?
                    // At a minimum we need error logging such as to syslog...


        // It would appear that for clamscan --mbox to work, the mail box *must*
        // have a unix-style 'From ' header as the first line.  So I am adding it
        // here, and I *****MUST***** remove it when we read the file back to
        // send to to the destination MTA.

        fprintf(spamfile, "From MAILER-DAEMON  %s", ctime(&starttime) /* ctime includes \n */ );

        /* Small state machine to track the sending of mail body.  Try to avoid writing
           the final "." to the output file... */

#define STARTLINE 1
#define DOTSTART 2
#define DOTTED 3
#define INLINE 4

        state = STARTLINE;
#ifdef DEBUG
        { int Timeout = ShortTimeout; // debugging. change back to Read
#else
        { int Timeout = ReadTimeout;
#endif

        // This wasn't relevant until I added anomy sanitizer, but what
        // I should be doing is removing the initial "." on any line,
        // because the SMTP protocol will have doubled any leading dots.
        // Doing so has the lucky side effect of removing the trailing
        // dot from the file without requiring special handling.  We
        // must remember to add them all back in when we send the body
        // on to the final smtp server.

        for (;;) {
          int lastc = c;
          do {c = get(from_client, Timeout);} while (c == '\r');

          // (The firewall bug I mentioned earlier manifested itself as the get
          // above never returning with the last "." of the data send.)

          // there is a possibility of a second bug here if the sender is not
          // sending \r\n but is only sending \r - we need some kind
          // of special case hack code after a timeout following a \r
          // - and maybe the timeout period should be shorter for the
          // character following a \r?

          if (c == EOF) {
              fflush(spamfile); fclose(spamfile);
#ifdef DEBUG
#else
                                                  remove(spamname);
#endif
              debug_exit(1); // broken connection on input => drop it!
          }

          if ((c == '.') && (state == STARTLINE)) state = DOTSTART;
          else if ((state == DOTSTART) && (c == '\n')) state = DOTTED;
          else if (c == '\n') state = STARTLINE;
          else if (c == '\r') /* do nothing */;
          else state = INLINE;
          if (state == DOTTED) break;
          if (state != DOTSTART) fputc(c, spamfile);

          // if STARTLINE we might consider sending a NOOP every X lines
          // or so just to keep the other end alive while we're collecting
          // the mail text? [TO DO!].  If we do, drain the reply silently.
          // (actually we can use the clock function now to keep them to
          //  a minimum)
        }
        }
        fflush(spamfile); fclose(spamfile);
        (void) alarm((unsigned)0);  // Cancel the alarm call

        (void) signal(SIGALRM, dtimer);  // reset before invoking spamprobe
        (void) alarm((unsigned)DeadMansTimeout);
        // determine spamscore of spamfile!
#ifdef DEBUG_SPAMTRAP
        if (this_is_definitely_spam) {
          int c;
          FILE *commandfile;
          char tmpbuff[1024]; // still hacky.  temp code.
          sprintf(tmpbuff, "/bin/ln %s %s.SPAMTRAP", spamname, spamname);
          system(tmpbuff);
#ifdef NEVER

  // TEMPORARILY DISABLING spamtrap FEATURE IN CASE OF POISONING,
  // UNTIL QUALITY OF SPAMTRAP DATA IS ASSESSED.

          sprintf(tmpbuff, SPAMPROBE_TRAIN_COMMAND, spamname);
          commandfile = popen(tmpbuff, "r"); // use alarm just in case?
          if (commandfile != NULL) {
            for (;;) {
              c = fgetc(commandfile); if (c == EOF) break;
            }
            pclose(commandfile);
          }
#endif
        }
#endif





  // [TO DO] - only invoke this on files whose size is within a certain range?

        if (is_virus(spamname)) {
          FILE *commandfile;
          char *tmpbuff;
          tmpbuff = malloc(strlen(spamname)*2+1024);
          if (tmpbuff == NULL) {
            // [TO DO]: better error appropriate number...
            put(to_client, "451 Connection aborted - spamfilter error\n");
            debug_exit(1);
          }

          // 1) IF WE PASS THE VIRUS THROUGH AS SPAM:
          //    as well as feeding viruses back to the training (which helps catch 0-day viruses)
          //    I would also like to add an X-Virus header of some sort, preferably with the name
          //    and maybe which of the AVs caught it.
          // 2) PREFERABLY:
          //    Log the virus and delete it.  Do not pass it on to the user.  Either silently
          //    drop it on the floor, or send an error back to the sender.  Currently working on the latter.

          sprintf(tmpbuff, SPAMPROBE_TRAIN_COMMAND, spamname); // train with it anyway.
          commandfile = popen(tmpbuff, "r");
          if (commandfile != NULL) {
            int c;
            for (;;) {
              c = fgetc(commandfile); // Again, drain to avoid signal 13.
              if (c == EOF) break;
            };
            // don't actually care about output from the train command. forget 'c'
            pclose(commandfile);
          }
          // Testing the 'kill at server' code here!
          put(to_server, "RSET\n");
          // We should expect a 2XX code, which is fine!
          // It is UNTESTED whether this code correctly drops through to
          // read a reply code and deal with it appropriately - testing NOW.

          // OUCH!  Nasty C programming bug:  forgot the "else" in
          // the statement below.  Still compiled OK, just didn't do
          // what I wanted it to do :-(

        } else if (!thirdparty && (this_is_definitely_spam || spam(spamname))) {  // Thirdparty was determined earlier by the IP address
                            // to a spamtrap?           sa+sp+av?
          // The code below was originally written assuming that we wanted
          // to delete the spam here before it even got to the real mailer.
          // However we decided it was safer to insert a tagged header.

          // The 'spam' command will already have used the bad mail to
          // reinforce the 'bad' spamscore if we want to allow that.
          // Hopefully with a low reinforcement weighting in case of error.

#ifdef KILL_AT_SERVER
#error DO NOT ENABLE THIS FLAG.  THE CODE IS UNTESTED.
          // We could fake a reply ourselves but why bother...
          put(to_server, "RSET\n");
          // We should expect a 2XX code, which is fine!
          // It is UNTESTED whether this code correctly drops through to
          // read a reply code and deal with it appropriately.
#else


          // Here we can sanitize the stream if we wish:
#ifdef USE_ANOMY
          {
          char *command = malloc(strlen(spamname)+1024);
          if (command != NULL) {
            sprintf(command, "/usr/local/bin/sanitize < %s 2>/dev/null", spamname);
            spamfile = popen(command, "r");
// ********* BUG!  What if it fails?!!! - currently, mail will be rejected.  OK but not ideal.
//  Two suggestions:
//   1) fopen the executable first, to confirm it is installed
//   2) fall back to reading from raw file if fopen fails.
//      - only care necessary is to fclose rather than pclose,
//        so suggestion is to "if (USE_ANOMY) ..." rather than "#ifdef USE_ANOMY ..."
//
          } else spamfile = NULL;
          }
#else
          spamfile = fopen(spamname, "r");
#endif


          if (spamfile == NULL) {
            // [TO DO]: better error appropriate number...
            put(to_client, "451 Connection aborted - spamfilter error\n");
            debug_exit(1);
          }
          // We actually send the DATA command, and have to
          // drain and discard the 354 reply which we faked earlier...
          (void) alarm((unsigned)0);  // Cancel the alarm call

          (void) signal(SIGALRM, dtimer);  // reset before invoking spamprobe
          (void) alarm((unsigned)DeadMansTimeout);
          // determine spamscore of spamfile!
          put(to_server, "DATA\n");
          for (;;) {
            c = get(from_server, ReadTimeout);
            if (c == '\r') continue;
            if (c == EOF) {
              // [TO DO]: check SMTP spec for appropriate error number...
              put(to_client, "451 Connection aborted\n");
              debug_exit(1);
            }
            if (c == '\n') break;
          }
          // Send a generated 'Received:' line to tag this as spam.
          // Received is my personal choice of where to insert the tag
          // because it's unobtrusive.

          //  We now have all the info - sender's IP, target user@domain, date/time

          put(to_server, "Received: from %s ([%s]) by %s", /* add "; Date" as well??? */
                  connecting_host, connecting_host, SPAM_TAG);
          // first %s above should be HELO name, and the
          // ([]) should have the DNS lookup of the IP between '(' and '['
          // however I have not yet done that.
          put(to_server, " for <%s@%s>",
                  username, domain);
          {                // testing
          struct tm *ptr;
          time_t tm;
          char str[60];

          tm = time(NULL);
          ptr = localtime(&tm);
          strftime(str ,100 , "%a, %d %b %Y %H:%M:%S %Z",ptr);
          put(to_server, "; %s", str);
          }

          put(to_server, "\n");

#ifdef NEVER
////////////////////////////////////////////////// OUT!!!!
          {int i = 0;
           int first_line = TRUE;
           int start_line = TRUE;
           int in_header = TRUE;
           int lastc = EOF;
           for (;;) {
            lastc = c;
            c = fgetc(spamfile);
            if (c == EOF) break;
            i += 1;
            if (i == 10000) {
              i = 0;
              (void) alarm((unsigned)DeadMansTimeout);
            }
            if ((c == '\n') && (lastc == '\n') && in_header) { // first blank line
              in_header = FALSE;
#ifdef CUSTOMHEADER
              put(to_server, CUSTOMHEADER, SPAM_TAG);
#endif

            }
            // THE FIRST LINE IS THE UNIX MBOX "From " LINE WHICH WAS ADDED ABOVE
            // AND MUST NOW BE REMOVED HERE.
            if (!first_line) {
              if (start_line && (c == '.')) put(to_server, ".."); else put(to_server, "%c", c);
            }
            if (c == '\n') { first_line = FALSE; start_line = TRUE;} else start_line = FALSE;
           }
           if (lastc != '\n') put(to_server, "\n");
           put(to_server, ".\n");
          }
#else
////////////////////////////////////////////// IN !!!
#define HEADER_START 0
#define HEADER_COPY_LINE 1
#define HEADER_CONDITIONALLY_COPY_CONTINUATION 2
#define HEADER_DISCARD 3
#define HEADER_CONDITIONALLY_DISCARD_CONTINUATION 4
#define HEADER_DONE 99

// These are all the fields which Outlook looks at to decide how
// to display priority :-/
#define TEST_FIELD_1 "X-Priority:"
#define TEST_FIELD_2 "Priority:"
#define TEST_FIELD_3 "X-MSMail-Priority:"
#define TEST_FIELD_4 "Importance:"

                     // longest field
#define MAX_TEST_LEN strlen(TEST_FIELD_3)
{
  static char field[255]; // longer than longest field
  int c = EOF, next = 0, lastc;

            // THE FIRST LINE IS THE UNIX MBOX "From " LINE WHICH WAS ADDED ABOVE
            // AND MUST NOW BE REMOVED HERE.
  int state = HEADER_DISCARD;

  int i = 0;
//  int first_line = TRUE;
//  int start_line = TRUE;
//  int in_header = TRUE;

#ifdef CUSTOMHEADER
  // for now, put back in at start.  may tidy up later
  put(to_server, CUSTOMHEADER, SPAM_TAG);
#endif
  for (;;) {
    lastc = c;
    c = fgetc(spamfile);
    if (c == EOF) break;
    i += 1;
    if (i == 10000) {
      i = 0;
      (void) alarm((unsigned)DeadMansTimeout);
    }

RESTART_STATE_MACHINE:
    if (c == '\n' && lastc == '\n') state = HEADER_DONE;
    if (state == HEADER_START) {
      // gather field name
      field[next++] = c; field[next] = '\0';
      //fprintf(stderr, "Compare '%s' vs '%s'\n", TEST_FIELD, field);
      if (  (strcasecmp(field, TEST_FIELD_1) == 0)
         || (strcasecmp(field, TEST_FIELD_2) == 0)
         || (strcasecmp(field, TEST_FIELD_3) == 0)
         || (strcasecmp(field, TEST_FIELD_4) == 0)) {
        state = HEADER_DISCARD;
        next = 0;
        //fprintf(stderr, "Found and discarding!\n");
      } else if (c == '\n') {
        put(to_server, "%s", field);
        state == HEADER_CONDITIONALLY_COPY_CONTINUATION;
        next = 0;
      } else if (next >= MAX_TEST_LEN) {
        put(to_server, "%s", field);
        state = HEADER_COPY_LINE;
        next = 0;
      } // else still gathering, no changes
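// ************ BUG!  If it is a short header and followed by newline...?
//   FOR EXAMPLE  "Cc: me\nX-Priority: ..." would not recognise the X-Priority
//   (but it is a benign bug because it won't mangle the mail, it will just
//    mean that it is not filtered correctly)
// should flush and reset the string on a newline and a colon.
// NOTE: in the (c == '\n') branch above, the statement
//   "state == HEADER_CONDITIONALLY_COPY_CONTINUATION;"
// is a comparison whose result is discarded, not an assignment, so the
// state is left as HEADER_START (with 'next' reset to 0) at that point.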

/*

This mail did not delete the previous X-Priority: field.  Could it
be affected by this bug?

Received: from pizzabox.gtoal.com (localhost [127.0.0.1])
	by gtoal.com (8.12.10/8.12.10/SuSE Linux 0.7) with ESMTP id i7HBhghE019473
	for <gtoal@testappl.panam.edu>; Tue, 17 Aug 2004 06:43:42 -0500
Received: (from gtoal@localhost)
	by pizzabox.gtoal.com (8.12.10/8.12.10/Submit) id i7HBhgOW019472
	for gtoal@testappl.panam.edu; Tue, 17 Aug 2004 06:43:42 -0500
Received: from 4671.uemlpromos.com (localhost [127.0.0.1])
	by gtoal.com (8.12.10/8.12.10/SuSE Linux 0.7) with ESMTP id i7HBhchE019458
	for <gtoal@vt.com>; Tue, 17 Aug 2004 06:43:38 -0500
Message-Id: <200408171143.i7HBhchE019458@gtoal.com>
Received: from 209.236.50.67 ([209.236.50.67]) by spamfilter for <gtoal@vt.com>
Content-Type: text/html; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: Free Sony Camcorder
From: "Camcorder Connection"<3157@4671.uemlpromos.com>
To: gtoal@vt.com
X-Priority: 3
Date: Tue, 17 Aug 2004 11:28:11


 */
    } else if (state == HEADER_COPY_LINE) {
      put(to_server, "%c", c);
      if (c == '\n') state = HEADER_CONDITIONALLY_COPY_CONTINUATION;
    } else if (state == HEADER_CONDITIONALLY_COPY_CONTINUATION) {
      if (c == '\t' || c == ' ') {
        put(to_server, "%c", c);
        state = HEADER_COPY_LINE;
        //fprintf(stderr, "CONTINUATION FOUND!\n");
      } else {
        state = HEADER_START;
        goto RESTART_STATE_MACHINE;
      }
    } else if (state == HEADER_DISCARD) {
      if (c == '\n') state = HEADER_CONDITIONALLY_DISCARD_CONTINUATION;
    } else if (state == HEADER_CONDITIONALLY_DISCARD_CONTINUATION) {
      if (c == '\t' || c == ' ') {
        state = HEADER_DISCARD;
        //fprintf(stderr, "CONTINUATION *DISCARD* FOUND!\n");
      } else {
        state = HEADER_START;
        goto RESTART_STATE_MACHINE;
      }
    } else {
      // BODY - quote any leading dots.
      if ((lastc == '\n') && (c == '.')) put(to_server, ".."); else put(to_server, "%c", c);
    }
  }
  // Flush
  if (lastc != '\n') put(to_server, "\n");
  put(to_server, ".\n");
  fflush(spamfile);
}
#endif

#ifdef USE_ANOMY
          pclose(spamfile);
#else
          fclose(spamfile);
#endif
          // Drop through to reading reply code from server
#endif  /* passed on to server */

        } else if (thirdparty) {

          // [TO DO] Log it to /tmp/thirdparty ??

          // This is a 3rd-party spam so DO NOT SEND IT to the real server...
          // We could fake a reply ourselves but why bother...
          put(to_server, "RSET\n"); // Generate error reply to spammer's DATA command.
          // We should expect a 2XX code, which is fine!
          this_is_thirdparty_spam(spamname);  // [TO DO] this procedure is empty for now
          // but we really should accept the text of the spam and feed it to the Bayesian
          // system as training data.  Either write code to read it; or tweak the 'rcpt to'
          // address and send it to a spamtrap account instead of the intended recipient.
          // (not the best solution though.  Accepting it here would be neater, although
          //  more programming effort)

          // Should we pretend to accept it, just to waste the spammers time?
          // Maybe also teergrube them?
        } else {
          // *** NOT SPAM - deliver it! ***
          spamfile = fopen(spamname, "r");
          if (spamfile == NULL) {
            // TO DO: better error appropriate number...
            put(to_client, "451 Connection aborted\n");
            debug_exit(1);
          }
          // We actually send the DATA command, and have to
          // drain and discard the 354 reply which we faked earlier...
          put(to_server, "DATA\n");
          for (;;) {
            c = get(from_server, ReadTimeout);
            if (c == '\r') continue;
            if (c == EOF) {
              // TO DO: better error appropriate number...
              put(to_client, "451 Connection aborted\n");
              debug_exit(1);
            }
            if (c == '\n') break;
          }
          // tag it as good... not strictly needed but it will help us
          // differentiate between mail that went through the filter
          // and mail which bypassed it, eg if the firewall redirect
          // was not in place, or a backup MX was used and wasn't also
          // filtered somewhere (which would be a misconfiguration).

          put(to_server, "Received: from %s ([%s]) by %s", /* add "; Date" as well??? */
                  connecting_host, connecting_host, GOOD_TAG);
          // first %s above should be HELO name, and the
          // ([]) should have the DNS lookup of the IP between '(' and '['
          // however I have not yet done that.

          put(to_server, " for <%s@%s>",
                  username, domain);
          {                // testing
          struct tm *ptr;
          time_t tm;
          char str[60];

          tm = time(NULL);
          ptr = localtime(&tm);
          strftime(str ,100 , "%a, %d %b %Y %H:%M:%S %Z",ptr);
          put(to_server, "; %s", str);
          }

          put(to_server, "\n");


#ifdef CUSTOMHEADER
          // OK, this is a hack.  We've just now decided at UTPA
          // not to put in any headers in good mail, and to spam filter
          // on the *presence* of the X-UTPA-Spam header rather than
          // the *contents* of it.  This will allow us to change it
          // later from the simple text "utpaspam" to a descriptive tag.

          // I will generalise this later, but I'm under the gun
          // right now to deploy this code today...
          if ((PORT < 1000) || (PORT > 1004)) put(to_server, CUSTOMHEADER, GOOD_TAG);
#endif
          {int i = 0;
           int first_line = TRUE;
           int start_line = TRUE;
           int lastc = EOF;
           for (;;) {
            c = fgetc(spamfile);
            if (c == EOF) break;
            lastc = c;
            i += 1;
            if (i == 10000) {
              i = 0;
              (void) alarm((unsigned)DeadMansTimeout);
            }
            // THE FIRST LINE IS THE UNIX MBOX "From " LINE WHICH WAS ADDED ABOVE
            // AND MUST NOW BE REMOVED HERE.
            if (!first_line) {
              if (start_line && (c == '.')) put(to_server, ".."); else put(to_server, "%c", c);
            }
            if (c == '\n') { first_line = FALSE; start_line = TRUE;} else start_line = FALSE;
           }
           if (lastc != '\n') put(to_server, "\n");
           put(to_server, ".\n");
          }
          fclose(spamfile);
        }
#ifdef DEBUG
#else
        remove(spamname);
#endif
        // Now drop through even further below and read the reply code...
        this_is_definitely_spam = FALSE; // BUG FIX AFTER DATA 20040828
      } else if (strncmp(comm, "rcpt", 4) == 0) {

        // TO DO: on reading RFC 821 it appears that a sender may send
        // multiple RCPT TO lines, to cut down on bandwidth use for
        // mailing lists, CC's, etc.  I do not yet implement this!

        // I have not seen this feature used in legitimate incoming mail to
        // any of my sites.  However I need to at the least detect that
        // it has happened and log it with a feature request...
        // I don't care if it is only used by spammers, but I need
        // to implement it if it is used by mailing lists.

        // If you receive multiple mail-to's and even one of them is
        // a spamtrap address, you can probably assume that the mail is spam.
        // and kill or tag it for everyone.

        // I believe the current third-party test will reflect only
        // the last 'rcpt to' destination.  What we need to do
        // is pass through "rcpt to" commands for local users but trash
        // those for thirdparty recipients *and* set 'thirdparty'
        // to TRUE *if and only if* there are NO local recipients.

        int state;
        char *domainp, *usernamep;
        // Validate the destination address against the list of allowed
        // recipient domains in DOMAINS.  If the test fails but the
        // sender is in LOCALIPS then let it go through anyway.
        // send the buffered 4-letter command first
        int j; for (j = 0; j < i; j++) put(to_server, "%c", comm[j]);
        // and then copy over any remaining text (eg extended options)
        thirdparty = FALSE; // set on every "rcpt to" command
        domainp = domain; usernamep = username; i = 0;

        // [TO DO]  *** NEED TO ADD MORE CHECKS ***
        // - checks for relaying... eg MAIL FROM: <f851zgwqu@ereap@lamer.hu>
        // disallow ! @ % :: etc
        // Need to check both username part and host part.

        // Small state machine to extract username@domain from "rcpt to" command.
#define PRE 1
#define GRAB_USERNAME 2
#define GRAB_DOMAIN 3
#define DONE 4
        state = PRE;
        for (;;) {
          c = get(from_client, ReadTimeout);
          if (c == '\r') continue;
          if (c == EOF) debug_exit(1);
          put(to_server, "%c", c); // track the destination address here
          if (c == '\n') break;
          if ((state == GRAB_USERNAME) || (state == GRAB_DOMAIN)) {
            // canonicalise case in username and domain
            if (isalpha(c) && isupper(c)) c = tolower(c);
          }
          if ((state == PRE) && (c == '<')) {
            state = GRAB_USERNAME; i = 0;
          } else if ((state == GRAB_USERNAME) && (c == '>')) {
            state = DONE;
            strcpy(domain, DEFAULT_DOMAIN); domainp = domain+strlen(domain);
            // [TO DO!] #define for local default domain above
          } else if ((state == GRAB_DOMAIN) && (c == '>')) {
            state = DONE;
          } else if ((state == GRAB_USERNAME) && (c == '@')) {
            state = GRAB_DOMAIN; i = 0;
          } else if ((state == GRAB_USERNAME) && (c != ' ')) {
            // truncate if username too long (might be x500 address :-( )
            if (i < (MAX_STRING-1)) {*usernamep++ = c; i += 1;}
          } else if ((state == GRAB_DOMAIN) && (c != ' ')) {
            // truncate if domain too long - it's probably a hack attempt
            if (i < (MAX_STRING-1)) {*domainp++ = c; i += 1;}
          }
        }
        *domainp = '\0';
        *usernamep = '\0';

        // Now check that the domain they want to mail to is
        // a local one supported by our local mail server

        // NOTE: as referred to earlier, we *could* (if we were
        // being hacky) tweak the destination username and/or
        // domain at this point.  Not nice, but incredibly useful
        // in certain circumstances...
        // Beware case insensitivity and "username+extradata" format.



  // THIS WHOLE SECTION IS A ***HACK*** - TEMPORARY CODE FOR UTPA,
  // SHOULD ***NOT*** MAKE IT INTO ANY RELEASE.

#ifdef DEBUG_SPAMTRAP
#if PORT == 1001
	{
#include "deadusers.c"
	int each = 0;
	char *s;
	// brute force and ignorance.  THIS IS A HACK.
	for (;;) {
	  if ((s = deaduser[each]) == NULL) break;
	  if (strcasecmp(username, s) == 0) {
	    strcpy(username, "spamtrap");
	// There's a problem here - by the time we get to this code, the
	// RCPT TO command has already been sent to the VMS.  Maybe we
	// Could send another one and ignore the first rejection?
	{
    for (;;) { // loop over reply lines until all data received
      int j;
      i = 0;
      for (;;) {
        c = get(from_server, ReadTimeout);
        if (c == '\r') continue;
        if (c == EOF) {
          // TO DO: more appropriate error-number...
          put(to_client, "451 Connection aborted\n");
          debug_exit(1);
        }
        if (c == '\n') break;
        code[i] = c;  // We assume no redundant spaces from server
        i += 1;
        if (i == 4) break;
      }
      // lines with "250-" etc are continuation lines so there is more to come
      if (i < 4) break;
      if (code[3] != '-') break;
      // if it's a continuation line we don't need to buffer any more so
      // send out the current buffer plus the rest of the line...
      //if (!suppress_returncode) {
        //for (j = 0; j < i; j++) put(to_client, "%c", code[j]);
      //}
      // then transfer the remainder (if any) of the return code message
      for (;;) {
        c = get(from_server, ReadTimeout);
        if (c == '\r') continue;
        if (c == EOF) {
          // TO DO: more appropriate error-number...
          put(to_client, "451 Connection aborted\n");
          debug_exit(1);
        }
        //if (!suppress_returncode) put(to_client, "%c", c);
        if (c == '\n') break;
      }

      // There's a good tarpit scheme implemented at the link below:
      // http://www.iks-jena.de/mitarb/lutz/usenet/antispam.html
      // which inserts a delay between each line of a continuation
      // that's sent back to the spammer.  This ties them up for
      // a long time at low cost to us.  If we wanted to do that,
      // we'd do it here.  Note it does not hurt legitimate senders
      // very much, just spammers.  Neat idea.
    }
    { // drop-through case prints the buffered RC and following text.
      // return the buffered 4-char response first
      int j;
      //if (!suppress_returncode) {
        //for (j = 0; j < i; j++) put(to_client, "%c", code[j]);
      //}
      // then transfer the remainder (if any) of the return code message
      for (;;) {
        c = get(from_server, ReadTimeout);
        if (c == '\r') continue;
        if (c == EOF) debug_exit(1);
        //if (!suppress_returncode) put(to_client, "%c", c);
        if (c == '\n') break;
      }

    }
	}
	put(to_server, "RCPT TO:<spamtrap@panam.edu>\n");
	// now we can take up where we left off...
	    break;
	  }
	  each += 1;
	}
	}
#endif
        if (strcmp(username, "spamtrap") == 0) {
          // the reason for this is that our recipient system is
          // on a different machine, so we can't just have the
          // spamtrap user feed the data to "spamprobe train-spam"
          this_is_definitely_spam = TRUE;
        }
#endif

        // NOTE: With the current code we have just noted the
        // username on the fly; we ideally would prefer to buffer
        // the whole TO line, and just send back a fake reply
        // like the one below.

        // Currently it would be possible for the spammer to ignore the reject code
        // and just send the DATA command anyway. 
        // This area needs some work...

        domain_is_local = FALSE;
        {
          char *s, *t;
          s = strdup(DOMAINS); // Ensure writable
          for (;;) {
            t = strchr(s, ':'); if (t != NULL) *t = '\0';
            if (strncmp(domain, s, strlen(s)) == 0) {
              domain_is_local = TRUE;  // Note this for later.
              break;
            }
            if (t != NULL) *t = ':'; else /* last item */ break;
            s = t+1; /* next string to compare */
         }
        }

        if (!domain_is_local) {
          // if 'caller_is_local', it's a legit outgoing mail
          // otherwise it's a third-party spam which we might
          // want to pretend to accept in order to feed our
          // spam filter with bad mail...
          if (!caller_is_local) {

            // if we return a 2XX code below the spammer will send
            // the mail, and since we know to expect it, we can
            // accept it privately and use it as fodder to train
            // the spam filter.

            put(to_client, "454 <%s@%s>: Relay access denied\n",
                    username, domain);
            suppress_returncode = TRUE; thirdparty = TRUE;
            // Since for now we've already sent the RCPT TO command
            // to the remote server, it's too late to undo it.  However
            // we can explicitly catch the "data" command later.
            // We *could* just send a RSET command here to undo this
            // but the proper solution is not to send this line
            // until we've read it all and checked it.  However that
            // doesn't fit well with the 4-char buffer scheme and
            // this could be a arbitrary length line...
          }
        }
      } else { // All other commands...

        if (strncmp(comm, "rset", 4) == 0) {
          this_is_definitely_spam = FALSE; // BUG FIX AFTER RSET 20040828
        }

        // BEWARE NON-STD EHLO EXTENSIONS!
        // send the buffered 4-letter command first
        int j; for (j = 0; j < i; j++) put(to_server, "%c", comm[j]);
        // and then copy over any remaining text (eg extended options)
        for (;;) {
          c = get(from_client, ReadTimeout);
          if (c == '\r') continue;
          if (c == EOF) debug_exit(1);
          put(to_server, "%c", c);
          if (c == '\n') break;
        }
      }
    }

    // THIS IS THE DROP-THROUGH CODE WHICH MANY OF THE SEGMENTS ABOVE
    // PASS CONTROL TO NEXT, IN ORDER TO READ THE REPLY FROM THE SERVER.

    for (;;) { // loop over reply lines until all data received
      int j;
      i = 0;
      for (;;) {
        c = get(from_server, ReadTimeout);
        if (c == '\r') continue;
        if (c == EOF) {
          // TO DO: more appropriate error-number...
          put(to_client, "451 Connection aborted\n");
          debug_exit(1);
        }
        if (c == '\n') break;
        code[i] = c;  // We assume no redundant spaces from server
        i += 1;
        if (i == 4) break;
      }
      // lines with "250-" etc are continuation lines so there is more to come
      if (i < 4) break;
      if (code[3] != '-') break;
      // if it's a continuation line we don't need to buffer any more so
      // send out the current buffer plus the rest of the line...
      if (!suppress_returncode) {
        for (j = 0; j < i; j++) put(to_client, "%c", code[j]);
      }
      // then transfer the remainder (if any) of the return code message
      for (;;) {
        c = get(from_server, ReadTimeout);
        if (c == '\r') continue;
        if (c == EOF) {
          // TO DO: more appropriate error-number...
          put(to_client, "451 Connection aborted\n");
          debug_exit(1);
        }
        if (!suppress_returncode) put(to_client, "%c", c);
        if (c == '\n') break;
      }
     
      // There's a good tarpit scheme implemented at the link below:
      // http://www.iks-jena.de/mitarb/lutz/usenet/antispam.html
      // which inserts a delay between each line of a continuation
      // that's sent back to the spammer.  This ties them up for
      // a long time at low cost to us.  If we wanted to do that,
      // we'd do it here.  Note it does not hurt legitimate senders
      // very much, just spammers.  Neat idea.
    }

    if (strncmp(comm, "help", 4) == 0) {
      // just a debug test - always return a silly number for help ;-)
#ifdef DEBUG
      code[0] = '9'; code[1] = code[2] = '1';
#endif
      // then drop through and print
      put(to_client, "%c%c%c-VERSION=%s\n", code[0], code[1], code[2], VERSION);
      put(to_client, "%c%c%c-DOMAINS=%s\n", code[0], code[1], code[2], DOMAINS);
      put(to_client, "%c%c%c-DEFAULT_DOMAIN=%s\n", code[0], code[1], code[2], DEFAULT_DOMAIN);
      put(to_client, "%c%c%c-LOCALIPS=%s\n", code[0], code[1], code[2], LOCALIPS);
      put(to_client, "%c%c%c-RELAY_HOST=%s\n", code[0], code[1], code[2], RELAY_HOST);
      put(to_client, "%c%c%c-PORT=%d\n", code[0], code[1], code[2], PORT);
      put(to_client, "%c%c%c-GOOD_TAG=%s\n", code[0], code[1], code[2], GOOD_TAG);
      put(to_client, "%c%c%c-SPAM_TAG=%s\n", code[0], code[1], code[2], SPAM_TAG);
      // remember to add any more config options here.
      // Also, some MTA's do not implement HELP!  Postfix is especially
      // unfriendly to random typers, and pressing CR to postfix messes you up badly
    } else if ((strncmp(comm, "rcpt", 4) == 0) && (code[0] == '4')) {
      // 454 <victim@thirdparty.com>: Relay access denied
      // tweak the return code to a success, then handle the
      // rest of the session with a dummy mail program. [TO DO!]
      code[0] = '2';
      /* debug_exit(fake_mailer()); */
      // currently drop through.  *** UNTESTED ***
    }

    { // drop-through case prints the buffered RC and following text.
      // return the buffered 4-char response first
      int j;
      if (!suppress_returncode) {
        for (j = 0; j < i; j++) put(to_client, "%c", code[j]);
      }
      // then transfer the remainder (if any) of the return code message
      for (;;) {
        c = get(from_server, ReadTimeout);
        if (c == '\r') continue;
        if (c == EOF) debug_exit(1);
        if (!suppress_returncode) put(to_client, "%c", c);
        if (c == '\n') break;
      }
     
    }

    if ((strncmp(comm, "quit", 4) == 0) && (code[0] == '2')) {
      debug_exit(0); // successful QUIT.  (do we even care if it was successful?)
    }
  }
  (void) alarm((unsigned)0);  // Cancel the alarm call
  debug_exit(0);
  return(1); // Shouldn't get here
}

// --------------- separate code included here for convenience
// This code was extracted from an old copy of the "vrfy" command.
// I need to check the licensing, or preferably write a functional
// equivalent myself.
#include <errno.h>
#include <netdb.h>

/* Diagnostic switches: makeconnection() prints a "connecting to ..." trace
 * when verbose >= 2 or debug is non-zero.  Both default to off. */
int verbose = 0, debug = 0;

/* Legacy boolean type carried over with the extracted vrfy code. */
typedef int	bool;

/*
 * Read a struct in_addr through an untyped address pointer, e.g. one entry
 * of hostent.h_addr_list.  The argument and the whole expansion are
 * parenthesized so that uses such as incopy(p + n) or incopy(p).s_addr
 * parse as intended.
 */
#define incopy(a)	(*((struct in_addr *)(a)))

/* All-ones u_long sentinel; presumably compared against inet_addr() results
 * -- TODO confirm against the callers. */
#define NOT_DOTTED_QUAD	((u_long)-1)

#define MAXHOSTNAME 256		/* maximum size of a hostname */

#define MAXADDRS 35		/* max address count from gethostnamadr.c */

char *MyHostName = NULL;	/* my own fully qualified host name */

/* Jump target armed with setjmp() in makeconnection() just before each
 * connect() attempt. */
static jmp_buf Timeout;

/*
 * SIGALRM handler used while connecting: abandons the pending call by
 * jumping back to the setjmp(Timeout) site with value 1.  Never returns.
 *
 * sig - signal number delivered by the system; not used.
 *
 * NOTE(review): plain setjmp/longjmp is not guaranteed to restore the
 * signal mask when leaving a handler; confirm whether sigsetjmp/siglongjmp
 * is needed on the target platform (both ends must change together).
 */
static void timer(int sig)
{
	(void) sig;	/* handler signature requires it; value is irrelevant */
	longjmp(Timeout, 1);
}

/*
** MAKECONNECTION -- Establish SMTP connection to remote host
** ----------------------------------------------------------
**
**	Returns:
**		Status code indicating success or failure.
**
**	Outputs:
**		Sets outfile and infile to the output and input channel.
*/

static int makeconnection(
  char *host,				/* host name to connect to */
  int *outfile,				/* smtp output channel */
  int *infile 				/* smtp input channel */
)
{
	struct hostent *hp;
	struct servent *sp;
	struct sockaddr_in sin;
	static char hostname[MAXHOSTNAME+1];
	struct in_addr inaddr[MAXADDRS];
	int naddrs;
	u_long addr;
	int sock;
	register int i;

/*
 * Reset state.
 */

        memset(&sin, 0, sizeof(sin));

	errno = 0;
	h_errno = 0;

	if (host == NULL || host[0] == '\0')
		host = "localhost";

	addr = inet_addr(host);

	{
		hp = gethostbyname(host);
		if (hp == NULL)
		{
			/* cannot contact nameserver, force retry */
			if (errno == ETIMEDOUT || errno == ECONNREFUSED)
				h_errno = TRY_AGAIN;

			/* nameserver could not resolve name properly */
			if (h_errno == TRY_AGAIN)
				return(EX_TEMPFAIL);

			/* no address found by nameserver */
			return(EX_NOHOST);
		}
		host = hp->h_name;

		for (i = 0; i < MAXADDRS && hp->h_addr_list[i]; i++)
			inaddr[i] = incopy(hp->h_addr_list[i]);
		naddrs = i;
	}

	(void) strncpy(hostname, host, MAXHOSTNAME);

/*
 * Try to make connection to each of the addresses in turn.
 */
	sp = getservbyname("smtp", "tcp");
	if (sp == NULL)
		return(EX_OSERR);

	for (i = 0; i < naddrs; i++)
	{
		sin.sin_family = AF_INET;
		sin.sin_port = sp->s_port;
		sin.sin_addr = inaddr[i];

		sock = socket(AF_INET, SOCK_STREAM, 0);
		if (sock < 0)
			return(EX_TEMPFAIL);

		if (verbose >= 2 || debug)
		{
			printf("connecting to %s port %d\n",
				inet_ntoa(sin.sin_addr), sin.sin_port);
	        }

		if (setjmp(Timeout) != 0)
		{
			(void) close(sock);
			errno = ETIMEDOUT;
			continue;
		}

		(void) signal(SIGALRM, timer);
		(void) alarm((unsigned)ConnTimeout);

		if (connect(sock, (struct sockaddr *)&sin, sizeof(sin)) < 0)
		{
			int err = errno;
			(void) alarm((unsigned)0);
			(void) close(sock);
			errno = err;
			if (errno == EINTR)
				errno = ETIMEDOUT;
			if (errno == ECONNREFUSED)
				return(EX_TEMPFAIL);
			continue;
		}
		(void) alarm((unsigned)0);

		*outfile = sock;
		*infile  = dup(sock);

		errno = 0;
		h_errno = 0;
		return(EX_OK);
	}

	return(EX_TEMPFAIL);
}
