#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/*
 * squeeze takes an ARM executable image file and compresses it, usually
 * to about half the size, adding decompression code so that the image
 * will automatically expand itself when it is run.
 *
 * For details of the compression scheme, see doc.squeeze.  Briefly,
 * the image is treated as a sequence of 32-bit words, and each word
 * is encoded in one of four ways, specified by a 4-bit nibble:
 *   zero -> nibble 0
 *   the 7*256 most common word values are encoded with one byte extra as
 *   an index into a table
 *   the 7*256 most common upper-3-byte values are encoded with one byte
 *   extra as index into another table, with the low byte separate
 *   anything else is given in full as 4 bytes.
 *
 * The tables of common values are sorted into ascending order
 * and encoded in a devious way.
 */

#define UNSQSIZE 0x248
unsigned char unsq[UNSQSIZE] = { /* Should first <N> words be no-ops to get patched? */
0xFB, 0x04, 0xA0, 0xE3, 0x3F, 0x13, 0xCE, 0xE3, 0x04, 0x00, 0x01, 0xE5, 0x2C, 0x00, 0x4F, 0xE2,
0x00, 0x3F, 0x90, 0xE8, 0x0A, 0xA0, 0x40, 0xE0, 0x09, 0x90, 0x4A, 0xE0, 0x08, 0x80, 0x89, 0xE0, 
0x08, 0x60, 0xA0, 0xE1, 0x0C, 0x10, 0x8B, 0xE0, 0x01, 0x71, 0x86, 0xE0, 0x0A, 0x50, 0xA0, 0xE1,
0x00, 0x40, 0xA0, 0xE3, 0x06, 0x20, 0xA0, 0xE1, 0x00, 0x30, 0xE0, 0xE3, 0x01, 0xB0, 0x5B, 0xE2, 
0x28, 0x00, 0x00, 0xBA, 0x01, 0x10, 0xD5, 0xE4, 0x0A, 0x00, 0x51, 0xE2, 0x12, 0x00, 0x00, 0xAA,
0x00, 0x00, 0x51, 0xE3, 0x09, 0x00, 0x00, 0x1A, 0x01, 0x00, 0xD5, 0xE4, 0x01, 0x10, 0xD5, 0xE4, 
0x01, 0x04, 0x80, 0xE1, 0x01, 0x10, 0xD5, 0xE4, 0x01, 0x08, 0x80, 0xE1, 0x01, 0x10, 0xD5, 0xE4,
0x01, 0x0C, 0x80, 0xE1, 0x00, 0x30, 0x83, 0xE0, 0x04, 0x30, 0x86, 0xE4, 0xEE, 0xFF, 0xFF, 0xEA, 
0x01, 0xB0, 0x4B, 0xE0, 0x01, 0xB0, 0x8B, 0xE2, 0x01, 0x30, 0x83, 0xE2, 0x04, 0x30, 0x86, 0xE4,
0x01, 0x10, 0x51, 0xE2, 0xFB, 0xFF, 0xFF, 0xCA, 0xE7, 0xFF, 0xFF, 0xEA, 0x5C, 0x00, 0x51, 0xE3, 
0x00, 0x30, 0x83, 0xB0, 0x04, 0x30, 0x86, 0xB4, 0xE3, 0xFF, 0xFF, 0xBA, 0xAE, 0x00, 0x51, 0xE2,
0x06, 0x00, 0x00, 0xBA, 0x01, 0x10, 0xD5, 0xE4, 0x00, 0x08, 0x81, 0xE1, 0x01, 0x10, 0xD5, 0xE4, 
0x01, 0x04, 0x80, 0xE1, 0x00, 0x30, 0x83, 0xE0, 0x04, 0x30, 0x86, 0xE4, 0xDA, 0xFF, 0xFF, 0xEA,
0x5C, 0x00, 0x51, 0xE2, 0x01, 0x10, 0xD5, 0xE4, 0x00, 0x04, 0x81, 0xE1, 0x00, 0x30, 0x83, 0xE0, 
0x04, 0x30, 0x86, 0xE4, 0xD4, 0xFF, 0xFF, 0xEA, 0x00, 0x00, 0x54, 0xE3, 0x04, 0x00, 0x00, 0x1A,
0x0C, 0xB0, 0xA0, 0xE1, 0x02, 0xC0, 0xA0, 0xE1, 0x06, 0x20, 0xA0, 0xE1, 0x01, 0x40, 0xA0, 0xE3, 
0xCB, 0xFF, 0xFF, 0xEA, 0x02, 0xB0, 0xA0, 0xE1, 0x18, 0x50, 0x8F, 0xE2, 0x4F, 0x6F, 0x8F, 0xE2,
0x07, 0x40, 0xA0, 0xE1, 0x0F, 0x00, 0xB5, 0xE8, 0x0F, 0x00, 0xA7, 0xE8, 0x06, 0x00, 0x55, 0xE1, 
0xFB, 0xFF, 0xFF, 0xBA, 0x04, 0xF0, 0xA0, 0xE1, 0x0D, 0x90, 0x89, 0xE0, 0x04, 0x80, 0x48, 0xE2,
0x09, 0x00, 0x5A, 0xE1, 0x37, 0x00, 0x00, 0xDA, 0x01, 0x60, 0x7A, 0xE5, 0x0F, 0x30, 0x06, 0xE2, 
0x09, 0x00, 0x53, 0xE2, 0x03, 0x00, 0x00, 0xBA, 0x01, 0x10, 0x7A, 0xE5, 0x00, 0x04, 0x81, 0xE1,
0x00, 0x41, 0x9C, 0xE7, 0x11, 0x00, 0x00, 0xEA, 0x02, 0x00, 0x53, 0xE2, 0x05, 0x00, 0x00, 0xBA, 
0x01, 0x10, 0x7A, 0xE5, 0x00, 0x04, 0x81, 0xE1, 0x00, 0x01, 0x9B, 0xE7, 0x01, 0x10, 0x7A, 0xE5,
0x00, 0x44, 0x81, 0xE1, 0x09, 0x00, 0x00, 0xEA, 0x00, 0x00, 0x53, 0xE3, 0x03, 0x40, 0xA0, 0x01, 
0x06, 0x00, 0x00, 0x0A, 0x01, 0x00, 0x7A, 0xE5, 0x01, 0x10, 0x7A, 0xE5, 0x01, 0x04, 0x80, 0xE1,
0x01, 0x10, 0x7A, 0xE5, 0x01, 0x08, 0x80, 0xE1, 0x01, 0x10, 0x7A, 0xE5, 0x01, 0x4C, 0x80, 0xE1, 
0x26, 0x32, 0xA0, 0xE1, 0x09, 0x00, 0x53, 0xE2, 0x04, 0x00, 0x00, 0xBA, 0x01, 0x10, 0x7A, 0xE5,
0x00, 0x04, 0x81, 0xE1, 0x00, 0x51, 0x9C, 0xE7, 0x30, 0x00, 0x28, 0xE9, 0xDB, 0xFF, 0xFF, 0xEA, 
0x02, 0x00, 0x53, 0xE2, 0x06, 0x00, 0x00, 0xBA, 0x01, 0x10, 0x7A, 0xE5, 0x00, 0x04, 0x81, 0xE1,
0x00, 0x01, 0x9B, 0xE7, 0x01, 0x10, 0x7A, 0xE5, 0x00, 0x54, 0x81, 0xE1, 0x30, 0x00, 0x28, 0xE9, 
0xD2, 0xFF, 0xFF, 0xEA, 0x00, 0x00, 0x53, 0xE3, 0x03, 0x50, 0xA0, 0x01, 0x30, 0x00, 0x28, 0x09,
0xCE, 0xFF, 0xFF, 0x0A, 0x01, 0x00, 0x7A, 0xE5, 0x01, 0x10, 0x7A, 0xE5, 0x01, 0x04, 0x80, 0xE1, 
0x01, 0x10, 0x7A, 0xE5, 0x01, 0x08, 0x80, 0xE1, 0x01, 0x10, 0x7A, 0xE5, 0x01, 0x5C, 0x80, 0xE1,
0x30, 0x00, 0x28, 0xE9, 0xC5, 0xFF, 0xFF, 0xEA, 0x00, 0x00, 0x5D, 0xE3, 0x07, 0x00, 0x00, 0xDA, 
0x0D, 0x60, 0x49, 0xE0, 0x07, 0x90, 0xA0, 0xE1, 0x0D, 0xA0, 0x89, 0xE0, 0x0F, 0x00, 0xB6, 0xE8,
0x0F, 0x00, 0xA7, 0xE8, 0x10, 0xD0, 0x5D, 0xE2, 0xFB, 0xFF, 0xFF, 0xCA, 0xBB, 0xFF, 0xFF, 0xEA, 
0x80, 0x80, 0x48, 0xE2, 0x08, 0xF0, 0xA0, 0xE1
};

#include <Arthur.h>
#define READINFO 5
#define FILFOUND 1
typedef struct BrazilWordParamBlock
{
        int p1, p2, p3, p4, p5, p6, p7;
} BrazilWordParamBlock;

typedef struct BrazilFileParamBlock
{
        int action;             /* action or object type if output data */
        char * name;            /* pointer to filename or pathname */
        unsigned int load, exec;         /* load, exec addresses */
        unsigned int start, end;         /* start address/length, end add./attributes */
        int reserved[4];        /* space to allow treatment as reg_block */
} BrazilFileParamBlock;

typedef struct BrazilByteReturn
{
        int a;
        int x;
        int y;
} BrazilByteReturn;

int Brazil_Byte(int code, int x, int y, BrazilByteReturn *info) {
art_error *rc;
art_reg_set b;
  b.r[0] = code;
  b.r[1] = x;
  b.r[2] = y;
  rc = art_osbyte(&b);
  info->a = b.r[0];
  info->x = b.r[1];
  info->y = b.r[2];
  return(info->a);
}

int Brazil_Word(int code, BrazilWordParamBlock *w) {
art_error *rc;
  rc = art_osword(code, w);
  return(0);
}

int Brazil_File(int code, char *in, int size, BrazilFileParamBlock *info) {
art_error *rc;
  info->action = code;
  info->name = in;
  rc = art_osfile((art_osfile_block *) info);
  return(info->action);
}

void UnSqueeze_FindUnSqueezeCode(int *base, int *limit) {
  *base = (int) &unsq[0]; *limit = (*base)+UNSQSIZE;
}

/*> c.squeeze
 * Title:     squeeze - compression of ADFS executable images
 * Author:    RCC
 * Copyright: (C) 1987, Acorn Computers Ltd, Cambridge, England.
 * Date:      03-Nov-87
 * LastEdit:  09-Dec-87
 */


#define VERSION "0.21"
#define HASHSIZE (8*1024)
#define CHUNKSIZE (16*1024)

#define AIFWORDS 32 /* size in words of an AIF header */
#define AIFBYTES (AIFWORDS * sizeof(int))
#define AIFPRELUDE 3 /* no of extra instructions in AIF decompression */
#define PREFETCH 2 /* words of ARM prefetch */

extern void UnSqueeze_FindUnSqueezeCode(int *base, int *limit);

static int force;

static clock_t lastticks;

void crashed(void) {
}

int ticks()
{ int last;
  last = lastticks; lastticks = clock(); return(lastticks-last); 
}

/*
 * Declarations of nibble values for the encoding.
 */

#define NSHORTS 7
#define NLONGS  (14-NSHORTS)
#define MAXTAB  (7*256)

#define ZERO    0
#define LITERAL 1
#define LONG    2
#define SHORT   (LONG+NLONGS)

/*
 * Data structure declarations.
 */

typedef unsigned int word;

/*
 * The Info structure is really a 3-word thing, but we are keen to save
 * space, so pack together the pointer to the next element in the list
 * and the count of number of occurrences of this value into a single
 * word.  To get as many bits as possible for the count, we constrain
 * all Info structures to be 8-byte aligned (freeing 3 low bits) and
 * take the top 8 bits of the address off.  This will only work if we
 * are in the low 16MBytes of address space, but it leaves us 11 bits
 * for the count, which is nearly always enough.
 *
 * Later on, we use these same records to build hash tables mapping
 * word values -> index in table of common values, for fast encoding.
 * Fortunately, the tables are of size 7*256 < 2**11, so the same
 * packing dodge still works.  I'm afraid this is all a bit tweaky,
 * but making it fast and small is worth the effort.
 *
 * The alternative is to look up each word by binary search, but that
 * would be slower (I think), viz 10 iterations for each table.
 */

typedef struct Info { 
  word nextandcount;
  word value;
} Info;

#define COUNTBITS 11
#define MAXCOUNT  ((1<<COUNTBITS)-1)

#define unpack(p,n,c)     { word t = p->nextandcount;                 \
                            n = (Info *)((t>>COUNTBITS)<<3);          \
                            c = t & MAXCOUNT; }
                           
#define pack(p,n,c)       { word t = (((word)n)<<(COUNTBITS-3)) | c;  \
                            p->nextandcount = t; }

#define inccount(p,n,c)   { if (c < MAXCOUNT) ++c;                    \
                            p->nextandcount = (((word)n)<<(COUNTBITS-3)) | c; }

typedef Info *HashTable[HASHSIZE];

typedef struct VTable { /* sorted vector of common values */
  int nhits;        /* sum of frequencies of words in this table */
  Info *misses;     /* list of (freq, word) pairs not in this table */
  int size;         /* number of els in the table */
  word els[MAXTAB]; /* table els: [0..size-1] are valid */
} VTable;

typedef struct Freqs { /* list of (value, frequency) pairs */
  int nzeros;       /* no of zero words */
  int maxfreq;      /* max frequency in list - useful for sorting */
  Info *freqs;      /* list of (value, frequency) */
} Freqs;

/*
 * Some ugly stuff to allocate 2-word Info structures efficiently with
 * correct (8-byte) alignment.
 */

typedef struct InfoChunk {
  struct InfoChunk *next;
  Info *free;
  Info *limit;
  Info chunks[(CHUNKSIZE-12)/sizeof(Info)];
} InfoChunk;

Info *align(Info *p, int n)
{ int x = (int)p;
  x += (n - 1); x -= (x % n); return (Info *)x;
}

char *heaplwm;
char *heaphwm;

void *xalloc(int bytes, char *what)
{ void *p = malloc(bytes);
  if (p == NULL) { crashed(); exit(1); }
  if ((int)p < 0) { 
    crashed(); exit(1); 
  }
  if ((char *)p + bytes > heaphwm) heaphwm = (char *)p + bytes;
  if ((char *)p < heaplwm) heaplwm = (char *)p;
  return(p);
}

void xfree(void *p)
{
  free(p);
}

static InfoChunk *curchunk;

void freechunks(void)
{ InfoChunk *p, *next;
  for (p = curchunk; p != NULL; p = next) { next = p->next; xfree(p); }
  curchunk = NULL;
}

Info *newinfo(Info *next, word v)
{ InfoChunk *chunk;
  Info *p;

  chunk = curchunk;
  if ((chunk == NULL) || ((p = chunk->free) >= chunk->limit)) {
    chunk = (InfoChunk *)xalloc(CHUNKSIZE, "InfoChunk");
    chunk->next  = curchunk;
    chunk->free  = p = align(chunk->chunks, 8);
    chunk->limit = (Info *)(((int)chunk) + CHUNKSIZE - sizeof(Info));
    curchunk = chunk;
  }
  chunk->free = (p + 1);
  pack(p, next, 1); p->value = v;
  return(p);
}

void countwords(word *code, word *limit, Freqs *ans)
/*
 * Counts number of occurrences of each word value in the specified block
 * of code [code, limit), and returns list of (value, freqency) pairs.
 */
{ HashTable *hash;
  Info **list;
  Info *p, *next, *freqs;
  int j, nzeros, maxfreq;
  word w;

  hash = xalloc(sizeof(HashTable), "HashTable");
  for (j = 0; j < HASHSIZE; ++j) (*hash)[j] = NULL;
  nzeros = 0;
  while (code < limit) {
    w = *code++;
    if (w == 0) { ++nzeros; continue; }
    j = (w + (w >> 11) + (w >> 22)) % HASHSIZE; /* simple hash function */
    list = &((*hash)[j]);
    p = *list;
    while (1) {
      if (p == NULL) { *list = newinfo(*list, w); break; }
      unpack(p, next, j);
      if (w == p->value) { inccount(p, next, j); break; }
      p = next;
    }
  } /* while code < limit */
  /*
   * Now flatten the hash table into a single list, and free the vector.
   */
  freqs = NULL; maxfreq = 0;
  for (j = 0; j < HASHSIZE; ++j) {
    for (p = (*hash)[j]; p != NULL; p = next) {
      unpack(p, next, w); pack(p, freqs, w); freqs = p;
      if (w > maxfreq) maxfreq = w; /* keep track of max frequency */
    }
  }
  ans->nzeros  = nzeros;
  ans->maxfreq = maxfreq;
  ans->freqs   = freqs;
  xfree(hash);
}

int comparewords(const void *a, const void *b)
/*
 * This proc is passed to the library qsort for sorting table elements.
 * We know that all table elements are distinct, so can cheat a little.
 */
{ word x = *(word *)a;
  word y = *(word *)b;
  if (x > y) return(+1);
  return(-1);
}

void maketable(Freqs *freqs, int maxsize, int wantmisses, VTable *tab)
/*
 * Builds a VTable of the most common values in the list freqs,
 * taking at most maxsize of them, destroying the freqs list
 * in the process, and leaving the remnants hung off the VTable
 * record.
 */
{ Info **withfreq = xalloc((freqs->maxfreq+1) * sizeof(Info *), "withfreq");
  Info **list;
  Info *p, *next, *misses;
  int  freq, nhits, size;

  /*
   * It is easy to sort things by frequency, as frequency range is
   * limited to 1..freqs->maxfreq.  So just build a vector of lists.
   */
  for (list = withfreq + freqs->maxfreq; list >= withfreq; *list-- = NULL);
  
  for (p = freqs->freqs; p != NULL; p = next) { /* put p into bucket */
    unpack(p, next, freq);
    pack(p, (withfreq[freq]), freq);
    withfreq[freq] = p;
  }
  freqs->freqs = NULL;

  nhits  = 0;
  misses = NULL;
  size   = 0;
  for (list = withfreq + freqs->maxfreq; list >= withfreq; --list) {
    for (p = *list; p != NULL; p = next) {
      unpack(p, next, freq);
      if (size < maxsize) {                        /* add to table */
        tab->els[size++] = p->value; nhits += freq;
      } else {                                     /* add to misses list */
        if (!wantmisses) break;
        pack(p, misses, freq); misses = p;
      }
    }
  }
  tab->nhits  = nhits;
  tab->misses = misses;
  tab->size   = size;
  xfree(withfreq);
  qsort((void *)(tab->els), size, sizeof(word), &comparewords);
}

void masklowbyte(Info *list, Freqs *ans)
/*
 * Take a list of (value, frequency) of 4-byte values, merge values
 * with the same low byte to produce list of 3-byte values.
 */
#define VECBITS 12
#define VECSIZE (1<<VECBITS)
{ Info **vec = xalloc(VECSIZE * sizeof(Info *), "mergevec");
  Info **pp;
  Info *p, *next;
  Info *q, *qnext, *qprev;
  int freq, qfreq, qprevfreq, maxfreq;
  word val, qval;

  for (pp = vec + VECSIZE-1; pp >= vec; *pp-- = NULL);
  for (p = list; p != NULL; p = next) {
    unpack(p, next, freq); 
    val = (p->value >> 8); p->value = val;
    pp = vec + ((val + (val >> 9) + (val >> 12)) % VECSIZE);
    /*
     * Now insert p in the ascending-value sorted list pp.
     * This is tricky because of the packing of the nextandcount field,
     * so have to handle start of list specially.
     */
    q = *pp;
    if (q == NULL) { pack(p, NULL, freq); *pp = p; continue; }
    unpack(q, qnext, qfreq); qval = q->value;
    if (val < qval) { pack(p, q, freq); *pp = p; continue; }
    if (val == qval) { 
      qfreq += freq; if (qfreq > MAXCOUNT) qfreq = MAXCOUNT;
      pack(q, qnext, qfreq); continue; 
    }
    while (1) {
      qprev = q; qprevfreq = qfreq; q = qnext;
      if (q == NULL) {   /* end of list: add it here */
        pack(p, NULL, freq); pack(qprev, p, qprevfreq); break;
      }
      unpack(q, qnext, qfreq); qval = q->value;
      if (val < qval) {  /* not in list: add it */
        pack(p, q, freq); pack(qprev, p, qprevfreq); break; 
      }
      if (val == qval) { /* value matches: add frequency */
        qfreq += freq; if (qfreq > MAXCOUNT) qfreq = MAXCOUNT;
        pack(q, qnext, qfreq); break;
      }
    }
  }
  /*
   * Phew! That should keep the register allocator busy.
   * Now we have a vector of sorted lists: just have to flatten it.
   */
  q = NULL; maxfreq = 0;
  for (pp = vec + VECSIZE-1; pp >= vec; --pp) {
    for (p = *pp; p != NULL; p = next) {
      unpack(p, next, freq); pack(p, q, freq); q = p;
      if (freq > maxfreq) maxfreq = freq;
    }
  }
  ans->maxfreq = maxfreq;
  ans->freqs   = q;
  xfree(vec);
}

#define FASTSIZE 4096
#define FASTHASH(v) ((v + (v >> 7) + (v >> 15)) % FASTSIZE)

Info **fasttab(VTable *tab)
/*
 * Builds a hash table for translating value -> index in table.
 */
{ Info **vec = xalloc(FASTSIZE * sizeof(Info *), "fasthash");
  Info **pp;
  int idx;
  word val;
  Info *p;

  for (pp = vec + FASTSIZE; pp > vec; *--pp = NULL);

  for (idx = 0; idx < tab->size; ++idx) {
    val = tab->els[idx];
    pp = vec + FASTHASH(val);
    /*
     * Values in table are unique, so just add it to chain.
     */
    p = newinfo(NULL, val); pack(p, *pp, idx); *pp = p;
  }
  return(vec);
}

int lookup(Info **fast, word val)
{ Info *p, *next;
  int idx;

  for (p = fast[FASTHASH(val)]; p != NULL; p = next) {
    unpack(p, next, idx);
    if (val == p->value) return(idx);
  }
  return(-1);
}

typedef struct Header {
  int decodedsize;
  int encodedsize;
  int encodedtabs;
  int nshorts;
  int nlongs;
  int bytestomove;
} Header;

#define TOPBYTE(n) ((n)>>24)
#define LOWBYTE(n) ((n)&0xff)
#define PUTLOWBYTE(p, n) (*p++ = (n)) /* relies on store masking low byte */

#define ENCODEVALUE(w, nibble, p) \
    if (w == 0) {                                             \
      nibble = ZERO;                                          \
    } else if ((idx = lookup(fshorts, w)) >= 0) {             \
      PUTLOWBYTE(p, idx);                                     \
      nibble = SHORT + (idx >> 8);                            \
    } else if ((idx = lookup(flongs, w>>8)) >= 0) {           \
      PUTLOWBYTE(p, w); PUTLOWBYTE(p, idx);                   \
      nibble = LONG + (idx >> 8);                             \
    } else {                                                  \
      *p++ = TOPBYTE(w); w <<= 8; *p++ = TOPBYTE(w); w <<= 8; \
      *p++ = TOPBYTE(w); w <<= 8; *p++ = TOPBYTE(w);          \
      nibble = LITERAL;                                       \
    }                                                         

#define ENCSIZE 8192

/*
 * We encode a pair of words at a time.  To avoid unnecessary copying of data,
 * things are done in a rather curious order, not quite the opposite of the
 * optimal decoding order.  I can't quite convince myself that this is optimal,
 * but I think it is quite good.
 */

char *encode(word *base, word *limit, Info **fshorts, Info **flongs,
             Header *h)
/*
 * Returns pointer to byte after the encoded data.
 */
{ word *code;
  word w;
  int idx, nib0, nib1;
  char *buf, *p;

  buf = xalloc(ENCSIZE, "encodebuf"); p = buf;
  
  h->decodedsize = ((char *)limit - (char *)base);
  for (code = base; code < limit; code += 2) {
    w = code[1]; ENCODEVALUE(w, nib1, p);
    w = code[0]; ENCODEVALUE(w, nib0, p);
    *p++ = (nib0 | (nib1 << 4));
    if (buf != NULL) {
      idx = ((int)code - (int)base - 12 - (p - buf));
      if (idx > 0) { h->bytestomove = (p - buf); }
      if ((idx > 256) || (p - buf > ENCSIZE - 16)) {
        /*
         * Swap from encoding into buf to encoding on top of old stuff
         * once we get 256 bytes clear, or run out of buf space.
         */
        memcpy(base+1, buf, p-buf); p = (p-buf) + (char *)(base+1);
        xfree(buf); buf = NULL;
      }
    } else {
      if (p >= (char *)(code-2)) {
        crashed(); exit(1);
      }
    }
  }
  h->encodedsize = p - (char *)(base+1);
  return(p);
}

char *encodetable(VTable *tab, int threebytes, char *out)
/*
 * Encode the table of 3 or 4 byte values, starting at address p,
 * return pointer to first free byte after table.
 */
{ word *p, *limit;
  word prev, w;
  int delta, ones;

  ones = 0; prev = (word)(-1);
  p = tab->els; limit = p + tab->size;
  while (p < limit) {
    w = *p++; delta = (w - prev); prev = w;
    if (delta == 1) ++ones;
    if ((ones > 0) && ((delta != 1) || (ones >= 9))) {
      *out++ = ones; ones = 0;
    }
    if (delta < 2) {  /* dealt with above: no zeros, ones are peepholed */ }
    else if (delta <= 91-10) { *out++ = delta+10; }
    else if (delta < 82*256) {
      *out++ = (delta>>8)+92; *out++ = LOWBYTE(delta); 
    }
    else if (delta < 82*256*256) {
      *out++ = (delta>>16)+174;
      *out++ = LOWBYTE(delta); delta >>= 8;
      *out++ = LOWBYTE(delta);
    }
    else {
      *out++ = 0;
      *out++ = LOWBYTE(delta); delta >>= 8;
      *out++ = LOWBYTE(delta); delta >>= 8;
      *out++ = LOWBYTE(delta);
      if (!threebytes) { delta >>= 8; *out++ = delta; }
    }
  } /* end loop over values in table */
  if (ones > 0) *out++ = ones;
  return(out);
}

char *writeunsqueeze(char *out, int execoffset)
{ int base, limit;
  word *p;
  int n, rotr, op;

  UnSqueeze_FindUnSqueezeCode(&base, &limit);
  n = limit - base;
  memcpy(out, (void *)base, n); out += n;
  /*
   * Now construct ARM code to jump to exec address, starting with
   * load address in R8.
   */
  op = 0xe2888000; /* ADD R8, R8, ... */
  if (execoffset < 0) { op = 0xe2488000; execoffset = -execoffset; }
  rotr = 32; p = (word *)out;
  while (execoffset > 0) {
    /* Generate ADD/SUB R8, R8, #thing */
    n = LOWBYTE(execoffset); execoffset >>= 8;
    if (n != 0) {
      *p++ = op + (0x100 * ((rotr % 32) / 2)) + n;
    }
    rotr -= 8;
  }
  /* Generate MOV PC, R8 */
  *p++ = 0xe1a0f008;
  return((char *)p);
}

char *compresscode(word *code, int size, int execoffset)
/*
 * Returns NULL if no compression, else pointer to top of compressed thing.
 */
{ Freqs  freqs;
  word   *limit;
  VTable *shorts, *longs;
  Info **fshorts, **flongs;
  int    nwords, guess, nliterals;
  Header header;
  char *pos, *tabstart;

  size += 7; size &= ~7; /* round up to an even number of words */
  limit = (word *)((char *)code + size);
  countwords(code, limit, &freqs);
  /*
   * Allocate the VTables here to avoid nasty storage interactions, which
   * can lose us some chunks if we're not careful.
   */
  shorts = xalloc(sizeof(VTable), "shorts");
  longs  = xalloc(sizeof(VTable), "longs");
  maketable(&freqs, NSHORTS*256, 1, shorts);
  masklowbyte(shorts->misses, &freqs);
  maketable(&freqs, NLONGS*256, 0, longs);
  freechunks();
  /*
   * Now guess what the size of the compressed thing would be.
   * The estimates of size of encoded data are exact, but the
   * estimates of encoded table size are a guess, so we over-estimate
   * the size of the decompression code to be on the safe side.
   */
  nwords    = (size / sizeof(word));
  nliterals = nwords - freqs.nzeros - shorts->nhits - longs->nhits;

  guess     =   (nwords / 2)           /* 0.5 bytes per word of original */
              + (1 * shorts->nhits)    /* 1 more byte for each short */
              + (2 * longs->nhits)     /* 2 for each long */
              + (4 * nliterals)        /* 4 for each literal */
              + (2 * shorts->size)     /* rough size of shorts table */
              + (2 * longs->size)      /* rough size of longs table */
              + 1024;                  /* decompression code + safety margin */

  if (guess > (9*size)/10) { /* not much squeeze to be had */
    if ((!force) || (guess > size)) return(NULL);
  }

  /*
   * Now can actually start encoding stuff.
   */
  fshorts = fasttab(shorts);
  flongs  = fasttab(longs);
  pos = encode(code, limit, fshorts, flongs, &header);
  xfree(flongs);
  xfree(fshorts);
  freechunks();
  tabstart = pos;
  pos = encodetable(shorts, 0, pos); xfree(shorts);
  pos = encodetable(longs,  1, pos); xfree(longs);
  header.nshorts = shorts->size;
  header.nlongs  = longs->size;
  /* now word-align before the header words */
  while (((int)pos & 3) != 0) *pos++ = 0;
  header.encodedtabs = (pos - tabstart);
  memcpy(pos, &header, sizeof(Header)); pos += sizeof(Header);
  /* 
   * Now the branch instruction to be put at the start: this has a word
   * offset, with suitable allowance for ARM prefetch.
   * In fact we want to skip the first 3 instructions of decompression
   * code, as these are executed only for an AIF image.
   */
  *code = 0xea000000 + ((word *)pos - code) + AIFPRELUDE - PREFETCH;
  pos = writeunsqueeze(pos, execoffset);
  pos += sprintf(pos, "rcc %s\n", VERSION);
  /* Pad size to be 15 mod 16 */
  while ((((int)pos - (int)code) & 0xf) != 0xf) *pos++ = ' ';
  return(pos);
}

char *compress(word *code, int size, int execoffset)
/*
 * This handles the AIF-specific stuff.
 * Returns NULL if no compression, else pointer to top of compressed thing.
 */
{ char *top;

  if (code[0] != 0xfb000000) { /* not BLNV $+0 => not an AIF image */
    return compresscode(code, size, execoffset);
  }
  top = compresscode(code+AIFWORDS, size-AIFBYTES, execoffset-AIFBYTES);
  /*
   * Now first word of the stuff we have just compressed is 
   * B UnsqueezeADFSImage.  We want the first word of the header to
   * be BL UnsqueezeAIFImage, i.e. destination AIFPRELUDE words earlier.
   */
  code[0] = 0xeb000000 + (code[32] & 0x00ffffff) + AIFWORDS - AIFPRELUDE;
  return(top);
}

int savedload, savedexec, usesaved;

void arthurise(BrazilFileParamBlock *info)
{ BrazilByteReturn     byte;
  BrazilWordParamBlock w;
  int rc;

  if ((info->load == info->exec) && (info->load == 0x8000)) {
    /* can we use Arthur 'FF8' filetype ? */
    if (usesaved) {
      info->load = savedload; info->exec = savedexec;
    } else {
      rc = Brazil_Byte(0, 1, 0, &byte);
      if ((rc >= 0) && (byte.x == 6)) {
        /* This is Arthur - get time of day */
        w.p1 = 3; rc = Brazil_Word(14, &w);
        if (rc >= 0) {
          info->exec = w.p1;
          info->load = 0xfffff800 + (w.p2 & 0xff);
        }
      }
    }
  }
}

int squeeze(char *in, char *out)
{ BrazilFileParamBlock info;
  int rc, size, t, squeezed;
  word *code;
  char *top;
  
  usesaved = (0!=0);
  squeezed = 0;
  rc = Brazil_File(READINFO, in, 256, &info);
  if (rc != FILFOUND) { return(1); }
  size = info.start;
  if ((!force) && (strcmp(in, out) == 0)) { 
    /* Check quickly to see if worth loading */
    if (size < 2048) {
      return(0);
    }
    if ((size & 0xf) == 0xf) {
      return(0);
    }
  }
  code = xalloc(size+8, "code"); /* Space to pad to even no of words */
  info.load = (int)code; info.exec = 0;
  rc = Brazil_File(0xff, in, 256, &info); /* LOAD */
  if (rc != FILFOUND) { return(1); }
  savedload = info.load; savedexec = info.exec;
  if ((info.load & 0xfc000000) != 0) { /* Not a valid address */
    if ((info.load >> 8) == 0xfffff8) { /* Arthur absolute file */
      info.load = 0x8000; info.exec = 0x8000; usesaved = (0==0);
    } else {
      info.exec = info.load;
    }
  } 
  t = clock();
  if ((top = compress(code, size, info.exec-info.load)) != NULL) {
    t = clock() - t;
    rc = (top - (char *)code);
    info.exec = info.load; squeezed = 1;
  } else {
    top = (char *)code + size;
  }
  if (squeezed || (strcmp(in, out) != 0)) { /* Write it out */
    arthurise(&info);
    info.start = (int)code; info.end = (int)top;
    rc = Brazil_File(0, out, 256, &info); /* SAVE */
    if (rc < 0) {
      /* if (verbose) printf("squeeze: failed to write '%s'\n", out); */
    }
  }
  xfree(code);
  return(0); 
}

void help(void)
{
/*
  printf("squeeze takes an executable ARM-code program and tries to\n");
  printf("compress it, producing a program roughly half the size which\n");
  printf("will decompress itself whenever it is run\n\n");
  printf("syntax: squeeze [-v] [-f] <unsqueezed-file> [<squeezed-file>]\n\n");
  printf("  -Force    try harder to squeeze unpleasant things\n");
  printf("  -Verbose  output various messages to say how it is going\n");
 */
}

int main(int argc, char *argv[])
{ int j;
  char *arg;
  char *a = NULL;
  char *b = NULL;
  char c;

  force = 0; 
  curchunk = NULL; 
  heaplwm = (char *)0x03ffffff; heaphwm = NULL; ticks();

  for (j = 1; j < argc; ++j) {
    arg = argv[j];
    if (arg[0] == '-') {
      c = arg[1];
      if (('A' <= c) && (c <= 'Z')) c += ('a' - 'A');
      switch (c) {
        case 'f':  ++force;   break;
        case 'h':  help();    exit(0);
        default:
          exit(1);
      }
    } else { /* a filename */
      if      (a == NULL) a = arg;
      else if (b == NULL) b = arg;
      else {
        exit(1);
      }
    }
  }
  if (a == NULL) { exit(1); }
  if (b == NULL) b = a; /* squeeze it to itself */
  return(squeeze(a, b));
}
