int hackymaxx;
int hackymaxy;
/*
Take an image of a page, and apply shears in X and Y to straighten
lines in the page. The shearing is a "poor man's" approximation
to rotation. The assumption is that the skewed text will still be
acceptable to the accompanying OCR algorithm.
The algorithm is to sample a path across the page, the same as drawing a
line across the page with a DDA. This nearly-horizontal line is slid
vertically over the whole page, and this is repeated for various slopes
of the line. The angle which provides the most intersections between
the line and the white pixels in the image is presumably the angle
where the scanline was able to squeeze between horzontal lines of type.
Since the total number of white vs black pixels on the page remains the
same, no matter what the scanning angle, the discriminating function
has to specifically weigh long runs of white more heavily, so the
score will probably be something like (white-black) or white^2 ...
maybe some sort of root-mean-square algorithm to be determined by
experiment. Whether raw black vs white data is sufficient or whether
we need to specifically count the lengths of runs of white pixels
will be determined by experiment.
The first hack will be crude and assume 1-bit images. Later versions
may use greyscale or colour if needed. Code should accept any input
format including high-res colour - we'll probably use the 'netpbm'
package so that we can work with one standard format and use their
large range of conversion tools (notably "anytopnm") to convert
any sort of graphics file on input. During development we will
probably want immediate graphical feedback but this may be too cumbersome
under the command-line unix I use, so I'll probably generate jpg's to
view on a web page by way of feedback.
Note: we could possibly also find horizontal lines (underlines, rules
etc) with the same algorithm, looking for black rather than white runs.
Perhaps the best approach may be to look for both at once. In fact the
RMS test could be extra useful here as (white-black)^2 will be positive
for an imbalance of black or white pixels.
This is fast code. Time to analyse a page is about equivalent to the
time to load and save the graphics image. No attempt has been made yet
to do bit-level optimisation, and a faster line-drawing algorithm
should speed it up by a factor of say 4 or more.
Once the angle of skew is determined, we can rotate the page by using
essentially the same algorithm; moving across the page horizontally,
following the slope of the DDA. This will give an output page of the
same width as the input page. Actually to be more precise we should
also apply a shear along the other axis, which can be done with
the very same algoritm, just rotated 90deg. In other words, instead
of just moving the horizontal scanning line up the page, we move it
up and also across a couple of pixels to account for the vertical
skew too. The skew is simply the horizontal skew rotated 90deg. This
is a fast way to do rotation; the only problem being that the resulting
bitmap will be slightly stretched in X and Y, by a few pixels. For OCR
this is not an issue. However the code could be adapted to become a
general-purpose rotator by simply adding a post-processing phase where
the bitmap is scaled in X and X by an appropriate factor. Code to
do this (rectilinearly) already exists elsewhere (eg Acorn had
Floyd-Steinberg dithered x/y re-scaling on the ARM years ago). It is
this latter stage which slows the process of rotation down, no matter
how it is implemented; fortunately for us we don't need it.
By carefully chosing the order of processing it should be possible
to update the bitmap in place, but that is probably overkill because
we need to write out an output file anyway and can do the scanning on
the fly as we write to the output.
Note: the complexity of this algorithm is primarily x*y, however
if we scan with slopes whose slant increases by only one pixel per
attempt, we end up with a complexity of x*y*y. A much more sensible
thing to do would be to quantise the scan into say 32 fixed angles,
so that a larger bitmap does not require more passes. The accuracy
of the algorithm shouldn't be overly affected. Perhaps having found
a local maximum with this crude filter, we could fill in the gaps
by doing a +/- scan in the gaps immediately adjacent to the rougher
estimate.
Implementation note: the vertical sliding of the scan line window
across the page is currently in an outer-loop relative to the DDA
procedure itself. In fact we really only need to calculate the pixels
making up the slope of this line just once, because all other lines
will be parallel to it, so we could in fact implement the sliding
as an inner loop inside the DDA procedure, as we move across the
page one pixel at a time.
*/
#include <stdio.h>
#include <stdlib.h>
#include <values.h>
/* based on pnmshear.c - read a portable anymap and shear it by some angle
**
** Copyright (C) 1989, 1991 by Jef Poskanzer.
**
** Permission to use, copy, modify, and distribute this software and its
** documentation for any purpose and without fee is hereby granted, provided
** that the above copyright notice appear in all copies and that both that
** copyright notice and this permission notice appear in supporting
** documentation. This software is provided "as is" without express or
** implied warranty.
*/
#include <math.h>
#include "pnm.h"
#include "shhopt.h"
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif /*M_PI*/
#define SCALE 4096
#define HALFSCALE 2048
struct cmdline_info {
/* All the information the user supplied in the command line,
in a form easy for the program to use.
*/
const char *input_filespec; /* Filespec of input file */
};
FILE* ifp;
xel *xelrow;
xel **xelrows;
xel bgxel;
int rows, cols, format;
int row;
xelval maxval;
struct cmdline_info cmdline;
static void
parse_command_line(int argc, char ** argv,
struct cmdline_info *cmdlineP) {
char *endptr;
optStruct3 opt;
unsigned int option_def_index = 0;
optEntry *option_def = malloc(100*sizeof(optEntry));
opt.opt_table = option_def;
opt.short_allowed = FALSE;
opt.allowNegNum = TRUE;
optParseOptions3(&argc, argv, opt, sizeof(opt), 0);
if (argc-1 < 1) {
cmdlineP->input_filespec = "-";
} else {
cmdlineP->input_filespec = argv[1];
if (argc-1 > 1) {
pm_error("too many arguments (%d). "
"The only argument is the filespec.",
argc-1);
}
}
}
void read_image(int *maxx, int *maxy)
{
/* Read a bitmap, return its size;
Numbers are inclusive, ie bounds are 0:maxx-1, 0:maxy-1 */
ifp = pm_openr( cmdline.input_filespec );
pnm_readpnminit( ifp, &cols, &rows, &maxval, &format );
*maxx = cols; *maxy = rows;
hackymaxx = cols;
hackymaxy = rows;
xelrows = malloc(rows*sizeof(xel *));
for ( row = 0; row < rows; row += 1 ) {
xelrow = pnm_allocrow( cols );
if (row == 0) bgxel = pnm_backgroundxelrow( xelrow, cols, maxval, format );
pnm_readpnmrow( ifp, xelrow, cols, maxval, format );
xelrows[row] = xelrow;
}
}
unsigned int samplepixel(int x, int y)
{
xel *el = &xelrows[y][x];
int rp = el->r&255;
int gp = el->g&255;
int bp = el->b&255;
int c = bp&3;
/* get the value of a pixel. For now, apply threshholding and return 0/1 */
if (c == 3) return(1); /* white */
if (c == 0) return(0); /* black */
return(2); /* our drawn line for display purposes */
}
void setpixel(int x, int y, int px)
{
xel *el = &xelrows[y][x];
el->b = px; el->g = 0; el->r = 0;
}
/*
The code below is a DDA trivially adapted from a line-drawing algorithm;
I have faster algorithms than this - even an anti-aliased one - but
this is rock-solid reliable code that can be trusted during the debugging
phase (and also it appears to be fast enough already!)
*/
int countwhite(int x1,int y1,int x2, int y2)
{
/* calculate a DDA from (xl,yb) to (xr,yt). */
/* instead of plotting pixels, add them up somehow */
int temp,dx,dy,x,y,x_sign,y_sign,flag;
int tot = 0;
dx=abs(x2-x1); // Delta of X
dy=abs(y2-y1); // Delta of Y
if (((dx >=dy) && (x1>x2)) || // Make sure that first coordinate
((dy>dx) && (y1>y2))) // is the one with least value
{
temp=x1;
x1=x2;
x2=temp;
temp=y1;
y1=y2;
y2=temp;
};
if ((y2-y1)<0) y_sign=-1; // The direction into which Y-coord shall travel
else y_sign=1; // Same for X
if ((x2-x1)<0) x_sign=-1; // ---- " ----
else x_sign=1; // ---- " ----
if (dx>=dy) // Which one of the deltas is the greatest one
{
for (x=x1,y=y1,flag=0;x<=x2;x++,flag+=dy) // From x1 to x2
{ // Also increase the
if (flag>=dx) // Increase/decrease // flag (displacement value)
{ // y!
flag-=dx;
y+=y_sign;
};
tot += samplepixel(x,y); // Plot the pixel
};
}
else
{
/* This is the same as above, just with x as y and vice versa.*/
for (x=x1,y=y1,flag=0;y<=y2;y++,flag+=dx)
{
if (flag>=dy)
{
flag-=dy;
x+=x_sign;
};
tot += samplepixel(x,y);
};
};
return(tot);
}
void drawline(int x1,int y1,int x2, int y2)
{
/* calculate a DDA from (xl,yb) to (xr,yt). */
/* instead of plotting pixels, add them up somehow */
int temp,dx,dy,x,y,x_sign,y_sign,flag;
dx=abs(x2-x1); // Delta of X
dy=abs(y2-y1); // Delta of Y
if (((dx >=dy) && (x1>x2)) || // Make sure that first coordinate
((dy>dx) && (y1>y2))) // is the one with least value
{
temp=x1;
x1=x2;
x2=temp;
temp=y1;
y1=y2;
y2=temp;
};
if ((y2-y1)<0) y_sign=-1; // The direction into which Y-coord shall travel
else y_sign=1; // Same for X
if ((x2-x1)<0) x_sign=-1; // ---- " ----
else x_sign=1; // ---- " ----
if (dx>=dy) // Which one of the deltas is the greatest one
{
for (x=x1,y=y1,flag=0;x<=x2;x++,flag+=dy) // From x1 to x2
{ // Also increase the
if (flag>=dx) // Increase/decrease // flag (displacement value)
{ // y!
flag-=dx;
y+=y_sign;
};
setpixel(x,y,2); // Plot the pixel
};
}
else
{
/* This is the same as above, just with x as y and vice versa.*/
for (x=x1,y=y1,flag=0;y<=y2;y++,flag+=dx)
{
if (flag>=dy)
{
flag-=dy;
x+=x_sign;
};
setpixel(x,y,2);
};
};
}
void copypixel(int tox, int toy, int fromx, int fromy)
{
xel *fromel;
xel *toel;
if ((tox == (hackymaxx/2))
|| (fromx < 0) || (fromy < 0)
|| (tox < 0) || (toy < 0)
|| (fromx >= hackymaxx) || (fromy >= hackymaxy)
|| (tox >= hackymaxx) || (toy >= hackymaxy))
{
if (
(fromx < 0) || (fromy < 0)
|| (tox < 0) || (toy < 0)
|| (fromx >= hackymaxx) || (fromy >= hackymaxy)
|| (tox >= hackymaxx) || (toy >= hackymaxy)) fprintf(stderr, "***");
fprintf(stderr, "moving pixel from (%d,%d) to (%d,%d)\n", fromx,fromy, tox,toy);
}
if (
(fromx < 0) || (fromy < 0)
|| (tox < 0) || (toy < 0)
|| (fromx >= hackymaxx) || (fromy >= hackymaxy)
|| (tox >= hackymaxx) || (toy >= hackymaxy)) return;
fromel = &xelrows[fromy][fromx];
toel = &xelrows[toy][tox];
toel->r = fromel->r;
toel->g = fromel->g;
toel->b = fromel->b;
}
void doshear(int x1,int y1,int x2, int y2, int direction, int offset)
{
/* calculate a DDA from (xl,yb) to (xr,yt). */
/* instead of plotting pixels, add them up somehow */
int temp,dx,dy,x,y,x_sign,y_sign,flag,sliding_window;
fprintf(stderr, "doshear line from %d,%d -> %d,%d\n",x1,y1,x2,y2);
fprintf(stderr, "sliding to %d,%d -> %d,%d by steps of %d\n", x1,y1+offset,
x2,y2+offset,direction);
dx=abs(x2-x1); // Delta of X
dy=abs(y2-y1); // Delta of Y
if (((dx >=dy) && (x1>x2)) || // Make sure that first coordinate
((dy>dx) && (y1>y2))) // is the one with least value
{
temp=x1;
x1=x2;
x2=temp;
temp=y1;
y1=y2;
y2=temp;
};
if ((y2-y1)<0) y_sign=-1; // The direction into which Y-coord shall travel
else y_sign=1; // Same for X
if ((x2-x1)<0) x_sign=-1; // ---- " ----
else x_sign=1; // ---- " ----
if (dx>=dy) // Which one of the deltas is the greatest one
{
for (x=x1,y=y1,flag=0;x<=x2;x++,flag+=dy) // From x1 to x2
{ // Also increase the
if (flag>=dx) // Increase/decrease // flag (displacement value)
{ // y!
flag-=dx;
y+=y_sign;
};
for (sliding_window = y; sliding_window != y+offset; sliding_window += direction) {
copypixel(x,sliding_window, x,sliding_window-(direction*abs(y1-y))); // Plot the pixel
}
};
}
else
{
/* This is the same as above, just with x as y and vice versa.*/
for (x=x1,y=y1,flag=0;y<=y2;y++,flag+=dx)
{
if (flag>=dy)
{
flag-=dy;
x+=x_sign;
};
for (sliding_window = y; sliding_window != y+offset; sliding_window += direction) {
copypixel(x,sliding_window, x,sliding_window+(direction*abs(y1-y))); // Plot the pixel
}
};
};
}
int main(int xargc, char **axrgv)
{
long long int besttot;
int bestdelta, newbestdelta;
int y, v, delta, maxx, maxy;
int step;
int **score;
long long int *total;
int argc = 2;
char *argv[3] = {"straighten", "test.pnm", NULL};
pnm_init( &argc, argv );
parse_command_line( argc, argv, &cmdline );
read_image(&maxx, &maxy); /* get width and height of image */
pm_close( ifp );
fprintf(stderr, "Page loaded; size is %dx%d\n", maxx, maxy);
/* calculate shear in vertical pixels for -5deg to +5deg */
/* tan alpha = opp/adj. (opp = Y, adj = x) -> y = x * tan.alpha */
v = (maxx / 16);
/* very crude approximation for tan - approx 6deg. is it enough? */
/* NOTE: some care is taken here to get the boundary conditions right;
be very careful when tweaking this code to avoid +1 errors */
score = malloc(((v*2)+1) * sizeof(int *));
total = malloc(((v*2)+1) * sizeof(long long int));
if (score == NULL || total == NULL) {
fprintf(stderr, "straighten: very low on ram\n"); exit(1);
}
score += v; /* adjust origin, so have -v:v */
total += v;
step = (2*v)/32; /* 32 fixed angles. */
if (step == 0) step = 1;
{int sx, sy, x, y, c;
sx = 470; sy = 539;
for (y = sy; y < sy+21; y++) {
fprintf(stderr, "%04d: ", y);
for (x = sx; x < sx+72; x++) {
c = samplepixel(x, y);
fprintf(stderr, "%c", (c == 0 ? '*' : (c == 1 ? ' ' : '+')));
}
fprintf(stderr, "\n");
}
}
for (delta = -v; delta <= v; delta += step) { /* typically -32:32 */
fprintf(stderr, "p2: delta = %d\n", delta);
score[delta] = malloc(maxy * sizeof(int));
if (score[delta] == NULL) {
fprintf(stderr, "straighten: low on ram #1\n"); exit(2);
}
total[delta] = 0;
for (y = v; y < maxy-v; y++) {
/* tweaking by +/-v is so we don't run off the page */
/* draw a line from 0,0 to maxx,v for all v */
score[delta][y] = countwhite(0,y, maxx-1,y+delta);
total[delta] += (long long int)(score[delta][y] * score[delta][y]);
}
}
/* Choose best score from -v to v-1 and generate image with this skew.
With a little care we can apply the skew either in situ or as we write
the output file, and avoid a second buffer */
besttot = 0; bestdelta = 0;
for (delta = -v; delta <= v; delta += step) {
fprintf(stderr, "total[%d] = %lld\n", delta, total[delta]);
if (total[delta] > besttot) {
besttot = total[delta]; bestdelta = delta;
}
}
fprintf(stderr, "Rough skew angle = %d/%d\n", bestdelta, maxx);
for (delta = bestdelta-step; delta <= bestdelta+step; delta += 1) {
fprintf(stderr, "p3: delta = %d\n", delta);
if (score[delta] == NULL) score[delta] = malloc(maxy * sizeof(int));
if (score[delta] == NULL) {
fprintf(stderr, "straighten: low on ram #2\n"); exit(2);
}
total[delta] = 0;
for (y = v; y < maxy-v; y++) {
/* tweaking by +/-v is so we don't run off the page */
/* draw a line from 0,0 to maxx,v for all v */
score[delta][y] = countwhite(0,y, maxx-1,y+delta);
total[delta] += (long long int)(score[delta][y] * score[delta][y]);
}
}
besttot = 0; newbestdelta = 0;
for (delta = bestdelta-step; delta <= bestdelta+step; delta += 1) {
fprintf(stderr, "total[%d] = %lld\n", delta, total[delta]);
if (total[delta] > besttot) {
besttot = total[delta]; newbestdelta = delta;
}
}
fflush(stderr);
drawline(0, 559, maxx-1, 559+bestdelta);
{int sx, sy, x, y, c;
sx = 470; sy = 539;
for (y = sy; y < sy+21; y++) {
fprintf(stderr, "%04d: ", y);
for (x = sx; x < sx+72; x++) {
c = samplepixel(x, y);
fprintf(stderr, "%c", (c == 0 ? '*' : (c == 1 ? ' ' : '+')));
}
fprintf(stderr, "\n");
}
}
fprintf(stdout, "%s: Fine skew angle = %d/%d\n",
(argv[1] == NULL ? "stdin" : argv[1]), newbestdelta, maxx);
{
FILE *fixed;
fixed = fopen("unstraightened.pnm", "wb");
pnm_writepnminit(fixed, cols, rows, maxval, format, 0 );
for (y = 0; y < maxy; y++) {
pnm_writepnmrow(fixed, xelrows[y], cols, maxval, format, 0 );
}
pm_close(fixed);
}
/* Now apply the skew */
/* these two calls here not yet checked for edge condition accuracy */
if (newbestdelta > 0) {
doshear(0, 0, maxx-1, newbestdelta, 1, maxy-newbestdelta-1);
/* slide from the bottom up */
} else if (newbestdelta < 0) {
doshear(0, maxy-1, maxx-1, maxy+newbestdelta-1, -1, -(maxy+newbestdelta-1));
/* slide from the top down */
}
{
FILE *fixed;
fixed = fopen("straightened.pnm", "wb");
pnm_writepnminit(fixed, cols, rows, maxval, format, 0 );
for (y = 0; y < maxy; y++) {
pnm_writepnmrow(fixed, xelrows[y], cols, maxval, format, 0 );
}
pm_close(fixed);
}
exit(0);
}