#!/usr/bin/env perl

#    Copyright (C) 2007 Tommy Persson, tpe@ida.liu.se
#
#    mobi2html, Copyright (C) 2007 Tommy Persson, tpe@ida.liu.se
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


use HTML::TreeBuilder;
use Palm::PDB;
use Palm::Doc;
use Image::Size;
use Date::Parse;
use Date::Format;
use Getopt::Mixed;

use strict;

# Command-line flags.  They are package variables (declared with "our",
# the modern replacement for the obsolete "use vars" pragma) because the
# option parser fills in $opt_NAME variables rather than lexicals.
our ($opt_fixhtml, $opt_record0);

# Recognised switches; neither takes an argument:
#   --fixhtml  post-process the extracted HTML (see fix_image_tags)
#   --record0  stop after the first record has been handled
Getopt::Mixed::getOptions ("fixhtml record0");

# Lookup table from numeric item id to a readable field name.
# Presumably these are the EXTH item ids seen by parse_mobi_exth --
# NOTE(review): nothing in the visible part of the script reads this
# table yet.
my %exth_item_ids = (
    # DRM related identifiers
    1   => 'drm_server_id',
    2   => 'drm_commerce_id',
    3   => 'drm_ebookbase_book_id',

    # Bibliographic metadata
    100 => 'Author',
    101 => 'Publisher',
    102 => 'Imprint',
    104 => 'ISBN',
    105 => 'Subject',
    106 => 'PublishingDate',
    107 => 'Review',
    108 => 'Contributor',
    109 => 'Rights',
    110 => 'SubjectCode',
    111 => 'Type',
    112 => 'Source',
    113 => 'ASIN',
    114 => 'VersionNumber',
    115 => 'Sample',
    116 => 'StartReading',

    # Miscellaneous
    203 => 'hasFakeCover',
);


# Path of the input file: the first command-line argument left over
# after option parsing.
my $filename = shift;
die "Usage: $0 [--fixhtml] [--record0] file.mobi\n"
    unless defined $filename;

# Load the whole database into memory.
my $pdb = Palm::PDB->new;
$pdb->Load($filename);

# Database-level header fields.  $type and $creator are also consulted
# later by parse_record_0 to decide whether a MOBI header is present.
my $name = $pdb->{"name"};
my $version = $pdb->{"version"};
my $type = $pdb->{"type"};
my $creator = $pdb->{"creator"};
my $seed = $pdb->{"uniqueIDseed"};
my $ctime = $pdb->{"ctime"};
my $mtime = $pdb->{"mtime"};

# Date::Format's ctime() ends its result with a newline; strip it so the
# diagnostics below do not contain stray blank lines.
my $sctime = ctime ($ctime);
my $smtime = ctime ($mtime);
chomp ($sctime) if defined $sctime;
chomp ($smtime) if defined $smtime;

# Dump the header to STDERR so that STDOUT carries only the book text.
print STDERR "Name: $name\n";
print STDERR "Version: $version\n";
print STDERR "Type: $type\n";
print STDERR "Creator: $creator\n";
print STDERR "Seed: $seed\n";
print STDERR "Resdb: " . $pdb->{"attributes"}{"ResDB"} . "\n";
print STDERR "AppInfoDirty: " . $pdb->{"attributes"}{"AppInfoDirty"} . "\n";
print STDERR "ctime: $ctime - $sctime\n";
print STDERR "mtime: $mtime - $smtime\n";
print STDERR "baktime: " . $pdb->{"baktime"} . "\n";

my @records = @{$pdb->{"records"}};
# scalar(@records) is the record count; $#records would be the index of
# the last record, i.e. one too few.
print STDERR "Number of record: " . scalar (@records) . "\n";


# Running count of records recognised as images, and a map from that
# 1-based count to the file the image was written to.  fix_image_tags
# resolves the "recindex" attribute of <img> tags through this map.
my $image_index = 0;
my %image_index_to_filename = ();

# Write every record to its own file "record-<id>" in the current
# directory, printing a one-line summary per record to STDERR.
foreach my $r (@records) {
    my $id = $r->{"id"};
    my $cat = $r->{"category"};
    my $offset = $r->{"offset"};
    my $data = $r->{"data"};
    my $size = length ($data);
    my $filename = "record-$id";

    # The record with id 0 holds the PalmDOC/MOBI headers.
    if ($id == 0) {
        parse_record_0 ($data);
    }
    print STDERR "Record $id - $cat - $offset - $size\n";

    # imgsize returns a defined width only when it recognises the data
    # as an image; such records are numbered in order of appearance.
    my ($width) = imgsize (\$data);
    if (defined $width) {
        $image_index++;
        $image_index_to_filename{$image_index} = $filename;
    }

    # Records are raw bytes (compressed text, images, ...), so write in
    # binary mode, and fail loudly instead of silently losing data.
    open (my $out, ">", $filename)
        or die "Cannot open $filename for writing: $!\n";
    binmode ($out);
    print {$out} $data
        or die "Cannot write to $filename: $!\n";
    close ($out)
        or die "Cannot close $filename: $!\n";

    # With --record0 only the first record is processed.
    if (defined $opt_record0) {
        exit (0);
    }
}

#my @resources = @{$pdb->{"resources"}};
#print STDERR "Number of resources: " . $#resources . "\n";

# Document text as provided by the loaded database object.
my $text = $pdb->text;

# By default the text is emitted untouched; with --fixhtml it is parsed
# into an HTML tree, its <img> tags are repaired and the tree is
# serialised again.
my $output = $text;
if (defined $opt_fixhtml) {
    my $tree = HTML::TreeBuilder->new_from_content ($text);
    fix_image_tags ($tree);
    $output = $tree->as_HTML;
}
print STDOUT $output;

# Finally save the loaded database to "t.prc" in the current directory.
$pdb->Write ("t.prc");


# fix_image_tags ($tree)
#
# Rewrites every <img> element below $tree that carries a "recindex"
# attribute: the attribute is removed and "src" is pointed at the file
# the corresponding image record was written to (looked up in
# %image_index_to_filename).
#
# Images without a "recindex", or whose index has no known file, are
# left untouched.  Setting "src" to undef would delete the attribute, so
# blindly assigning a failed lookup would destroy an existing src.
#
# Modifies the tree in place; the return value is not meaningful.
sub fix_image_tags {
    my $tree = shift;
    foreach my $img ($tree->find ("img")) {
        my $recindex = $img->attr ("recindex");
        next unless defined $recindex;

        # recindex may be zero-padded (e.g. "00001"); normalise it to a
        # plain integer before using it as a hash key.
        my $ind = int ($recindex);
        my $filename = $image_index_to_filename{$ind};
        unless (defined $filename) {
            print STDERR "FIX IMAGE TAGS: $recindex - $ind - no matching image record\n";
            next;
        }

        print STDERR "FIX IMAGE TAGS: $recindex - $ind - $filename\n";
        $img->attr ("recindex", undef);    # undef removes the attribute
        $img->attr ("src", $filename);
    }
}

# parse_record_0 ($record)
#
# Decodes the headers stored at the start of the first record.  The
# leading 16 bytes are always handed to parse_palmdoc_header; the rest is
# handed to parse_mobi_header only when the database type/creator read
# at file level are "BOOK"/"MOBI".
sub parse_record_0 {
    my ($record) = @_;

    my $palmdoc_part = substr ($record, 0, 16);
    parse_palmdoc_header ($palmdoc_part);

    # Nothing more to decode unless this is a Mobipocket book.
    return unless ($type eq "BOOK" && $creator eq "MOBI");

    my $mobi_part = substr ($record, 16);
    parse_mobi_header ($mobi_part);
}

# parse_palmdoc_header ($data)
#
# Unpacks the 16-byte header passed in $data and prints its fields to
# STDERR, one "PDHEADER <label>: <value>" line per field.
#
# Layout (big-endian): 16-bit value, two skipped bytes, 32-bit value,
# two 16-bit values, 32-bit value.
# NOTE(review): the first field is labelled "Version" here; PalmDOC
# format descriptions usually call it the compression type -- confirm.
sub parse_palmdoc_header {
    my ($header) = @_;

    my @labels = ("PDHEADER  Version",
                  "PDHEADER   Length",
                  "PDHEADER NRecords",
                  "PDHEADER  Recsize",
                  "PDHEADER  Unknown");
    my @fields = unpack ("nxxNnnN", $header);

    # Pair each label with the value at the same position.
    my $report = join ("", map { "$labels[$_]: $fields[$_]\n" } 0 .. $#labels);
    print STDERR $report;
}

# parse_mobi_header ($data)
#
# Prints the leading fields of the header in $data to STDERR: a 4-byte
# tag followed by five big-endian 32-bit values, plus the 32-bit flag
# word found at byte offset 0x70.  When bit 0x40 of that flag word is
# set, the bytes starting at offset <length> (the second field) are
# passed on to parse_mobi_exth.
sub parse_mobi_header {
    my ($header) = @_;

    my ($doctype, $length, $mobitype, $codepage, $uniqueid, $ver) =
        unpack ("a4NNNNN", $header);
    my ($exthflg) = unpack ("N", substr ($header, 0x70));

    # Labels are right-aligned in a 7 character column.
    my @report = (["doctype", $doctype],
                  ["length",  $length],
                  ["type",    $mobitype],
                  ["codep",   $codepage],
                  ["uniqid",  $uniqueid],
                  ["ver",     $ver],
                  ["exthflg", $exthflg]);
    foreach my $entry (@report) {
        my ($label, $value) = @$entry;
        printf STDERR "MOBIHEADER %7s: %s\n", $label, $value;
    }

    # The EXTH block, if flagged, starts right after <length> bytes.
    if ($exthflg & 0x40) {
        parse_mobi_exth (substr ($header, $length));
    }
}

# parse_mobi_exth ($data)
#
# Prints the contents of the block in $data to STDERR.  The block starts
# with a 12-byte header (4-byte tag, 32-bit length, 32-bit item count,
# all big-endian) followed by the items.  Each item is a 32-bit id, a
# 32-bit size that includes the 8 bytes of id and size themselves, and
# size-8 bytes of content.
#
# For each item an "ITEM: <id hex> <size hex> - <id> <size> - <content>"
# line is printed.  Parsing stops with a diagnostic, rather than dying
# inside unpack, when the data ends early or an item declares a size
# smaller than its own 8-byte header.
sub parse_mobi_exth {
    my $data = shift;
    my ($doctype, $len, $n_items) = unpack ("a4NN", $data);
    print STDERR "EXTH doctype: $doctype\n";
    print STDERR "EXTH  length: $len\n";
    print STDERR "EXTH n_items: $n_items\n";

    my $total = length ($data);
    my $pos = 12;    # first item follows the 12-byte block header
    foreach my $item (1..$n_items) {
        # An item header needs 8 bytes; bail out on truncated data.
        if ($pos + 8 > $total) {
            print STDERR "EXTH: data ends before item $item of $n_items\n";
            last;
        }
        my ($id, $size) = unpack ("NN", substr ($data, $pos, 8));

        # A size below 8 is invalid and would also keep $pos from
        # advancing past this item.
        if ($size < 8) {
            print STDERR "EXTH: item $id has invalid size $size\n";
            last;
        }

        my $content = substr ($data, $pos + 8, $size - 8);
        my $hid = sprintf ("%x", $id);
        my $hsize = sprintf ("%x", $size);
        print STDERR "ITEM: $hid $hsize - $id $size - $content\n";
        $pos += $size;
    }
}

=pod

=head1 NAME

mobi2html - A script to convert a mobi file to html

=head1 SYNOPSIS

mobi2html file1.mobi > file2.html

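=head1 DESCRIPTION

Reads a Mobipocket (Palm database) file and prints its text to standard
output. Header information is printed to standard error. Every record of
the database is also written to a file named F<record-ID> in the current
directory, and the loaded database is written to F<t.prc>.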

=head1 OPTIONS

=over 4

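=item B<--fixhtml>

Fix the unpacked HTML code so that it works in a browser: the C<recindex>
attribute of each image tag is replaced by a C<src> attribute naming the
corresponding F<record-ID> file.

=item B<--record0>

Exit after the first record has been processed, i.e. after its header
information has been printed and the record has been written to disk.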

=back

=head1 EXAMPLES

   mobi2html Alice_In_Wonderland.mobi > exp.html

=head1 AUTHOR

Tommy Persson (tpe@ida.liu.se)

=cut




