#!/usr/bin/env perl

#    html2mobi, Copyright (C) 2007 Tommy Persson, tpe@ida.liu.se
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

use FindBin qw($RealBin);
use lib "$RealBin";

use MobiHeader;

use XML::Parser::Lite::Tree;
use Palm::PDB;
use Palm::Doc;
use Data::Dumper;
##use Image::Magick;
use GD;
use File::Copy;

use HTML::TreeBuilder;
use Getopt::Mixed;

use constant DOC_UNCOMPRESSED => scalar 1;
use constant DOC_COMPRESSED => scalar 2;
use constant DOC_RECSIZE => scalar 4096;

use strict;

use vars qw ($opt_title $opt_author $opt_htmlfile $opt_mobifile $opt_gentoc
	     $opt_coverimage $opt_noimages $opt_scale $opt_gen3 $opt_pda
	     $opt_covertext $opt_tocfirst $opt_addcoverlink
	     $opt_prefixtitle);

Getopt::Mixed::getOptions ("title=s author=s htmlfile=s mobifile=s gentoc
                            coverimage=s noimages scale=s gen3 pda
                            covertext=s tocfirst addcoverlink
                            prefixtitle=s");

#
# For release 0.02
#
#   - Nesbit litfile conversion, images do not work...
#   - Fix table of content
#

#
# For release 0.03
#
# news argument, wget, ....
#

# 
# Version 4 still gave me trouble with multi book format.
# Version 3 worked if i shrunk the images. Can it be so that
# big images are not supported in version 3 and in version 4 I 
# have not a correct MOBI header.

#
# Is the author information stored anywhere?
#

#
# If there is a first image and no cover image specified use this
# image. Test what happens if we specify another library image (diffferent
# from cover page)
#
# No images in html code, add one link to cover image
#
# Test cover image for simplehtml
#
# Generate cover image from text
#
# Add cover text to existing image
#

#
# Feed it a lit file and automatically convert it
#

#
# If filename is bbc or nytime than download news
# or use a flag --news
#

#
# plucker to mobi? http://cvs.plkr.org/docs/DBFormat.html?revision=HEAD
#

#
# expand html document with links...
# Generate TOC automatically, guide thingy
# Getting images to work...
# Small image in library, 8 record, 180x240 jpeg
# make testhtmlsgentoc, links in non generatec toc is not working
#
# packages needed: XML::Parser::Lite::Tree
#                  Palm::Doc
#
# Debian: libpalm-perl
#         libimage-size-perl
#

# 8 DWord dwType //pub type: 2=book,3=palmdoc,4=audio,news=257,feed=258,magazin e=259 etc
# C DWord dwCodepage //1252=western, 65001 = UTF8. Better not use anything else



my @filenames = @ARGV;

my $devicetype = "gen3";
$devicetype = "gen3" if (defined $opt_gen3);
$devicetype = "pda" if (defined $opt_pda);

my $tree = 0;
my $title = "dummytitle";
my $author = "Author not specified";


my %file_to_tree = ();
my %file_to_title = ();

my $namerefindex = 0;
my %file_to_nameref = ();

my %opf_id_to_href = ();
my %opf_id_to_mediatype = ();
my @opf_manifest_ids = ();
my @opf_spine_ids = ();

my %link_exists = ();

my $record_index = 0;
my %record_to_image_file = ();
my $mobifile = "t.mobi";
my $coverimage = "";   # strangely enough it crashes on gen3 without a 
                       # library cover image last... That is probably
                       # because of other error not releated to images
                       # since simple.mobi does not load on Palm T5

my $rescale_large_images = 1;

my $tochref = "";

my $clit_flag = 0;

if ($#filenames == 0) {
    my $filename = $filenames[0];
    if ($filename =~ /\.lit$/) {
	unpack_lit_file ($filename, "ctmp");
	$filename =~ s/\.lit/.opf/;
	chdir "ctmp";
	$clit_flag = 1;
    }
    if ($filename =~ /\.opf$/) {
	print STDERR "OPFFILE: $filename\n";
	$mobifile = $filename;
	$mobifile =~ s/\.opf/\.mobi/;
	$tree = get_tree_from_opf ($filename);
    } else {
	$mobifile = $filename;
	$mobifile =~ s/\.html/\.mobi/;
	$mobifile =~ s/\.htm/\.mobi/;
	$tree = one_html_file ($filename);
    }
} else {
    $title = "dummycollectiontitle";
    if (defined $opt_title) {
	$title = $opt_title;
    }
    $tree = get_collection ($title, @filenames);
}

if (defined $opt_title) {
    $title = $opt_title;
}

if (defined $opt_prefixtitle) {
    $title = $opt_prefixtitle . $title;
}

print STDERR "BOOKTITLE: $title\n";

if (defined $opt_htmlfile) {
    open HTML, ">$opt_htmlfile" or die "Could not open html file $opt_htmlfile: $!\n";
    print HTML $tree->as_HTML;
    close HTML;
}

if (defined $opt_mobifile) {
    $mobifile = $opt_mobifile;
}

$author = $opt_author if defined $opt_author;


save_mobi_file ($tree, $title, $author, $mobifile);

if ($clit_flag) {
    move ($mobifile, "../");
}

#
# Mobi file related funtions
#

sub save_mobi_file {
    my $html = shift;
    my $title = shift;
    my $author = shift;
    my $filename = shift;

    print STDERR "Saving mobi file: $mobifile\n";

    my $mobi = new Palm::Doc;
    $mobi->{attributes}{"resource"} = 0;
    $mobi->{attributes}{"ResDB"} = 0;

    $mobi->{"name"} = $title;
    $mobi->{"type"} = "BOOK";
    $mobi->{"creator"} = "MOBI";
    $mobi->{"version"} = 0;
    $mobi->{"uniqueIDseed"} = 28;

#    $mobi->{"attributes"}{"resource"} = $data;

#    my $header = Palm::PDB->new_Record();
#    $header->{"categori"} = 0;
#    $header->{"attributes"}{"Dirty"} = 1;
#    $header->{"id"} = 0;
#    $header->{"data"} = $data;
#    $mobi->append_Record ($header);

##    $mobi->text ([$data, $html->as_HTML ()]);
##    $mobi->text ($html->as_HTML ());

#
# From Doc.pm and modified
#

    my $version = DOC_COMPRESSED;
    $mobi->{'records'} = [];
    $mobi->{'resources'} = [];
    my $header = $mobi->append_Record();    
    $header->{'version'} = $version;
    $header->{'length'} = 0;
    $header->{'records'} = 0;
    $header->{'recsize'} = DOC_RECSIZE;

    my $body = $html->as_HTML ();

#    print STDERR "HTMLSIZE: " . length ($body) . "\n";

    my $current_record_index = 1;
    # break the document into record-sized chunks
    for( my $i = 0; $i < length($body); $i += DOC_RECSIZE ) {
	my $record = $mobi->append_Record;
	my $chunk = substr($body,$i,DOC_RECSIZE);
	$record->{'data'} = Palm::Doc::_compress_record( $version, $chunk );
	$record->{'id'} = $current_record_index++;
	$header->{'records'} ++;
    }
    $header->{'length'} += length $body;

    $header->{'recsize'} = $header->{'length'} if $header->{'length'} < DOC_RECSIZE;

    #
    # pack the Palm Doc  header
    #
    $header->{'data'} = pack( 'n xx N n n N',
			      $header->{'version'}, $header->{'length'},
			      $header->{'records'}, $header->{'recsize'}, 0 );
    #
    # Add MOBI header
    #

    my $mh = new MobiHeader;
    $mh->set_title ($title);
    $mh->set_author ($opt_author) if defined $opt_author;
    $mh->set_image_record_index ($current_record_index);

##    my $codepage = 65001; # utf-8
#    my $codepage = 1252; # westerner
#    my $ver = 3;
#    my $type = 2; # book
#    my $mobiheadersize = 0x74;
#    my $unique_id = 17;
#    if ($ver == 4) {
#	$mobiheadersize = 0xE4;
#    }
#
#    my $extended_title_offset = $mobiheadersize + 16;
#    my $extended_title_length = length ($title);
#
#    my $use_extended_header = 1;
#    my $extended_header_flag = 0x00;
#    if ($use_extended_header) {
#	$extended_header_flag = 0x50; # At MOBI+0x70
#    }
#
#    my $exth = "";
#    if ($use_extended_header) {
#	$exth = pack ("a*", "EXTH");
#	my $content = "";
#	my $n_items = 1;
#	$content .= pack ("NNa*", 100, length ($author)+8, $author);
#	$exth .= pack ("NN", length ($content), $n_items);
#	$exth .= $content;
#	$extended_title_offset += length ($exth);
#    }
#
#    
#    # NNNN    Number of char, N1 N2 N3
#    # N3 = Pointer to start of Title
#    # Not true in Alice Case...
#    #
#
#    my $vie1 = 0; # 0x11 Alice 0x0D Rosenbaum
#
#    print STDERR "MOBIHDR: imgrecpointer: $current_record_index\n";
#
#    $header->{'data'} .= pack ("a*NNNNN", "MOBI",
#			       $mobiheadersize, $type, 
#			       $codepage, $unique_id, $ver);
#
#    $header->{'data'} .= pack ("NN", 0xFFFFFFFF, 0xFFFFFFFF);
#    $header->{'data'} .= pack ("NNNN", 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
#    $header->{'data'} .= pack ("NNNN", 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
#    $header->{'data'} .= pack ("NNNN", $vie1, $extended_title_offset, $extended_title_length, 0x0409);
#    $header->{'data'} .= pack ("NNNN", 0, 0, 0x04, $current_record_index);
#    $header->{'data'} .= pack ("NNNN", 0, 0, 0, 0);
#    $header->{'data'} .= pack ("N", $extended_header_flag);
##    print STDERR "MOBIHEADERSIZE: $mobiheadersize " . length ($header->{'data'}). "\n";
#    while (length ($header->{'data'}) < ($mobiheadersize+16)) {
#	print STDERR "LEN: " . length ($header->{'data'}). " - $mobiheadersize
#\n";
#	$header->{'data'} .= pack ("N", 0);
#    }
#    $header->{'data'} .= $exth;
#    $header->{'data'} .= pack ("a*", $title);
#    for (1..48) {
#	$header->{'data'} .= pack ("N", 0);
#    }
#

    $header->{'data'} .= $mh->get_data ();
   

#
# End from Doc.pm
#

    if (not $opt_noimages) {
	for my $i (1..$record_index) {
	    my $filename = $record_to_image_file{$i};
	    print STDERR "New record for image $i: $filename\n";

#
# Is it really correct to assign id and categori?
#	    
	    my $img = Palm::PDB->new_Record();
	    $img->{"categori"} = 0;
	    $img->{"attributes"}{"Dirty"} = 1;
	    $img->{"id"} = $current_record_index++;
	    my $data = get_image_data ($filename);
	    $img->{"data"} = $data;
	    $mobi->append_Record ($img);
	}
	
	if (defined $opt_coverimage) {
	    $coverimage = $opt_coverimage;
	}
	if ($coverimage) {
	    print STDERR "New record for library image: $coverimage\n";
	    my $img = Palm::PDB->new_Record();
	    $img->{"categori"} = 0;
	    $img->{"attributes"}{"Dirty"} = 1;
	    $img->{"id"} = $current_record_index++;
	    my $data = get_cover_image_data ($coverimage);
	    $img->{"data"} = $data;
	    $mobi->append_Record ($img);
	}
    }

#
# Test to add this last record...
#

#    my $last = Palm::PDB->new_Record();
#    $last->{"categori"} = 0;
#    $last->{"attributes"}{"Dirty"} = 1;
#    $last->{"id"} = $current_record_index++;
#    my $data = pack ("N", 0xE98E0D0A);
#    $last->{"data"} = $data;
#    $mobi->append_Record ($last);


    $mobi->Write ($filename);
}

#
# HTML manipulation functions
#


sub one_html_file {
    my $filename = shift;
    print STDERR "ONEHTMLFILE: $filename\n";
    my $tree = new HTML::TreeBuilder ();
    $tree->ignore_unknown (0);
    $tree->parse_file ($filename) || die "Could not find file: $filename\n";

    check_for_links ($tree);

    $title = $filename;
    my $titleelement = $tree->find ("title");
    if ($titleelement) {
	$title = $titleelement->as_trimmed_text ();
    }
    return $tree;
}

sub get_trees {
    my @files = @_;
    my @res = ();
    foreach my $filename (@files) {
	my $tree = new HTML::TreeBuilder ();
	$tree->ignore_unknown (0);
	$tree->parse_file ($filename) || die "Could not find file: $filename\n";
	push @res, $tree;
    }
    return @res;
}

sub get_title {
    my $t = shift;
    my $res = "";
    my $titleelement = $t->find ("title");
    if ($titleelement) {
	$res = $titleelement->as_trimmed_text ();
    }
    return $res;
}

sub get_titles {
    my @trees = @_;
    my @res = ();
    foreach my $t (@trees) {
	my $title = get_title ($t);
	push @res, $title;
    }
    return @res;
}

sub get_toc_tree {
    my @files = @_;
    my $res = HTML::Element->new('ul');
    my @trees = get_trees (@filenames);
    my @titles = get_titles (@trees);
    foreach my $i (0..@titles-1) {
	my $title = $titles[$i];
	my $file = $files[$i];
	my $name  = "tocname-" . $namerefindex++;

	$file_to_tree{$file} = $trees[$i];
	$file_to_title{$file} = $title;
	$file_to_nameref{$file} = $name;

	print STDERR "GETTOCTREETITLE: $title - $file - $name\n";
	my $li = HTML::Element->new('li');
	my $a = HTML::Element->new('a', href => "\#$name");
	$a->push_content ($title);
	$li->push_content ($a);
	$res->push_content ($li);
    }
    return $res;
}

sub get_collection {
    my $title = shift;
    my @files = @_;
    my $toctree = get_toc_tree (@filenames);
#    print $toctree->as_HTML;
    print STDERR "TITLE: $title\n";
    my $html = HTML::Element->new('html');
    my $head = HTML::Element->new('head');
    my $titleel = HTML::Element->new('title');
    $titleel->push_content ($title);
    $head->push_content ($titleel);

    my $body = HTML::Element->new('body',
				  topmargin => "0",
				  leftmargin => "0",
				  bottommargin => "0",
				  rightmargin => "0");
   

    # topmargin="1em" leftmargin="2em" bottommargin="0" rightmargin="0"

    #
    # Title
    #

    my $h1 = HTML::Element->new('h1');
    $h1->push_content ($title);
    $body->push_content ($h1);

    #
    # Table of content
    #

    if (defined $opt_gentoc) {
	my $h2 = HTML::Element->new('h2');
	$h2->push_content ("TABLE OF CONTENTS");
	$body->push_content ($h2);
	$body->push_content ($toctree);
    }

    #
    # All files
    #

    foreach my $file (@files) {
	my $tree = $file_to_tree{$file};
	check_for_links ($tree);
	my $title = $file_to_title{$file};
	my $nameref = $file_to_nameref{$file};
	my $h2 = HTML::Element->new('h2');
	my $a = HTML::Element->new('a', name => "$nameref");
	$a->push_content ("$title");
	$h2->push_content ($a);
	$body->push_content ($h2);
	my $b = $tree->find ("body");
	$body->push_content ($b->content_list());
    }



    $html->push_content ($head);
    $html->push_content ($body);
    return $html;
}

sub check_for_links {
    my $html = shift;
    for (@{$html->extract_links('a', 'img')}) {
	my($link, $element, $attr, $tag) = @$_;
	next if ($link =~ /http/);
	next if ($link =~ /mailto/);
	next if ($link =~ /www/);
#	print STDERR "LINK: $tag $link $attr at ", $element->address(), " ";
	if ($tag eq "a") {
	    my $filename = $element->as_trimmed_text ();
##	    print STDERR "LINKEXISTS $filename -> $link - ";
	    #
	    # Remove possible prefix file name in link
	    #
	    
	    $link =~ s/^.*\#//;
##	    print STDERR "$link\n";

	    $element->attr("href", "\#$link");
	    $link_exists{$link} = 1;
	    next;
	}
	if ($tag eq "img") {
	    my $src = $element->attr("src");
	    $element->attr("src", undef);
	    $record_index++;
	    #
	    # Does not work for more than 9 images
	    #
	    $element->attr("recindex", "0000" . $record_index);
	    $record_to_image_file{$record_index} = $src;
	    next;
	}
	print STDERR "LINK: $tag $link $attr at ", $element->address(), " ";
#	print STDERR $element->as_HTML;
    }
}

#
# OPF related functions
#

sub get_tree_from_opf {
    my $file = shift;
    open OPF, "<$file" or die "Could not open opf file: $file\n";
    local $/;
    my $content = <OPF>;
    my $tree_parser = XML::Parser::Lite::Tree::instance();
    my $opf = $tree_parser->parse($content);

    $title = opf_get_title ($opf);
    # global variable $title, bad...
    print STDERR "OPFTITLE: $title\n";

    parse_manifest ($opf);

    #
    # If cover image not assigned search all files in current dir
    # and see if some file is a coverimage
    #

    if ($coverimage eq "") {
	opendir DIR, ".";
	my @files = readdir (DIR);
	foreach my $f (@files) {
	    if ($f =~ /\.jpg/ or 
		$f =~ /\.JPG/ or 
		$f =~ /\.gif/) {
#		print STDERR "Checking if file is coverimage: $f\n";
		if (is_cover_image ($f)) {
		    $coverimage = $f;
		}
	    }
	}
    }



    parse_spine ($opf);

    parse_guide ($opf);

#    print STDERR Dumper($opf);

    my $html = HTML::Element->new('html');
    my $head = HTML::Element->new('head');

    #
    # Generate guide tag, specific for Mobipocket and is
    # not understood by HTML::TreeBuilder...
    #

    
    my $guide = HTML::Element->new('guide');
    if ($tochref) {
	my $tocref = HTML::Element->new('reference', 
					title=>"Table of Contents",
					type=>"toc",
					href=>"\#$tochref");
	$guide->push_content ($tocref);
    }

    if (defined $opt_addcoverlink) {
	my $coverref = HTML::Element->new('reference', 
					  title=>"Cover",
					  type=>"cover",
					  href=>"\#addedcoverlink");
	$guide->push_content ($coverref);
    }
    $head->push_content ($guide);

    my $titleel = HTML::Element->new('title');
    $titleel->push_content ($title);
    $head->push_content ($titleel);

    #
    # Generate body
    #

    my $body = HTML::Element->new('body');

#				  topmargin => "0",
#				  leftmargin => "0",
#				  bottommargin => "0",
#				  rightmargin => "0");


    my $coverp = HTML::Element->new('p', 
				    id=>"addedcoverlink",
				    align=>"center");
    my $coverimageel = HTML::Element->new('a', 
					  onclick => 
					  "document.goto_page_relative(1)");
    $coverp->push_content ($coverimageel);

    if (defined $opt_addcoverlink) {
	$body->push_content ($coverp);
	$body->push_content (HTML::Element->new('mbp:pagebreak'));
    }

#<p align="center"><a onclick="document.goto_page_relative(1)"><img src="pda_cover.gif" hisrc="pc_cover.gif" /></a></p>

    #
    # Add TOC first also if --tocfirst
    #
    if ($tochref and defined $opt_tocfirst) {
	print STDERR "ADDING TOC FIRST ALSO: $tochref\n";
	my $tree = new HTML::TreeBuilder ();
	$tree->ignore_unknown (0);
	$tree->parse_file ($tochref) || die "Could not find file: $tochref\n";
	check_for_links ($tree);
	my $b = $tree->find ("body");
	$body->push_content ($b->content_list());
	$body->push_content (HTML::Element->new('mbp:pagebreak'));
    }


    #
    # All files in manifest
    #

    foreach my $id (@opf_spine_ids) {
	my $filename = $opf_id_to_href{$id};
	my $mediatype = $opf_id_to_mediatype{$id};

	next unless ($mediatype =~ /text/); # only include text content

	my $tree = new HTML::TreeBuilder ();
	$tree->ignore_unknown (0);
	$tree->parse_file ($filename) || die "Could not find file: $filename\n";

	check_for_links ($tree);

	print STDERR "Adding: $filename - $id\n";

#	my $tree = $file_to_tree{$file};
#	my $title = $file_to_title{$file};
#	my $nameref = $file_to_nameref{$file};
#	my $h2 = HTML::Element->new('h2');
#	my $a = HTML::Element->new('a', name => "$nameref");
#	$a->push_content ("$title");
#	$h2->push_content ($a);
#	$body->push_content ($h2);

##	print STDERR "FILETOLINKCHECK:$filename:\n";
	if ($link_exists{$filename}) {
##	    print STDERR "FILETOLINKCHECK:$filename: SUCCESS\n";
	    my $a = HTML::Element->new('a', name => $filename);
	    $body->push_content ($a);
	}

	my $b = $tree->find ("body");
	$body->push_content ($b->content_list());
    }

    #
    # Check if no images in document and include cover image if it exists
    #

    if (defined $opt_coverimage) {
	$coverimage = $opt_coverimage;
    }

    if (keys %record_to_image_file == 0) {
	if ($coverimage) {
	    print STDERR "NO IMAGES IN BOOK: Adding cover image: $coverimage\n";
	    $record_index++;
	    $record_to_image_file{$record_index} = $coverimage;
	    if (defined $opt_addcoverlink) {
		my $el = HTML::Element->new ('img', recindex => "00001");
		$coverimageel->push_content ($el);
	    }
	}
    }

    #
    #  Fix anchor to positions given by id="III"...
    #
    # filepos="0000057579"
    #

    my @refs = $body->look_down ("href", qr/^\#/);
    push @refs, $head->look_down ("href", qr/^\#/);
    my @hrefs = ();
    my @refels = ();
    my %href_to_ref = ();
    foreach my $r (@refs) {
	$r->attr ("filepos", "0000000000");
	my $key = $r->attr ("href");
	$key =~ s/\#//g;
	push @hrefs, $key;
	push @refels, $r;
#	$r->attr ("href", undef);
    }

    $html->push_content ($head);
    $html->push_content ($body);
    my $data = $html->as_HTML ();
    foreach my $i (0..$#hrefs) {
	my $h = $hrefs[$i];
	my $r = $refels[$i];
	my $searchfor1 = "id=\"$h\"";
	my $searchfor2 = "<a name=\"$h\"";
	
	print STDERR "SEARCHFOR1: $searchfor1\n";
	my $pos = index ($data, $searchfor1);
	if ($pos >= 0) {
	    #
	    # search backwards for <
	    #
	    
	    while (substr ($data, $pos, 1) ne "<") {
		$pos--;
	    }

##	    $pos -=4; # back 4 positions to get to <h2 id=
	    my $form = "0" x (10-length($pos)) . "$pos";
	    print STDERR "POSITION: $pos - $searchfor1 - $form\n";
	    $r->attr ("filepos", "$form");
	} else {
	    print STDERR "SEARCHFOR2: $searchfor2\n";
	    $pos = index ($data, $searchfor2);
	    if ($pos >= 0) {
		my $form = "0" x (10-length($pos)) . "$pos";
		print STDERR "POSITION: $pos - $searchfor2 - $form\n";
		$r->attr ("filepos", "$form");
	    } else {
	    }
	}
    }
    

#    my @anchors = $body->look_down ("id", qr/./);
#    foreach my $a (@anchors) {
#	my $name = $a->attr("id");
#	my $tag = $a->tag ();
#	my $text = $a->as_trimmed_text ();
#	if ($link_exists{$name}) {
#	    $a->delete_content ();
#	    my $ael = HTML::Element->new('a', name => $name);
#	    $ael->push_content ($text);
#	    $a->push_content ($ael);
#	}
#	print STDERR "ANCHORS: $tag - $name - $text\n";
#    }



#    $html->push_content ($head);
#    $html->push_content ($body);
    return $html;
}


sub opf_get_title {
    my $opf = shift;
    my $type = $opf->{"type"};
#    print STDERR "TYPE: $type - ";
    
    if ($type eq "tag") {
	my $name = $opf->{"name"};
#	print STDERR "$name\n";
	if ($name eq "dc:Title") {
	    my $children = $opf->{"children"};
	    return @{$children}[0]->{"content"};
	}
    }

    if ($type eq "data") {
	return "";
	my $content = $opf->{"content"};
	chomp $content;
	chomp $content;
	print STDERR "$content\n";
    }

    if ($type eq "tag" or $type eq "root") {
	my $children = $opf->{"children"};
	foreach my $c (@{$children}) {
	    my $res = opf_get_title ($c);
	    if ($res) {
		return $res;
	    }
	}
    }
    return "";
}

sub parse_manifest {
    my $opf = shift;
    my $type = $opf->{"type"};
#    print STDERR "TYPE: $type - ";

    if ($type eq "tag") {
	my $name = $opf->{"name"};
#	print STDERR "$name\n";
	if ($name eq "manifest") {
	    print STDERR "Init from manifest\n";
	    my $children = $opf->{"children"};
	    foreach my $c (@{$children}) {
		if ($c->{name} eq "item") {
		    my $id = $c->{"attributes"}->{"id"};
		    my $href= $c->{"attributes"}->{"href"};
		    my $mediatype = $c->{"attributes"}->{"media-type"};
		    print STDERR "$id - $href - $mediatype\n";
		    $opf_id_to_href{$id} = $href;
		    $opf_id_to_mediatype{$id} = $mediatype;
		    push @opf_manifest_ids, $id;

		    #
		    # Check if image is coverimage file
		    #

		    if ($mediatype =~ /image/) {
			if (is_cover_image ($href)) {
			    $coverimage = $href;
			}
		    }
		}
	    }
	    
	    return;
	}
    }

    if ($type eq "data") {
	return "";
    }
	
    if ($type eq "tag" or $type eq "root") {
	my $children = $opf->{"children"};
	foreach my $c (@{$children}) {
	    parse_manifest ($c);
	}
    }
}

sub is_cover_image {
    my $file = shift;
    my $res = 0;
    if (not -e $file) {
	die "ERROR: File does not exist: $file";
    }
    my $p = new GD::Image ($file);
    if (not defined $p) {
	print STDERR "Could not read image file: $file\n";
    }
    my ($x, $y) = $p->getBounds();
#    my $x = $p->width;
#    my $y = $p->height;
    if ($x == 510 and $y == 680) {
	print STDERR "GUESSING COVERIMAGE: $file\n";
	$res = 1;
    }
    if ($x == 600 and $y == 800) {
	print STDERR "GUESSING COVERIMAGE: $file\n";
	$res = 1;
    }
    return $res;
}

sub parse_spine {
    my $opf = shift;
    my $type = $opf->{"type"};
#    print STDERR "TYPE: $type - ";

    if ($type eq "tag") {
	my $name = $opf->{"name"};
#	print STDERR "$name\n";
	if ($name eq "spine") {
#	    print STDERR "Init from spine\n";
	    my $children = $opf->{"children"};
	    my %idcheck = ();
	    foreach my $c (@{$children}) {
		if ($c->{name} eq "itemref") {
		    my $idref = $c->{"attributes"}->{"idref"};
		    if ($idcheck{$idref}) {
			print STDERR "WARNING: Spine, duplice idref: $idref\n";
		    } else {
			push @opf_spine_ids, $idref;
			$idcheck{$idref} = 1;
		    }
		}
	    }
	    foreach my $id (@opf_manifest_ids) {
		if (not $idcheck{$id}) {
		    print STDERR "Warning, $id missing from spine, adding\n";
		    push @opf_spine_ids, $id;
		}
	    }
	    
	    return;
	}
    }

    if ($type eq "data") {
	return "";
    }

    if ($type eq "tag" or $type eq "root") {
	my $children = $opf->{"children"};
	foreach my $c (@{$children}) {
	    parse_spine ($c);
	}
    }
}


sub parse_guide {
    my $opf = shift;
    my $type = $opf->{"type"};
#    print STDERR "TYPE: $type - ";

    if ($type eq "tag") {
	my $name = $opf->{"name"};
#	print STDERR "$name\n";
	if ($name eq "guide") {
#	    print STDERR "Init from guide\n";
	    my $children = $opf->{"children"};
	    foreach my $c (@{$children}) {
		if ($c->{name} eq "reference") {
		    my $type = $c->{"attributes"}->{"type"};
#		    print STDERR "TYPE: $type\n";
		    if ($type eq "toc") {
			$tochref = $c->{"attributes"}->{"href"};
#			print STDERR "TOCHREF: $tochref\n";
		    }
		}
	    }
	    return;
	}
    }

    if ($type eq "data") {
	return "";
    }

    if ($type eq "tag" or $type eq "root") {
	my $children = $opf->{"children"};
	foreach my $c (@{$children}) {
	    parse_guide ($c);
	}
    }
}




#
# Misc help functions
#

sub get_image_data {
    my $filename = shift;

    # Let first image be library image
    if (not $coverimage) {
	$coverimage = $filename;
    }

    my $data = "";

    if (not -e $filename) {
	print STDERR "Image file does not exist: $filename\n";
	return $data;
    }

    my $p = new GD::Image ("$filename");
    my ($x, $y) = $p->getBounds();
#    my $x = $p->width;
#    my $y = $p->height;

    #
    # If I do not resize 600x800 images it does not work on Gen3
    #
    # check this one more time, 600x800 gif and jpeg with size
    # less than 64K does not work on Gen3
    #

    if ($rescale_large_images) {
	if ($x > 480) {
	    # width might be the problem...
	    my $scale = 480.0/$x; # 0.99 does not work, 480x640 works
	    $p = scale_gd_image ($p, $scale);
	}
    }

    #
    #   Scale if scale option given
    #   or does it work just setting width?
    #

    my $quality = -1;
    my $size = length (get_gd_image_data ($p, $filename));
    my $maxsize = 65536;
    if ($size > $maxsize) {
	$quality = 100;
	while (length (get_gd_image_data ($p, $filename, $quality)) >
	       $maxsize and $quality >= 0) {
	    $quality -= 10;
	}
	if ($quality < 0) {
	    die "Could not shrink image file size for $filename";
	}
    } 

    if ($y < 640 and $x < 480 and defined $opt_scale) {
	my $scale = $opt_scale;
	$p = scale_gd_image ($p, $scale);
	print STDERR "Rescaling $$scale\n";
    }

    my $data = get_gd_image_data ($p, $filename, $quality);
    return $data;
}

sub get_text_image {
    my $width = shift;
    my $height = shift;
    my $text = shift;
    my $image = Image::Magick->new;
#    $image->Set(size=>"$width x $height");
#    $image->ReadImage('xc:white');
#    $image->Draw (pen => "red",
#		  primitive => "text",
#		  x => 200,
#		  y => 200,
#		  font => "Bookman-DemiItalic",
#		  text => "QQQQ$text, 200, 200",
#		  fill => "black",
#		  pointsize => 40);
#    $image->Draw(pen => 'red', fill => 'red', primitive => 'rectangle',
#		 points => '20,20 100,100');
#    $image->Write (filename => "draw2.jpg");
}

sub get_gd_image_data {
    my $im = shift;
    my $filename = shift;
    my $quality = shift;

    $quality = -1 if not defined $quality;

    #
    # For some strange reason it does not work if using
    # the gif file with size 600x800
    #

#    if ($filename =~ /\.gif/) {
#	return $im->gif ();
#    }

    if ($quality <= 0) {
	return $im->jpeg ();
    } else {
	return $im->jpeg ($quality);
    }
}

sub scale_gd_image {
    my $im = shift;
    my $x = shift;
    my $y = shift;
    my ($w0, $h0) = $im->getBounds();
#    my $w0 = $im->width;
#    my $h0 = $im->height;
    my $w1 = $w0*$x;
    my $h1 = $h0*$x;
##    print STDERR "SCALE GD: $w0 $h0 $w1 $h1\n";
    if (defined $y) {
	$w1 = $x;
	$h1 = $y;
    }
    my $res = new GD::Image ($w1, $h1);
    $res->copyResized ($im, 0, 0, 0, 0, $w1, $h1, $w0, $h0);
    return $res;
}

sub add_text_to_image {
    my $im = shift;
    my $text = shift;
    my $x = $im->Get ("width");
    my $y = $im->Get ("height");

    if (defined $text and $text) {
	print STDERR "DRAW TEXT: $text\n";
	my $textim = get_text_image ($x, $y, $text);
	$im->Draw (primitive => "text",
		   text => $text,
		   points => "50,50",
		   fill => "red",
		   pointsize => 72);
    }
    $im->Write (filename => "draw.jpg");

}

sub get_cover_image_data {
    my $filename = shift;
##    print STDERR "COVERIMAGE: $filename\n";
    my $data = "";

    if (not -e $filename) {
	print STDERR "Image file does not exist: $filename\n";
	return $data;
    }

    my $p = new GD::Image ("$filename");
    my ($x, $y) = $p->getBounds();
#    my $x = $p->width;
#    my $y = $p->height;
##    add_text_to_image ($p, $opt_covertext);
    my $scaled = scale_gd_image ($p, 180, 240);
    print STDERR "Resizing image $x x $y -> 180 x 240 -> scaled.jpg\n";
    return $scaled->jpeg ();
}


#
# lit file functons
#

sub unpack_lit_file {
    my $litfile = shift;
    my $unpackdir = shift;

    print STDERR "Unpack file $litfile in dir $unpackdir\n";

    mkdir $unpackdir;

    opendir DIR, $unpackdir;
    my @files = readdir (DIR);
    foreach my $f (@files) {
	if ($f =~ /^\./) {
	    next;
	}
	if ($f =~ /^\.\./) {
	    next;
	}
#    print STDERR "FILE: $f\n";
	unlink "$unpackdir/$f";
    }

    system ("clit \"$litfile\" $unpackdir") == 0
	or die "system (clit $litfile $unpackdir) failed: $?";

}


=pod

=head1 NAME

html2mobi - A script to convert html files or an opf file to mobi

=head1 SYNOPSIS

html2mobi file.html

html2mobi file1.html file2.html ...

html2mobi file.opf

html2mobi file.lit

=head1 DESCRIPTION

A script to convert html files or an opf file to a mobi format file.

If clit is installed the script will convert a lit file to mobi.

=head1 OPTIONS

=over 4

=item B<--title TITLE>

Specify the title for the book. This overrides the value given in the
opf file.

=item B<--prefixtitle PREFIX>

Add a prefix to the title of the book. Useful for specifying number
for books in series.

=item B<--author AUTHOR>

Specify the author of the book. This overrides the value given in the
opf file. This value is stored in the EXTH part of record 0.

=item B<--mobifile MOBIFILE>

Name of the output file. This overrides the default value.

=item B<--htmlfile HTMLFILE>

Saves the html that is packed into mobi format. This html code contains
Mobipocket specific things that are added automatically. This is mostly
useful for debugging.

=item B<--coverimage IMAGE>

The image to be used as cover in a library listing like the one in
Cybook Gen3. The image will be rescaled to a suitable format (180x240).
If no image is specified the first image in the source files is used.

=item B<--addcoverlink>

Add link to cover image first in main document.

=item B<--gentoc>

For a collection of html files generate the table of contents automatically.

=item B<--tocfirst>

If generating from an opf file make a copy of the toc and place it first.

=item B<--pda>

Scale images to work for pda's (must be used for Alice to work on my Palm T5).

=item B<--scale f>

Scale all images that are smaller then a certain size with scale factor f.

=back

=head1 EXAMPLES

   html2mobi Alice_In_Wonderland.opf

   html2mobi Alice_In_Wonderland.html

   html2mobi Alice_In_Wonderland.lit

=head1 TODO

   - Specify margins with flags

   - Follow local links when given a root html file

   - Get meta information from somewhere...

=head1 BUGS

   - --addcoverlink does not work for The_Railway_Children.mobi

   - Images larger than a certain size less than 600x800 does
     not work on the Gen3. I now resize so that maximum width
     is 480. But 600x800 gif file did work with the demo Alice
     mobi file. So there is a bug here somewhere.

   - Guide specified toc seems to work but maybe it jumps to the
     wrong position since the header is in the middle of the 
     screen when using FBReader.

   - When including the cover image from a link as in the Alice
     example you get two images in the Gen3.

   - Plus a lot more.... this is an alpha version

   


=head1 AUTHOR

Tommy Persson (tpe@ida.liu.se)

=cut




