|
From: <sh...@us...> - 2008-01-27 17:58:48
|
Revision: 56
http://fb2-perl-tools.svn.sourceforge.net/fb2-perl-tools/?rev=56&view=rev
Author: shaplov
Date: 2008-01-27 09:58:54 -0800 (Sun, 27 Jan 2008)
Log Message:
-----------
Editing fb22htmls Step1
Modified Paths:
--------------
trunk/fb2-perl-tools/XSL/fb22htmls.xsl
trunk/fb2-perl-tools/fb2/Convert/Htmls.pm
trunk/fb2-perl-tools/fb22htmls
Modified: trunk/fb2-perl-tools/XSL/fb22htmls.xsl
===================================================================
--- trunk/fb2-perl-tools/XSL/fb22htmls.xsl 2008-01-27 17:52:52 UTC (rev 55)
+++ trunk/fb2-perl-tools/XSL/fb22htmls.xsl 2008-01-27 17:58:54 UTC (rev 56)
@@ -1,5 +1,31 @@
+<!-- Copyright (c) 2004 Dmitry Gribov (GribUser)
+ 2008 Nikolay Shaplov
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -->
+
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0">
- <xsl:output method="html" encoding="windows-1251"/>
+ <xsl:output method="html" encoding="utf-8"/>
<xsl:param name="PageN">1</xsl:param>
<xsl:param name="TotalPages">1</xsl:param>
<xsl:param name="BookTitle">NoName</xsl:param>
Modified: trunk/fb2-perl-tools/fb2/Convert/Htmls.pm
===================================================================
--- trunk/fb2-perl-tools/fb2/Convert/Htmls.pm 2008-01-27 17:52:52 UTC (rev 55)
+++ trunk/fb2-perl-tools/fb2/Convert/Htmls.pm 2008-01-27 17:58:54 UTC (rev 56)
@@ -1,186 +1,214 @@
-#!/usr/bin/perl
-package FB2ToManyHTML;
-
-use XML::Parser;
-use XML::LibXSLT;
-use XML::LibXML;
-use strict;
-no warnings;
-
-my $Mute;
-my $SectionSize;
-my $MinSectionSize;
-
-my @BodyParts;
-my $RootAttrs;
-my $BookTitle;
-
-sub Kolbasim{
- my $FileToParce=shift;
- my $StyleSheet=shift;
- my $OutFileName=shift;
- $SectionSize=shift;
- $MinSectionSize=shift;
- $Mute=shift;
- ($RootAttrs,$BookTitle,@BodyParts)=&SplitBook($FileToParce);
- return TransformParts($StyleSheet,$OutFileName,$RootAttrs,$BookTitle,@BodyParts);
-}
-
-sub SplitBook{
- my $FileToParce=shift;
- my $I;
- my $CurDeepness=0;
- my ($CurPart,$InBinary,$InHead);
- my $Description;
- my @BodyAsArray;
- my $PartSize=0;
- my $PartStarted=1;
- my $InSectionTitle=0;
- my $AllowSectionTitle=0;
- my $CanCutHere=1;
- my $SectionTitle;
- my @ContentParts;
- my $InNotesBody;
- my $RootAttrs;
- my $BookTitle;
- my $InBookTitle;
- my $SplitParser=new XML::Parser(Handlers => {
- Start => sub {
- my $expat=shift;
- my $elem=shift;
- my %Params=@_;
- $I++;
- print "Working element #$I\r" unless $Mute;
-
- $InHead = 1 if $elem eq 'description';
- $InBinary=($elem eq 'description')?1:0;
- $CurPart='' if $elem=~/\Adescription\Z/;
- if ($elem eq 'FictionBook'){
- for (keys(%Params)){
- $RootAttrs.=" $_=\"".xmlescape($Params{$_})."\"" unless $_ eq 'xmlns';
- }
- }
- $InBookTitle=$elem eq 'book-title'?1:0 ;
- unless ($elem eq 'section'){
- if ($elem eq 'title'){
- $Params{'deepness'}=$InNotesBody?5:$CurDeepness;
- $Params{'number'}=scalar @BodyAsArray;
- }
- $CurPart.="<$elem";
- for (keys(%Params)){
- $CurPart.=" $_=\"".xmlescape($Params{$_})."\"";
- }
- $CurPart.=">";
- }else{
- $CurDeepness++;
- }
- $AllowSectionTitle=0 if $elem eq 'poem';
- $InSectionTitle=1 if ($AllowSectionTitle && $elem eq 'title');
- if ($elem=~/\A(title|epigraph|annotation|poem|cite)\Z/){
- $CanCutHere=0;
- }
- if ($elem=~/\A(section|body)\Z/){
-# $CurPart='' unless $InNotesBody;
- $CurPart='' if $elem eq 'body';
- $PartSize=0 if $elem eq 'body';
- $PartStarted=1;
- $AllowSectionTitle=1;
- $SectionTitle='';
- $InNotesBody=1 if ($elem eq 'body' && $Params{'name'}=~/\Anotes\Z/i);
- }
- },
- Char => sub {
- $PartSize+=length($_[1]);
- $CurPart.=xmlescape($_[1]) unless $InBinary;
- $SectionTitle.=xmlescape($_[1]) if $InSectionTitle;
- $BookTitle.=xmlescape($_[1]) if $InBookTitle;
- },
- End => sub {
- my $elem=$_[1];
- $CurPart.="</".$_[1].">" unless $elem=~ /(section|body)/;
- if (((!$InHead && $CanCutHere && $elem eq 'p' && $PartSize>=$SectionSize) ||
- ($elem eq 'section' && $PartSize>=$MinSectionSize) || $elem eq 'description') && !$InNotesBody || $elem eq 'body') {
- my %t=(
- 'parstart'=>$PartStarted,
- 'partcontent'=>$CurPart,
- 'level'=>$CurDeepness
- );
- push(@BodyAsArray,\%t) unless $CurPart=~/\A\s+\Z/;
- $CurPart='';
- $PartSize=0;
- $PartStarted=0;
- $InHead=0 if $_[1] eq 'description';
- }
- $CurDeepness-- if $_[1] eq 'section';
- if ($elem=~/\A(title|epigraph|annotation|poem|cite)\Z/){
- $CanCutHere=1;
- }
- $AllowSectionTitle=0 if $elem eq 'section';
-
- if ($elem eq 'p' && $InSectionTitle && $SectionTitle){
- my %t=('title'=>$SectionTitle,'N'=>(scalar @BodyAsArray), 'deep'=>$CurDeepness);
- push (@ContentParts,\%t);
- $InSectionTitle=0;
- $SectionTitle='';
- $AllowSectionTitle=0;
- }
- }
- });
-
- $SplitParser->parsefile($FileToParce) or die $!;
- $SplitParser=undef;
- for (@ContentParts){
- $BodyAsArray[0]->{'partcontent'}.="<toc-item n=\"".$_->{'N'}."\" deep=\"".$_->{'deep'}."\">".$_->{'title'}."</toc-item>\n";
- }
- return ($RootAttrs,$BookTitle,@BodyAsArray);
-}
-
-sub TransformParts{
- my $StyleSheet=shift;
- my $OutFileName=shift;
- my $RootAttrs=shift;
- my $BookTitle=shift;
- my @Parts=@_;
- my $OutFileSHort=$OutFileName;
- $OutFileSHort=~s/\A(.*[\/\\])?([^\/\\]*)\Z/$2/;
- for (my $I=0;$I<@Parts;$I++){
- my $ItemLength=$Parts[$I]->{'title'};
- print "Generating file ${OutFileName}_$I.html...\n" unless $Mute;
- my $Result=TransformXML("<part$RootAttrs>".$Parts[$I]->{'partcontent'}."</part>",
- $StyleSheet,'PageN'=>"'$I'",
- 'TotalPages'=>'"'.(@Parts-1).'"',
- 'FileName'=>"'$OutFileSHort'",
- 'BookTitle'=>"'$BookTitle'");
- open OUTFILE,">${OutFileName}_$I.html";
- print OUTFILE $Result;
- close OUTFILE;
- }
- return scalar(@Parts);
-}
-
-sub xmlescape {
- my %escapes=(
- '&' => '&amp;',
- '<' => '&lt;',
- '>' => '&gt;',
- '"' => '&quot;',
- "'" => '&apos;'
- );
- $b=shift;
- $_=$b;
- s/([&<>'"])/$escapes{$1}/gs;
- $_;
-}
-
-sub TransformXML{
- my $XML=shift;
- my $XSL=shift;
- my $parser = XML::LibXML->new();
- my $xslt = XML::LibXSLT->new();
- my $source = $parser->parse_string($XML);
- my $style_doc = $parser->parse_file($XSL);
- my $stylesheet = $xslt->parse_stylesheet($style_doc);
- my $results = $stylesheet->transform($source,@_);
- $stylesheet->output_string($results);
-}
+#!/usr/bin/perl
+
+# Copyright (c) 2004 Dmitry Gribov (GribUser)
+# 2008 Nikolay Shaplov
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+package fb2::Convert::Htmls;
+
+use XML::Parser;
+use XML::LibXSLT;
+use XML::LibXML;
+use strict;
+no warnings;
+
+my $Mute;
+my $SectionSize;
+my $MinSectionSize;
+
+my @BodyParts;
+my $RootAttrs;
+my $BookTitle;
+
+sub Kolbasim{
+ my $FileToParce=shift;
+ my $StyleSheet=shift;
+ my $OutFileName=shift;
+ $SectionSize=shift;
+ $MinSectionSize=shift;
+ $Mute=shift;
+ ($RootAttrs,$BookTitle,@BodyParts)=&SplitBook($FileToParce);
+ return TransformParts($StyleSheet,$OutFileName,$RootAttrs,$BookTitle,@BodyParts);
+}
+
+sub SplitBook{
+ my $FileToParce=shift;
+ my $I;
+ my $CurDeepness=0;
+ my ($CurPart,$InBinary,$InHead);
+ my $Description;
+ my @BodyAsArray;
+ my $PartSize=0;
+ my $PartStarted=1;
+ my $InSectionTitle=0;
+ my $AllowSectionTitle=0;
+ my $CanCutHere=1;
+ my $SectionTitle;
+ my @ContentParts;
+ my $InNotesBody;
+ my $RootAttrs;
+ my $BookTitle;
+ my $InBookTitle;
+ my $SplitParser=new XML::Parser(Handlers => {
+ Start => sub {
+ my $expat=shift;
+ my $elem=shift;
+ my %Params=@_;
+ $I++;
+ print "Working element #$I\r" unless $Mute;
+
+ $InHead = 1 if $elem eq 'description';
+ $InBinary=($elem eq 'description')?1:0;
+ $CurPart='' if $elem=~/\Adescription\Z/;
+ if ($elem eq 'FictionBook'){
+ for (keys(%Params)){
+ $RootAttrs.=" $_=\"".xmlescape($Params{$_})."\"" unless $_ eq 'xmlns';
+ }
+ }
+ $InBookTitle=$elem eq 'book-title'?1:0 ;
+ unless ($elem eq 'section'){
+ if ($elem eq 'title'){
+ $Params{'deepness'}=$InNotesBody?5:$CurDeepness;
+ $Params{'number'}=scalar @BodyAsArray;
+ }
+ $CurPart.="<$elem";
+ for (keys(%Params)){
+ $CurPart.=" $_=\"".xmlescape($Params{$_})."\"";
+ }
+ $CurPart.=">";
+ }else{
+ $CurDeepness++;
+ }
+ $AllowSectionTitle=0 if $elem eq 'poem';
+ $InSectionTitle=1 if ($AllowSectionTitle && $elem eq 'title');
+ if ($elem=~/\A(title|epigraph|annotation|poem|cite)\Z/){
+ $CanCutHere=0;
+ }
+ if ($elem=~/\A(section|body)\Z/){
+# $CurPart='' unless $InNotesBody;
+ $CurPart='' if $elem eq 'body';
+ $PartSize=0 if $elem eq 'body';
+ $PartStarted=1;
+ $AllowSectionTitle=1;
+ $SectionTitle='';
+ $InNotesBody=1 if ($elem eq 'body' && $Params{'name'}=~/\Anotes\Z/i);
+ }
+ },
+ Char => sub {
+ $PartSize+=length($_[1]);
+ $CurPart.=xmlescape($_[1]) unless $InBinary;
+ $SectionTitle.=xmlescape($_[1]) if $InSectionTitle;
+ $BookTitle.=xmlescape($_[1]) if $InBookTitle;
+ },
+ End => sub {
+ my $elem=$_[1];
+ $CurPart.="</".$_[1].">" unless $elem=~ /(section|body)/;
+ if (((!$InHead && $CanCutHere && $elem eq 'p' && $PartSize>=$SectionSize) ||
+ ($elem eq 'section' && $PartSize>=$MinSectionSize) || $elem eq 'description') && !$InNotesBody || $elem eq 'body') {
+ my %t=(
+ 'parstart'=>$PartStarted,
+ 'partcontent'=>$CurPart,
+ 'level'=>$CurDeepness
+ );
+ push(@BodyAsArray,\%t) unless $CurPart=~/\A\s+\Z/;
+ $CurPart='';
+ $PartSize=0;
+ $PartStarted=0;
+ $InHead=0 if $_[1] eq 'description';
+ }
+ $CurDeepness-- if $_[1] eq 'section';
+ if ($elem=~/\A(title|epigraph|annotation|poem|cite)\Z/){
+ $CanCutHere=1;
+ }
+ $AllowSectionTitle=0 if $elem eq 'section';
+
+ if ($elem eq 'p' && $InSectionTitle && $SectionTitle){
+ my %t=('title'=>$SectionTitle,'N'=>(scalar @BodyAsArray), 'deep'=>$CurDeepness);
+ push (@ContentParts,\%t);
+ $InSectionTitle=0;
+ $SectionTitle='';
+ $AllowSectionTitle=0;
+ }
+ }
+ });
+
+ $SplitParser->parsefile($FileToParce) or die $!;
+ $SplitParser=undef;
+ for (@ContentParts){
+ $BodyAsArray[0]->{'partcontent'}.="<toc-item n=\"".$_->{'N'}."\" deep=\"".$_->{'deep'}."\">".$_->{'title'}."</toc-item>\n";
+ }
+ return ($RootAttrs,$BookTitle,@BodyAsArray);
+}
+
+sub TransformParts{
+ my $StyleSheet=shift;
+ my $OutFileName=shift;
+ my $RootAttrs=shift;
+ my $BookTitle=shift;
+ my @Parts=@_;
+ my $OutFileSHort=$OutFileName;
+ $OutFileSHort=~s/\A(.*[\/\\])?([^\/\\]*)\Z/$2/;
+ for (my $I=0;$I<@Parts;$I++){
+ my $ItemLength=$Parts[$I]->{'title'};
+ print "Generating file ${OutFileName}_$I.html...\n" unless $Mute;
+ my $Result=TransformXML("<part$RootAttrs>".$Parts[$I]->{'partcontent'}."</part>",
+ $StyleSheet,'PageN'=>"'$I'",
+ 'TotalPages'=>'"'.(@Parts-1).'"',
+ 'FileName'=>"'$OutFileSHort'",
+ 'BookTitle'=>"'$BookTitle'");
+ open OUTFILE,">${OutFileName}_$I.html";
+ print OUTFILE $Result;
+ close OUTFILE;
+ }
+ return scalar(@Parts);
+}
+
+sub xmlescape {
+ my %escapes=(
+ '&' => '&amp;',
+ '<' => '&lt;',
+ '>' => '&gt;',
+ '"' => '&quot;',
+ "'" => '&apos;'
+ );
+ $b=shift;
+ $_=$b;
+ s/([&<>'"])/$escapes{$1}/gs; #'
+ $_;
+}
+
+sub TransformXML{
+ my $XML=shift;
+ my $XSL=shift;
+ my $parser = XML::LibXML->new();
+ my $xslt = XML::LibXSLT->new();
+ my $source = $parser->parse_string($XML);
+ my $style_doc = $parser->parse_file($XSL);
+ my $stylesheet = $xslt->parse_stylesheet($style_doc);
+ my $results = $stylesheet->transform($source,@_);
+ $stylesheet->output_string($results);
+}
1;
\ No newline at end of file
Modified: trunk/fb2-perl-tools/fb22htmls
===================================================================
--- trunk/fb2-perl-tools/fb22htmls 2008-01-27 17:52:52 UTC (rev 55)
+++ trunk/fb2-perl-tools/fb22htmls 2008-01-27 17:58:54 UTC (rev 56)
@@ -1,12 +1,44 @@
#!/usr/bin/perl
-use FB2ToManyHTML;
+# Copyright (c) 2004 Dmitry Gribov (GribUser),
+# 2008 Nikolay Shaplov
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+use fb2::Convert::Htmls;
+use strict;
+
+my $xsl_file=$ENV{FB2_PERL_TOOLS};
+$xsl_file.="/" if $xsl_file && $xsl_file =~ /[^\/]$/;
+$xsl_file.="XSL/fb22htmls.xsl";
+
my $Mute=0;
my $SectionSize=30000;
my $MinSectionSize=20000;
#=============================================================
-if (!$ARGV[2]){print "\nkolbasorezka.pl by GribUser v 0.01\nUsage:\n\nkolbasorezka.pl <inputfile.fb2> <stylesheet.xsl> <outputfile> [-options]
+if (!$ARGV[1]){print "\nkolbasorezka.pl by GribUser v 0.01\nUsage:\n\nkolbasorezka.pl <inputfile.fb2> <outputfile> [-options]
outputfile name will be used to create new files.
outputfile_page#.html files will be created
@@ -20,9 +52,8 @@
#=============================================================
my $FileToParce=$ARGV[0];
-my $StyleSheet=$ARGV[1];
-my $OutFileName=$ARGV[2];
-for (my $I=3;$I<@ARGV;$I++){
+my $OutFileName=$ARGV[1];
+for (my $I=2;$I<@ARGV;$I++){
$Mute=1 if $ARGV[$I] eq '-mute';
if ($ARGV[$I] eq '-partsize'){
$SectionSize=$ARGV[$I+1] if $ARGV[$I+1];
@@ -35,4 +66,4 @@
}
-FB2ToManyHTML::Kolbasim($FileToParce,$StyleSheet,$OutFileName,$SectionSize,$MinSectionSize,$Mute);
+fb2::Convert::Htmls::Kolbasim($FileToParce,$xsl_file,$OutFileName,$SectionSize,$MinSectionSize,$Mute);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|