From 562739d75878cb1818c9e07d57e27991085b7e22 Mon Sep 17 00:00:00 2001 From: Jonas Smedegaard Date: Sun, 12 May 2013 16:34:55 +0200 Subject: Duplicate styling drop and document headers drop from htm2html-default to htm2html-1. --- mkhtm2html-1 | 7 +++++++ mkhtm2html-default | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/mkhtm2html-1 b/mkhtm2html-1 index 2d9e00d..307a584 100755 --- a/mkhtm2html-1 +++ b/mkhtm2html-1 @@ -30,6 +30,10 @@ foreach my $class ( map /(?<=\.)(ft\d+)(?={font-size:6px;)/mg, $_ ) { s{]*>\(

\s*]*\sclass="$class">\d+

\s*]*>\) [^<]*

\s*}{}mg; }; +# drop document headers +s{.*?\s*}{}msg; +s{\n\n.*?]*>\s*}{}msg; + # unwrap similarly styled bolded paragraphs s{]*class="([^"]+)"[^>]*>[^<]+\K

\s*]*class="\1"[^>]*>}{ }mg; @@ -46,6 +50,9 @@ s{]*>\(([ivx]+)\)

}{
$1
}mg; s{(?<=\S)-(
|

\s*]*>)(?=[[:lower:]])}{}mg; s{\s*
\s*}{ }mg; +# drop styling +s{]*>}{

}mg; + write_file( $stem . '.html', $_ ); print "DONE: $0 stem $stem\n"; diff --git a/mkhtm2html-default b/mkhtm2html-default index e3ca90b..a4b7bc8 100755 --- a/mkhtm2html-default +++ b/mkhtm2html-default @@ -31,7 +31,7 @@ foreach my $class ( map /(?<=\.)(ft\d+)(?={font-size:7px;)/mg, $_ ) { s{\(

\s*]*\sclass="$class">\d+

\s*]*>\)}{}mg; }; -# document headers +# drop document headers s{.*?\s*}{}msg; s{\n\n.*?]*>\s*}{}msg; @@ -58,7 +58,7 @@ s{(?<=class="(ft\d{3})">)[^<]+?\K\s*

\s*]*class="\1"[^>]*>}{ }mg; #s{]*class="([^"]+)"[^>]*>([^<]+?)\s*

\s*(?=]*class="\1"[^>]*>)}{$2 }mg; s{\s*
\s*}{ }mg; -# styling +# drop styling s{]*>}{

}mg; write_file( $stem . '.html', $_ ); -- cgit v1.2.3