diff options
author | Jonas Smedegaard <dr@jones.dk> | 2013-05-12 16:34:55 +0200 |
---|---|---|
committer | Jonas Smedegaard <dr@jones.dk> | 2013-05-12 16:34:55 +0200 |
commit | 562739d75878cb1818c9e07d57e27991085b7e22 (patch) | |
tree | 515baf984d554593b1c8f1991dfb104709feb746 | |
parent | 670e67000c542490147afa3c459d5c578ba71371 (diff) |
Duplicate styling drop and document headers drop from htm2html-default to htm2html-1.
-rwxr-xr-x | mkhtm2html-1 | 7 | ||||
-rwxr-xr-x | mkhtm2html-default | 4 |
2 files changed, 9 insertions, 2 deletions
diff --git a/mkhtm2html-1 b/mkhtm2html-1 index 2d9e00d..307a584 100755 --- a/mkhtm2html-1 +++ b/mkhtm2html-1 @@ -30,6 +30,10 @@ foreach my $class ( map /(?<=\.)(ft\d+)(?={font-size:6px;)/mg, $_ ) { s{<P\b[^>]*>\(</P>\s*<P\b[^>]*\sclass="$class">\d+</P>\s*<P\b[^>]*>\) [^<]*</P>\s*}{}mg; }; +# drop document headers +s{<HEAD>.*?</HEAD>\s*}{}msg; +s{</DIV>\n</BODY>\n</HTML>.*?<DIV\b[^>]*>\s*}{}msg; + # unwrap similarly styled bolded paragraphs s{<P\b[^>]*class="([^"]+)"[^>]*><b>[^<]+\K</b></P>\s*<P\b[^>]*class="\1"[^>]*><b>}{ }mg; @@ -46,6 +50,9 @@ s{<P\b[^>]*>\(([ivx]+)\)</P>}{<H5>$1</H5>}mg; s{(?<=\S)-(<br/>|</P>\s*<P\b[^>]*>)(?=[[:lower:]])}{}mg; s{\s*<br/>\s*}{ }mg; +# drop styling +s{<P\b[^>]*>}{<P>}mg; + write_file( $stem . '.html', $_ ); print "DONE: $0 stem $stem\n"; diff --git a/mkhtm2html-default b/mkhtm2html-default index e3ca90b..a4b7bc8 100755 --- a/mkhtm2html-default +++ b/mkhtm2html-default @@ -31,7 +31,7 @@ foreach my $class ( map /(?<=\.)(ft\d+)(?={font-size:7px;)/mg, $_ ) { s{\(</P>\s*<P\s[^>]*\sclass="$class">\d+</P>\s*<P\s[^>]*>\)}{}mg; }; -# document headers +# drop document headers s{<HEAD>.*?</HEAD>\s*}{}msg; s{</DIV>\n</BODY>\n</HTML>.*?<DIV\b[^>]*>\s*}{}msg; @@ -58,7 +58,7 @@ s{(?<=class="(ft\d{3})">)[^<]+?\K\s*</P>\s*<P\b[^>]*class="\1"[^>]*>}{ }mg; #s{<P\b[^>]*class="([^"]+)"[^>]*>([^<]+?)\s*</P>\s*(?=<P\b[^>]*class="\1"[^>]*>)}{$2 }mg; s{\s*<br/>\s*}{ }mg; -# styling +# drop styling s{<P\b[^>]*>}{<P>}mg; write_file( $stem . '.html', $_ ); |