summary | refs | log | tree | commit | diff
path: root/mkhtm2html-1
diff options
context:
space:
mode:
author: Jonas Smedegaard <dr@jones.dk>, 2013-05-12 16:34:55 +0200
committer: Jonas Smedegaard <dr@jones.dk>, 2013-05-12 16:34:55 +0200
commit: 562739d75878cb1818c9e07d57e27991085b7e22 (patch)
tree: 515baf984d554593b1c8f1991dfb104709feb746 /mkhtm2html-1
parent: 670e67000c542490147afa3c459d5c578ba71371 (diff)
Duplicate the styling drop and the document-header drop from htm2html-default into htm2html-1.
Diffstat (limited to 'mkhtm2html-1')
-rwxr-xr-x  mkhtm2html-1  7
1 files changed, 7 insertions, 0 deletions
diff --git a/mkhtm2html-1 b/mkhtm2html-1
index 2d9e00d..307a584 100755
--- a/mkhtm2html-1
+++ b/mkhtm2html-1
@@ -30,6 +30,10 @@ foreach my $class ( map /(?<=\.)(ft\d+)(?={font-size:6px;)/mg, $_ ) {
s{<P\b[^>]*>\(</P>\s*<P\b[^>]*\sclass="$class">\d+</P>\s*<P\b[^>]*>\) [^<]*</P>\s*}{}mg;
};
+# drop document headers
+s{<HEAD>.*?</HEAD>\s*}{}msg;
+s{</DIV>\n</BODY>\n</HTML>.*?<DIV\b[^>]*>\s*}{}msg;
+
# unwrap similarly styled bolded paragraphs
s{<P\b[^>]*class="([^"]+)"[^>]*><b>[^<]+\K</b></P>\s*<P\b[^>]*class="\1"[^>]*><b>}{ }mg;
@@ -46,6 +50,9 @@ s{<P\b[^>]*>\(([ivx]+)\)</P>}{<H5>$1</H5>}mg;
s{(?<=\S)-(<br/>|</P>\s*<P\b[^>]*>)(?=[[:lower:]])}{}mg;
s{\s*<br/>\s*}{ }mg;
+# drop styling
+s{<P\b[^>]*>}{<P>}mg;
+
write_file( $stem . '.html', $_ );
print "DONE: $0 stem $stem\n";