summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-x report/mkmd.sh 18
-rwxr-xr-x report/normalize-html.pl 29
2 files changed, 47 insertions, 0 deletions
diff --git a/report/mkmd.sh b/report/mkmd.sh
new file mode 100755
index 0000000..39ec83d
--- /dev/null
+++ b/report/mkmd.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+set -e
+
+#stem="Greens-IslandsofResiliences"
+stem="Greens-IslandsofResilience"
+
+pdftohtml -q -c -s -i -noframes -stdout "$stem.pdf" tmp
+
+infile=tmp.html
+
+outfile="${2:-$(basename "$infile" .html).md}"
+
+#perl ./normalize-html.pl tmp.html | less
+perl ./normalize-html.pl tmp.html \
+ |pandoc --normalize --no-wrap --parse-raw -f html -t markdown -o "$stem.md"
+
+pandoc --standalone --toc -f markdown -t html5 -i "$stem.md" -o "$stem.html"
diff --git a/report/normalize-html.pl b/report/normalize-html.pl
new file mode 100755
index 0000000..7245a4e
--- /dev/null
+++ b/report/normalize-html.pl
@@ -0,0 +1,29 @@
+#!/usr/bin/perl
+# Clean up pdftohtml output ($ARGV[0]); write to $ARGV[1] if given, else print to stdout.
+use strict; use warnings;
+use File::Slurp;
+my $html = read_file( $ARGV[0] );    # whole input as one string (lexical "my $_" no longer compiles on Perl >= 5.24)
+for ($html) {                        # alias $_ to $html so the bare s/// operators below edit it in place
+    #s!( )! !g; # filler horizontal space
+    s!( )+! !g; # collapse runs of filler spaces (NOTE(review): may originally have targeted non-breaking spaces; confirm)
+    #s!(\s*<br>\s*)+!<br>!g; # filler vertical space
+    s!(\s*<br/>\s*)+!<br/>!g; # collapse repeated line breaks (filler vertical space)
+    s,<br/>(?=(?:</b>)?</p>),,ig; # drop a line break that directly precedes the end of a paragraph
+    s,<p>\s*</p>,,ig; # drop empty paragraphs
+    s!\s*<b>\s*</b>\s*!!g; # drop empty bold spans
+    s!<a name="?\d+"?></a>!!gi; # page dividers
+    #s!<body><img[^>]+>\K<br><b>(.+?)</b>!<h1>$1</h1>!i; # H1
+    #s!<br><b>(.+?)</b>!<h1>$1</h1>!i; # H1
+    s!\.(ft\d+)\{font\-size:52px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h1>$2</h1>!is; # bold paragraph in a 52px class -> H1 (literal brace escaped for modern perls)
+    s!\.(ft\d+)\{font\-size:19px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h2 class="front">$2</h2>!isg; # 19px class -> front H2
+    s!\.(ft\d+)\{font\-size:16px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h3 class="front">$2</h3>!isg; # 16px class -> front H3
+    s!<p\b[^>]*><b>([^<\s][^<]*?)</b></p>!<h2>$1</h2>!isg; # any remaining all-bold paragraph -> H2
+    s,<p\b[^>]*>(?!<b>),<p>,isg; # strip attributes from paragraphs that do not start with <b>
+    s,<style.*?</style>,,isg; # remove style blocks (must run after the font-size rules above, which read them)
+    s,<!-- .*? -->,,isg; # remove HTML comments
+    s,<div id="page[^>]*>,,isg; # remove per-page opening divs
+    s,</div>,,isg; # remove every closing div
+}
+if ( $ARGV[1] ) { write_file( $ARGV[1], $html ) } else { print $html }
+
+#pandoc --normalize --no-wrap --parse-raw -f html -t markdown -o "$outfile"