diff options
Diffstat (limited to 'report')
-rwxr-xr-x | report/mkmd.sh | 18 | ||||
-rwxr-xr-x | report/normalize-html.pl | 29 |
2 files changed, 47 insertions, 0 deletions
diff --git a/report/mkmd.sh b/report/mkmd.sh
new file mode 100755
index 0000000..39ec83d
--- /dev/null
+++ b/report/mkmd.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+set -e
+
+#stem="Greens-IslandsofResiliences"
+stem="Greens-IslandsofResilience"
+
+pdftohtml -q -c -s -i -noframes -stdout "$stem.pdf" tmp
+
+infile=tmp.html
+
+outfile="${2:-$(basename "$infile" .html).md}"
+
+#perl ./normalize-html.pl tmp.html | less
+perl ./normalize-html.pl tmp.html \
+    |pandoc --normalize --no-wrap --parse-raw -f html -t markdown -o "$stem.md"
+
+pandoc --standalone --toc -f markdown -t html5 -i "$stem.md" -o "$stem.html"
diff --git a/report/normalize-html.pl b/report/normalize-html.pl
new file mode 100755
index 0000000..7245a4e
--- /dev/null
+++ b/report/normalize-html.pl
@@ -0,0 +1,29 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use File::Slurp;
+# Usage: normalize-html.pl IN.html [OUT.html] -- strip pdftohtml layout noise so pandoc gets clean HTML.
+$_ = read_file( $ARGV[0] );  # whole document in $_ (was `my $_`, which perl >= 5.24 refuses to compile)
+#s!( )! !g; # filler horizontal space
+s!( )+! !g; # collapse filler horizontal space -- NOTE(review): confirm the character inside ( ) is the intended one
+#s!(\s*<br>\s*)+!<br>!g; # filler vertical space
+s!(\s*<br/>\s*)+!<br/>!g; # collapse runs of <br/> (filler vertical space) into one
+s,<br/>(?=(?:</b>)?</p>),,ig; # drop a <br/> sitting right before the end of a paragraph
+s,<p>\s*</p>,,ig; # drop empty paragraphs
+s!\s*<b>\s*</b>\s*!!g; # drop empty bold runs and the whitespace around them
+s!<a name="?\d+"?></a>!!gi; # page dividers
+#s!<body><img[^>]+>\K<br><b>(.+?)</b>!<h1>$1</h1>!i; # H1
+#s!<br><b>(.+?)</b>!<h1>$1</h1>!i; # H1
+s!\.(ft\d+)\{font\-size:52px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h1>$2</h1>!is; # bold paragraph in the 52px class -> h1 (literal { escaped for newer perls)
+s!\.(ft\d+)\{font\-size:19px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h2 class="front">$2</h2>!isg; # bold paragraph in a 19px class -> front h2
+s!\.(ft\d+)\{font\-size:16px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h3 class="front">$2</h3>!isg; # bold paragraph in a 16px class -> front h3
+s!<p\b[^>]*><b>([^<\s][^<]*?)</b></p>!<h2>$1</h2>!isg; # any remaining all-bold paragraph -> h2
+s,<p\b[^>]*>(?!<b>),<p>,isg; # strip attributes from <p> tags that are not followed by <b>
+s,<style.*?</style>,,isg; # style blocks (only removed after the font-size rules above have used them)
+s,<!-- .*? -->,,isg; # HTML comments
+s,<div id="page[^>]*>,,isg; # opening per-page <div> wrappers
+s,</div>,,isg; # every closing </div>
+# Write to the file named by the second argument when given, otherwise print to stdout.
+$ARGV[1] ? write_file( $ARGV[1], $_ ) : print;
+
+#pandoc --normalize --no-wrap --parse-raw -f html -t markdown -o "$outfile"