#!/usr/bin/perl
#
# Tidy an HTML file before converting it to Markdown.
#
# Usage: perl SCRIPT INPUT.html [OUTPUT.html]
#
# Reads INPUT.html, strips filler whitespace and layout-only markup, promotes
# bold-only paragraphs to headings, and writes the result to OUTPUT.html, or
# to STDOUT when no output file is given.
#
# NOTE(review): the ".ftN" CSS classes, numeric <a name> anchors and
# <div id="page..."> wrappers look like pdftohtml output; confirm against the
# actual input before relying on that.
use strict;
use warnings;
use File::Slurp;

# Run only when executed as a program, so the helpers below can be loaded
# (e.g. by a test) without touching @ARGV or the filesystem.
main(@ARGV) unless caller;

# Entry point: read the input file, clean it, write or print the result.
#   $in_path  - HTML file to read (required)
#   $out_path - file to write; STDOUT is used when absent or empty
# Dies with a usage message when no input file is given; read_file and
# write_file report their own I/O errors.
sub main {
    my ($in_path, $out_path) = @_;

    die "Usage: $0 INPUT.html [OUTPUT.html]\n"
        unless defined $in_path && length $in_path;

    # Scalar assignment makes read_file return the whole file as one string.
    my $html    = read_file($in_path);
    my $cleaned = _clean_html($html);

    # Test definedness/length rather than truth, so that an output file
    # literally named "0" is still written instead of going to STDOUT.
    if (defined $out_path && length $out_path) {
        write_file($out_path, $cleaned);
    }
    else {
        print $cleaned;
    }
    return 0;
}

# Return a cleaned copy of $html. The order of the substitutions matters:
# the heading rules read font sizes out of the <style> rules, so the <style>
# elements may only be removed after those rules have run.
sub _clean_html {
    my ($html) = @_;

    for ($html) {    # alias $_ to our private copy for the s/// chain
        # --- filler whitespace ---------------------------------------------
        # Collapse each run of spaces into a single space.
        # NOTE(review): an earlier, commented-out variant replaced a single
        # character with a space; if the original pattern held non-breaking
        # spaces (U+00A0) rather than plain spaces, restore them here.
        s! +! !g;
        # Collapse runs of <br/> (and the whitespace around them) into one.
        # (An earlier variant of this rule matched <br> instead of <br/>.)
        s!(?:\s*<br/>\s*)+!<br/>!g;
        # Drop a <br/> that sits directly before </p> or </b></p>.
        s,<br/>(?=(?:</b>)?</p>),,ig;
        # Drop empty paragraphs.
        s,<p>\s*</p>,,ig;
        # Drop empty bold elements together with the whitespace around them.
        s!\s*<b>\s*</b>\s*!!g;
        # Drop numeric named anchors (page dividers).
        s!<a name="?\d+"?></a>!!gi;

        # --- headings ------------------------------------------------------
        # For a CSS rule ".ftN{font-size:<size>px;", turn the next bold-only
        # <p class="ftN"> after it into a heading. \K keeps everything before
        # the paragraph out of the replacement. The literal "{" is escaped
        # because an unescaped one is deprecated or rejected by newer perls.
        # (Earlier attempts keyed the H1 on "<br><b>...</b>" instead.)
        s!\.(ft\d+)\{font-size:52px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h1>$2</h1>!is;
        s!\.(ft\d+)\{font-size:19px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h2 class="front">$2</h2>!isg;
        s!\.(ft\d+)\{font-size:16px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h3 class="front">$2</h3>!isg;
        # Any remaining paragraph that is nothing but bold text (no nested
        # tags, not starting with whitespace) becomes a plain <h2>.
        s!<p\b[^>]*><b>([^<\s][^<]*?)</b></p>!<h2>$1</h2>!isg;

        # --- layout-only markup --------------------------------------------
        # Strip attributes from <p> tags not immediately followed by <b>.
        s,<p\b[^>]*>(?!<b>),<p>,isg;
        # Remove <style> elements (no longer needed once headings are set).
        s,<style.*?</style>,,isg;
        # Remove "<!-- ... -->" comments.
        s,<!-- .*? -->,,isg;
        # Remove the per-page <div id="page..."> wrappers and every </div>.
        s,<div id="page[^>]*>,,isg;
        s,</div>,,isg;
    }

    return $html;
}

# Original author's note, a follow-up command that is not run by this script:
#   pandoc --normalize --no-wrap --parse-raw -f html -t markdown -o "$outfile"

1;