summaryrefslogtreecommitdiff
path: root/report/normalize-html.pl
blob: 7245a4eb57c1b83c03ba64b0cd05fd797ba8ea2b (plain)
  1. #!/usr/bin/perl
  2. use File::Slurp;
  3. my $_ = read_file( $ARGV[0] ) ;
  4. #s!( )! !g; # filler horisontal space
  5. s!( )+! !g; # filler horisontal space
  6. #s!(\s*<br>\s*)+!<br>!g; # filler vertical space
  7. s!(\s*<br/>\s*)+!<br/>!g; # filler vertical space
  8. s,<br/>(?=(?:</b>)?</p>),,ig; # filler horisontal space
  9. s,<p>\s*</p>,,ig; # filler horisontal space
  10. s!\s*<b>\s*</b>\s*!!g; # filler vertical space
  11. s!<a name="?\d+"?></a>!!gi; # page dividers
  12. #s!<body><img[^>]+>\K<br><b>(.+?)</b>!<h1>$1</h1>!i; # H1
  13. #s!<br><b>(.+?)</b>!<h1>$1</h1>!i; # H1
  14. s!\.(ft\d+){font\-size:52px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h1>$2</h1>!is;
  15. s!\.(ft\d+){font\-size:19px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h2 class="front">$2</h2>!isg;
  16. s!\.(ft\d+){font\-size:16px;.*?\K<p\b[^>]*\bclass="\1"[^>]*><b>(.*?)</b></p>!<h3 class="front">$2</h3>!isg;
  17. s!<p\b[^>]*><b>([^<\s][^<]*?)</b></p>!<h2>$1</h2>!isg;
  18. s,<p\b[^>]*>(?!<b>),<p>,isg;
  19. s,<style.*?</style>,,isg;
  20. s,<!-- .*? -->,,isg;
  21. s,<div id="page[^>]*>,,isg;
  22. s,</div>,,isg;
  23. $ARGV[1] ? write_file( $ARGV[1], $_ ) : print;
  24. #pandoc --normalize --no-wrap --parse-raw -f html -t markdown -o "$outfile"