summary | refs | log | tree | commit | diff
path: root/IkiWiki/Plugin/htmlbalance.pm
blob: dcd92055fbebcdeefdbf07d45337e7f0b2f9e360 (plain)
  1. #!/usr/bin/perl
  2. package IkiWiki::Plugin::htmlbalance;
  3. # htmlbalance: Parse and re-serialize HTML to ensure balanced tags
  4. #
  5. # Copyright 2008 Simon McVittie <http://smcv.pseudorandom.co.uk/>
  6. # Licensed under the GNU GPL, version 2, or any later version published by the
  7. # Free Software Foundation
  8. use warnings;
  9. use strict;
  10. use IkiWiki 2.00;
  11. use HTML::TreeBuilder;
  12. use HTML::Entities;
  13. sub import { #{{{
  14. hook(type => "getsetup", id => "htmlbalance", call => \&getsetup);
  15. hook(type => "sanitize", id => "htmlbalance", call => \&sanitize);
  16. } # }}}
  17. sub getsetup () { #{{{
  18. return
  19. plugin => {
  20. safe => 1,
  21. rebuild => undef,
  22. },
  23. } #}}}
  24. sub sanitize (@) { #{{{
  25. my %params=@_;
  26. my $ret = '';
  27. my $tree = HTML::TreeBuilder->new();
  28. $tree->ignore_unknown(0);
  29. $tree->ignore_ignorable_whitespace(0);
  30. $tree->no_space_compacting(1);
  31. $tree->p_strict(1);
  32. $tree->store_comments(0);
  33. $tree->store_declarations(0);
  34. $tree->store_pis(0);
  35. $tree->parse_content($params{content});
  36. my @nodes = $tree->disembowel();
  37. foreach my $node (@nodes) {
  38. if (ref $node) {
  39. $ret .= $node->as_XML();
  40. chomp $ret;
  41. $node->delete();
  42. }
  43. else {
  44. $ret .= encode_entities($node);
  45. }
  46. }
  47. $tree->delete();
  48. return $ret;
  49. } # }}}
  50. 1