summary | refs | log | tree | commit | diff
path: root/IkiWiki/Plugin/htmlbalance.pm
blob: 26f8e494bf537dba9f2b0c967c71fb9f26067b84 (plain)
  1. #!/usr/bin/perl
  2. package IkiWiki::Plugin::htmlbalance;
  3. # htmlbalance: Parse and re-serialize HTML to ensure balanced tags
  4. #
  5. # Copyright 2008 Simon McVittie <http://smcv.pseudorandom.co.uk/>
  6. # Licensed under the GNU GPL, version 2, or any later version published by the
  7. # Free Software Foundation
  8. use warnings;
  9. use strict;
  10. use IkiWiki 3.00;
  11. use HTML::Entities;
  12. sub import {
  13. hook(type => "getsetup", id => "htmlbalance", call => \&getsetup);
  14. hook(type => "sanitize", id => "htmlbalance", call => \&sanitize);
  15. }
  16. sub getsetup () {
  17. return
  18. plugin => {
  19. safe => 1,
  20. rebuild => undef,
  21. },
  22. }
  23. sub sanitize (@) {
  24. my %params=@_;
  25. my $ret = '';
  26. eval q{use HTML::TreeBuilder};
  27. error $@ if $@;
  28. my $tree = HTML::TreeBuilder->new();
  29. $tree->ignore_unknown(0);
  30. $tree->ignore_ignorable_whitespace(0);
  31. $tree->no_space_compacting(1);
  32. $tree->p_strict(1);
  33. $tree->store_comments(0);
  34. $tree->store_declarations(0);
  35. $tree->store_pis(0);
  36. $tree->parse_content($params{content});
  37. my @nodes = $tree->disembowel();
  38. foreach my $node (@nodes) {
  39. if (ref $node) {
  40. $ret .= $node->as_XML();
  41. chomp $ret;
  42. $node->delete();
  43. }
  44. else {
  45. $ret .= encode_entities($node);
  46. }
  47. }
  48. $tree->delete();
  49. return $ret;
  50. }
  51. 1