summary refs log tree commit diff
path: root/IkiWiki/Plugin/htmlscrubber.pm
blob: bc613f92477ef55ef384f8a1bbf2887670c0e7a7 (plain)
  1. #!/usr/bin/perl
  2. package IkiWiki::Plugin::htmlscrubber;
  3. use warnings;
  4. use strict;
  5. use IkiWiki 2.00;
  # Plugin entry point: registers sanitize() as the "sanitize" hook under
  # the id "htmlscrubber".
  sub import { #{{{
      my @registration = (
          type => "sanitize",
          id   => "htmlscrubber",
          call => \&sanitize,
      );
      hook(@registration);
  } # }}}
  # Sanitize hook: receives the hook's key/value parameters and returns
  # the "content" value after running it through the shared scrubber.
  sub sanitize (@) { #{{{
      my %args = @_;
      my $cleaner = scrubber();
      return $cleaner->scrub($args{content});
  } # }}}
  # Cached HTML::Scrubber instance; built by the first call to scrubber().
  my $_scrubber;

  # Returns the shared HTML::Scrubber object, constructing it on first use.
  #
  # The scrubber is configured with a whitelist of tags and a default
  # whitelist of attributes. Attributes that carry a URL are only kept when
  # their value matches a safe-URL pattern, so that script-bearing URIs
  # such as "javascript:..." are dropped instead of passed through.
  #
  # If HTML::Scrubber cannot be loaded, the failure message is handed to
  # error().
  sub scrubber { #{{{
      return $_scrubber if defined $_scrubber;

      # Loaded lazily, at first use, rather than at compile time.
      eval q{use HTML::Scrubber};
      error($@) if $@;

      # Only known URI schemes are accepted in URL-valued attributes;
      # an unknown scheme is treated as a possible way to embed script.
      my $uri_schemes = join("|", map { quotemeta($_) } qw{
          http https ftp mailto file telnet gopher
          aaa aaas acap cap cid crid dav dict dns fax go h323
          im imap ldap mid news nfs nntp pop pres sip sips snmp
          tel urn wais xmpp z39.50r z39.50s
          aim callto cvs ed2k feed fish gg irc ircs lastfm ldaps
          magnet mms msnim notes rsync secondlife skype ssh sftp
          smb sms snews webcal ymsgr git svn bzr darcs hg
      });

      # A URL is accepted when it starts with a whitelisted scheme, is a
      # data: URL of one of a few image types, is relative (no colon
      # before the first slash, question mark or hash, or no colon at
      # all), or is a bare fragment.
      my $safe_url_regexp = qr{
          \A (?:
              (?:$uri_schemes):
            | data:image/(?:png|jpeg|gif)
            | [^:]+ (?: [/?\#] | \z )
            | \#
          )
      }xi;

      # Tag and attribute lists based on
      # http://feedparser.org/docs/html-sanitization.html
      # with the html 5 video and audio tags added.
      $_scrubber = HTML::Scrubber->new(
          allow => [qw{
              a abbr acronym address area b big blockquote br br/
              button caption center cite code col colgroup dd del
              dfn dir div dl dt em fieldset font form h1 h2 h3 h4
              h5 h6 hr hr/ i img input ins kbd label legend li map
              menu ol optgroup option p p/ pre q s samp select small
              span strike strong sub sup table tbody td textarea
              tfoot th thead tr tt u ul var
              video audio
          }],
          default => [undef, { (
              # Attributes whose value is kept whatever it contains.
              map { $_ => 1 } qw{
                  abbr accept accept-charset accesskey
                  align alt axis border cellpadding cellspacing
                  char charoff charset checked class
                  clear cols colspan color compact coords
                  datetime dir disabled enctype for frame
                  headers height hreflang hspace id ismap
                  label lang maxlength media method
                  multiple name nohref noshade nowrap prompt
                  readonly rel rev rows rowspan rules scope
                  selected shape size span start summary
                  tabindex target title type valign
                  value vspace width
                  autoplay loopstart loopend end
                  playcount controls
              } ),
              "/" => 1, # emit proper <hr /> XHTML
              # URL-valued attributes: kept only if the value matches
              # the safe-URL pattern above.
              ( map { ($_ => $safe_url_regexp) } qw{
                  href src action cite longdesc poster usemap
              } ),
          }],
      );
      return $_scrubber;
  } # }}}
  53. 1