From e7a840ed9a817cf4db59c90e680afd89e146b581 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Sun, 16 Nov 2008 18:11:39 +0000 Subject: htmlbalance: new plugin that balances tags by parsing and re-serializing --- IkiWiki/Plugin/htmlbalance.pm | 57 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 IkiWiki/Plugin/htmlbalance.pm (limited to 'IkiWiki/Plugin') diff --git a/IkiWiki/Plugin/htmlbalance.pm b/IkiWiki/Plugin/htmlbalance.pm new file mode 100644 index 000000000..667d73b6c --- /dev/null +++ b/IkiWiki/Plugin/htmlbalance.pm @@ -0,0 +1,57 @@ +#!/usr/bin/perl +package IkiWiki::Plugin::htmlbalance; + +# htmlbalance: Parse and re-serialize HTML to ensure balanced tags +# +# Copyright 2008 Simon McVittie +# Licensed under the GNU GPL, version 2, or any later version published by the +# Free Software Foundation + +use warnings; +use strict; +use IkiWiki 2.00; + +sub import { #{{{ + hook(type => "getsetup", id => "htmlbalance", call => \&getsetup); + hook(type => "sanitize", id => "htmlbalance", call => \&sanitize); +} # }}} + +sub getsetup () { #{{{ + return + plugin => { + safe => 1, + rebuild => undef, + }, +} #}}} + +sub sanitize (@) { #{{{ + my %params=@_; + my $ret = ''; + + eval { + use HTML::TreeBuilder; + use XML::Atom::Util qw(encode_xml); + }; + + if ($@) { + error($@); + return $params{content}; + } + + my $tree = HTML::TreeBuilder->new_from_content($params{content}); + my @nodes = $tree->disembowel(); + foreach my $node (@nodes) { + if (ref $node) { + $ret .= $node->as_XML(); + chomp $ret; + $node->delete(); + } + else { + $ret .= encode_xml($node); + } + } + $tree->delete(); + return $ret; +} # }}} + +1 -- cgit v1.2.3