]> sipb.mit.edu Git - ikiwiki.git/blob - IkiWiki/Plugin/htmlbalance.pm
htmlbalance: don't compact whitespace, and set misc other options
[ikiwiki.git] / IkiWiki / Plugin / htmlbalance.pm
1 #!/usr/bin/perl
2 package IkiWiki::Plugin::htmlbalance;
3
4 # htmlbalance: Parse and re-serialize HTML to ensure balanced tags
5 #
6 # Copyright 2008 Simon McVittie <http://smcv.pseudorandom.co.uk/>
7 # Licensed under the GNU GPL, version 2, or any later version published by the
8 # Free Software Foundation
9
10 use warnings;
11 use strict;
12 use IkiWiki 2.00;
13 use HTML::TreeBuilder;
14 use HTML::Entities;
15
# Called when the plugin is loaded: registers this module's callbacks
# with ikiwiki under the hook id "htmlbalance".
#   getsetup - handled by getsetup()
#   sanitize - handled by sanitize()
sub import { #{{{
        my $hook_id = "htmlbalance";

        hook(
                type => "getsetup",
                id   => $hook_id,
                call => \&getsetup,
        );
        # The sanitize hook is registered last; its result is what
        # import hands back, as before.
        return hook(
                type => "sanitize",
                id   => $hook_id,
                call => \&sanitize,
        );
} # }}}
20
# getsetup hook: returns the key/value list describing this plugin.
# The only entry is "plugin", a hash reference with the keys
# "safe" (set to 1) and "rebuild" (left undefined).
sub getsetup () { #{{{
        # A fresh hash is built on every call so callers never share state.
        my %plugin_info = (
                safe    => 1,
                rebuild => undef,
        );

        return (plugin => \%plugin_info);
} #}}}
28
# sanitize hook: parses $params{content} with HTML::TreeBuilder and
# re-serializes it, so that the returned markup has balanced tags.
#
# Parameters (named, passed as a flat list):
#   content - the HTML text to process
#
# Returns the re-serialized markup as a single string.
sub sanitize (@) { #{{{
        my %params=@_;

        my $parser = HTML::TreeBuilder->new();

        # Parser options, applied in this order before any content is fed in.
        my @parser_options = (
                [ignore_unknown => 0],
                [ignore_ignorable_whitespace => 0],
                [no_space_compacting => 1],
                [p_strict => 1],
                [store_comments => 0],
                [store_declarations => 0],
                [store_pis => 0],
        );
        foreach my $option (@parser_options) {
                my ($setter, $value) = @$option;
                $parser->$setter($value);
        }

        $parser->parse_content($params{content});

        # disembowel() hands back the tree's child nodes: element objects
        # (references) and plain text strings.
        my $balanced = '';
        foreach my $piece ($parser->disembowel()) {
                unless (ref $piece) {
                        # Plain text: escape it so it is safe to emit as markup.
                        $balanced .= encode_entities($piece);
                        next;
                }

                # Element: serialize it, drop one trailing newline from the
                # accumulated output, then free the node.
                $balanced .= $piece->as_XML();
                chomp $balanced;
                $piece->delete();
        }

        $parser->delete();
        return $balanced;
} # }}}
56
57 1