# Registers every hook of this plugin under the id "search".
sub import { #{{{
hook(type => "checkconfig", id => "search", call => \&checkconfig);
hook(type => "pagetemplate", id => "search", call => \&pagetemplate);
+ # run last so other needsbuild hooks can modify the list
+ hook(type => "needsbuild", id => "search", call => \&needsbuild,
+ last => 1);
+ hook(type => "filter", id => "search", call => \&filter); # indexes page content into xapian
hook(type => "delete", id => "search", call => \&delete); # removes documents from the xapian index
- hook(type => "change", id => "search", call => \&change);
hook(type => "cgi", id => "search", call => \&cgi);
} # }}}
}
} #}}}
-sub delete (@) { #{{{
- debug(gettext("cleaning xapian search index"));
# Pages queued for indexing, keyed by page name. Replaced wholesale on
# every needsbuild call and consulted by filter().
my %toindex;
# needsbuild hook: takes a reference to the list of items about to be
# built and records the page name of each one in %toindex.
sub needsbuild ($) { #{{{
	my $tobuild=shift;
	%toindex = map { (pagename($_), 1) } @{$tobuild};
} #}}}
+
# HTML::Scrubber instance, created on first use and then reused.
my $scrubber;
# Filter hook: index a page in xapian as its content passes through,
# and hand the content back unchanged.
#
# Named parameters read: page, destpage, content.
# Indexing only happens when page and destpage are the same and the
# page was recorded in %toindex by needsbuild().
# Returns the content parameter unmodified in every case.
sub filter (@) { #{{{
	my %params=@_;
	my $page=$params{page};

	# Nothing to index; pass the content straight through.
	return $params{content}
		unless $page eq $params{destpage} && $toindex{$page};

	my $db=xapiandb();
	my $doc=Search::Xapian::Document->new();

	# Prefer a title stored in the page's meta state, otherwise
	# derive one from the page name.
	my $title;
	if (exists $pagestate{$page}{meta} &&
	    exists $pagestate{$page}{meta}{title}) {
		$title=$pagestate{$page}{meta}{title};
	}
	else {
		$title=IkiWiki::pagetitle($page);
	}

	# Remove any html from text to be indexed.
	# TODO: This removes html that is in eg, a markdown pre,
	# which should not be removed.
	if (! defined $scrubber) {
		eval q{use HTML::Scrubber};
		error($@) if $@;
		$scrubber=HTML::Scrubber->new(allow => []);
	}
	my $text=$scrubber->scrub($params{content});

	# Take 512 characters for a sample, then extend it out
	# if it stopped in the middle of a word.
	my $size=512;
	my $sample=substr($text, 0, $size);
	if (length($sample) == $size) {
		my $max=length($text);
		while ($size < $max) {
			my $next=substr($text, $size++, 1);
			last if $next =~ /\s/;
			$sample.=$next;
		}
	}
	$sample=~s/\n/ /g;

	# The record below is one field=value pair per line, so no value
	# may contain a newline itself. The sample is flattened above;
	# do the same for the title, which may come from user-supplied
	# meta data.
	(my $caption=$title)=~s/\n/ /g;

	# data used by omega
	$doc->set_data(
		"url=".urlto($page, "")."\n".
		"sample=$sample\n".
		"caption=$caption\n".
		"modtime=$IkiWiki::pagemtime{$page}\n".
		"size=".length($params{content})."\n"
	);

	my $tg=Search::Xapian::TermGenerator->new();
	$tg->set_stemmer(Search::Xapian::Stem->new("english"));
	$tg->set_document($doc);
	# Page name and title are indexed with an increment of 2, the
	# body text with the default increment.
	$tg->index_text($page, 2);
	$tg->index_text($title, 2);
	$tg->index_text($text);

	# The page term identifies the document, so indexing the same
	# page again replaces its old document instead of adding one.
	my $pageterm=pageterm($page);
	$doc->add_term($pageterm);
	$db->replace_document_by_term($pageterm, $doc);

	return $params{content};
} #}}}
-sub change (@) { #{{{
- debug(gettext("updating xapian search index"));
# Delete hook: remove the xapian document of every item passed in.
#
# Documents are stored under the term of their page name (see the
# pagename() call in needsbuild), so each argument is converted with
# pagename() before the term is built. Without that, a source file name
# such as "foo.mdwn" would never match the stored "U:foo" term.
# NOTE(review): assumes pagename() leaves an argument that is already a
# page name unchanged -- confirm against IkiWiki::pagename.
sub delete (@) { #{{{
	my $db=xapiandb();
	foreach my $file (@_) {
		$db->delete_document_by_term(pageterm(pagename($file)));
	}
} #}}}
sub cgi ($) { #{{{
}
} #}}}
# Build the unique xapian term that identifies a page's document.
#
# Normally this is "U:" followed by the page name. Xapian rejects
# overlong terms (the on-disk backends cap terms at roughly 245 bytes --
# confirm for the backend in use), so when the term would exceed 240
# bytes it is replaced by "U" followed by the SHA-1 hex digest of the
# page name. The hashed form has no colon, so it cannot clash with the
# plain term of any page.
sub pageterm ($) { #{{{
	my $page=shift;

	# The limit applies to bytes, not characters, so measure (and
	# hash) the encoded form of a character string.
	my $bytes=$page;
	utf8::encode($bytes) if utf8::is_utf8($bytes);

	my $max_term_bytes=240;
	if (length("U:".$bytes) > $max_term_bytes) {
		# Loaded only when needed; Digest::SHA ships with perl.
		require Digest::SHA;
		return "U".Digest::SHA::sha1_hex($bytes);
	}

	return "U:".$page;
} #}}}
+
# Writable xapian database handle, opened on first use.
my $db;
# Returns the writable xapian database handle, opening (or creating) the
# database under $config{wikistatedir}/xapian/default on the first call
# and handing back the same handle afterwards.
sub xapiandb () { #{{{
	return $db if defined $db;

	# The xapian modules are only loaded once a database is needed.
	eval q{
		use Search::Xapian;
		use Search::Xapian::WritableDatabase;
	};
	error($@) if $@;

	my $dbpath=$config{wikistatedir}."/xapian/default";
	$db=Search::Xapian::WritableDatabase->new($dbpath,
		Search::Xapian::DB_CREATE_OR_OPEN());
	return $db;
} #}}}
+
1