sipb.mit.edu Git - ikiwiki.git/blobdiff - IkiWiki/Plugin/search.pm
have the xapian stemmer use a language based on LANG
[ikiwiki.git] / IkiWiki / Plugin / search.pm
index aa0a8085e16867c3cc54fbe1aeacd3481ef9a6d3..14bdb8dbc2651c2ae2ec7174e92cad738878a5c2 100644 (file)
@@ -54,6 +54,7 @@ sub pagetemplate (@) { #{{{
 } #}}}
 
 my $scrubber;
+my $stemmer;
 sub index (@) { #{{{
        my %params=@_;
        
@@ -94,16 +95,26 @@ sub index (@) { #{{{
        $sample=~s/\n/ /g;
        
        # data used by omega
+       # Decode html entities in it, since omega re-encodes them.
+       eval q{use HTML::Entities};
        $doc->set_data(
                "url=".urlto($params{page}, "")."\n".
-               "sample=$sample\n".
-               "caption=$title\n".
+               "sample=".decode_entities($sample)."\n".
+               "caption=".decode_entities($title)."\n".
                "modtime=$IkiWiki::pagemtime{$params{page}}\n".
                "size=".length($params{content})."\n"
        );
 
        my $tg = Search::Xapian::TermGenerator->new();
-       $tg->set_stemmer(new Search::Xapian::Stem("english"));
+       if (! $stemmer) {
+               my $langcode=$ENV{LANG} || "en";
+               $langcode=~s/_.*//;
+               eval { $stemmer=Search::Xapian::Stem->new($langcode) };
+               if ($@) {
+                       $stemmer=Search::Xapian::Stem->new("english");
+               }
+       }
+       $tg->set_stemmer($stemmer);
        $tg->set_document($doc);
        $tg->index_text($params{page}, 2);
        $tg->index_text($title, 2);
@@ -119,7 +130,7 @@ sub index (@) { #{{{
 sub delete (@) { #{{{
        my $db=xapiandb();
        foreach my $page (@_) {
-               $db->delete_document_by_term(pageterm($page));
+               $db->delete_document_by_term(pageterm(pagename($page)));
        }
 } #}}}