sipb.mit.edu Git - ikiwiki.git/blobdiff - IkiWiki/Plugin/aggregate.pm
more robust and faster handling of feedurls
[ikiwiki.git] / IkiWiki / Plugin / aggregate.pm
index 89885464106e5b2c5c6e89c15ff7fbb09f9bc9cf..cb04308e6a7dbf7818b181fc454aad7080807fff 100644 (file)
@@ -74,7 +74,7 @@ sub preprocess (@) { #{{{
        $dir=~s/^\/+//;
        ($dir)=$dir=~/$IkiWiki::config{wiki_file_regexp}/;
        $feed->{dir}=$dir;
-       $feed->{feedurl}=defined $params{feedurl} ? $params{feedurl} : $params{url};
+       $feed->{feedurl}=defined $params{feedurl} ? $params{feedurl} : "";
        $feed->{updateinterval}=defined $params{updateinterval} ? $params{updateinterval} * 60 : 15 * 60;
        $feed->{expireage}=defined $params{expireage} ? $params{expireage} : 0;
        $feed->{expirecount}=defined $params{expirecount} ? $params{expirecount} : 0;
@@ -93,7 +93,7 @@ sub preprocess (@) { #{{{
 
        return "<a href=\"".$feed->{url}."\">".$feed->{name}."</a>: ".
               "<i>".$feed->{message}."</i> (".$feed->{numposts}.
-              " stored posts; ".$feed->{newposts}." new)";
+              " stored posts; ".$feed->{newposts}." new)<br />";
 } # }}}
 
 sub delete (@) { #{{{
@@ -180,7 +180,7 @@ sub aggregate () { #{{{
        eval q{use HTML::Entities};
        die $@ if $@;
 
-FEED:  foreach my $feed (values %feeds) {
+       foreach my $feed (values %feeds) {
                next unless time - $feed->{lastupdate} >= $feed->{updateinterval};
                $feed->{lastupdate}=time;
                $feed->{newposts}=0;
@@ -188,30 +188,38 @@ FEED:     foreach my $feed (values %feeds) {
 
                IkiWiki::debug("checking feed ".$feed->{name}." ...");
 
-               my @urls=XML::Feed->find_feeds($feed->{feedurl});
-               if (! @urls) {
-                       $feed->{message}="could not find feed at ".$feed->{feedurl};
-                       IkiWiki::debug($feed->{message});
-               }
-               foreach my $url (@urls) {
-                       my $f=XML::Feed->parse(URI->new($url));
-                       if (! $f) {
-                               $feed->{message}=XML::Feed->errstr;
+               if (! length $feed->{feedurl}) {
+                       my @urls=XML::Feed->find_feeds($feed->{url});
+                       if (! @urls) {
+                               $feed->{message}="could not find feed at ".$feed->{feedurl};
                                IkiWiki::debug($feed->{message});
-                               next FEED;
+                               next;
                        }
+                       $feed->{feedurl}=pop @urls;
+               }
+               my $f=eval{XML::Feed->parse(URI->new($feed->{feedurl}))};
+               if ($@) {
+                       $feed->{message}="feed crashed XML::Feed! $@";
+                       IkiWiki::debug($feed->{message});
+                       next;
+               }
+               if (! $f) {
+                       $feed->{message}=XML::Feed->errstr;
+                       IkiWiki::debug($feed->{message});
+                       next;
+               }
 
-                       foreach my $entry ($f->entries) {
-                               add_page(
-                                       feed => $feed,
-                                       title => decode_entities($entry->title),
-                                       link => $entry->link,
-                                       content => $entry->content->body,
-                                       guid => defined $entry->id ? $entry->id : time."_".$feed->name,
-                                       ctime => $entry->issued ? ($entry->issued->epoch || time) : time,
-                               );
-                       }
+               foreach my $entry ($f->entries) {
+                       add_page(
+                               feed => $feed,
+                               title => defined $entry->title ? decode_entities($entry->title) : "untitled",
+                               link => $entry->link,
+                               content => $entry->content->body,
+                               guid => defined $entry->id ? $entry->id : time."_".$feed->name,
+                               ctime => $entry->issued ? ($entry->issued->epoch || time) : time,
+                       );
                }
+
                $feed->{message}="processed ok";
        }
 
@@ -220,7 +228,7 @@ FEED:       foreach my $feed (values %feeds) {
 
 sub add_page (@) { #{{{
        my %params=@_;
-
+       
        my $feed=$params{feed};
        my $guid={};
        my $mtime;
@@ -257,7 +265,8 @@ sub add_page (@) { #{{{
        # to avoid unneccessary rebuilding. The mtime from rss cannot be
        # trusted; let's use a digest.
        eval q{use Digest::MD5 'md5_hex'};
-       my $digest=md5_hex($params{content});
+       require Encode;
+       my $digest=md5_hex(Encode::encode_utf8($params{content}));
        return unless ! exists $guid->{md5} || $guid->{md5} ne $digest;
        $guid->{md5}=$digest;