From 71d2d930272bcdba0b5d2671506e2386ea416632 Mon Sep 17 00:00:00 2001 From: joey Date: Mon, 23 Apr 2007 18:36:44 +0000 Subject: [PATCH] * Fix aggregator to not warn when a feed contains no body content at all. * Work around bug #420636 by, if XML::Parser crashes, running the feed content through Encode::decode_utf8. --- IkiWiki/Plugin/aggregate.pm | 35 +++++++++++++++---- debian/changelog | 5 ++- doc/bugs/aggregate_plugin_errors.mdwn | 18 ++++++++-- .../aggregate_plugin_errors/discussion.mdwn | 5 ++- 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/IkiWiki/Plugin/aggregate.pm b/IkiWiki/Plugin/aggregate.pm index 2295691aa..be2c96369 100644 --- a/IkiWiki/Plugin/aggregate.pm +++ b/IkiWiki/Plugin/aggregate.pm @@ -229,6 +229,8 @@ sub expire () { #{{{ sub aggregate () { #{{{ eval q{use XML::Feed}; error($@) if $@; + eval q{use URI::Fetch}; + error($@) if $@; eval q{use HTML::Entities}; error($@) if $@; @@ -237,6 +239,9 @@ sub aggregate () { #{{{ time - $feed->{lastupdate} >= $feed->{updateinterval}; $feed->{lastupdate}=time; $feed->{newposts}=0; + $feed->{message}=sprintf(gettext("processed ok at %s"), + displaytime($feed->{lastupdate})); + $feed->{error}=0; $IkiWiki::forcerebuild{$feed->{sourcepage}}=1; debug(sprintf(gettext("checking feed %s ..."), $feed->{name})); @@ -251,7 +256,29 @@ sub aggregate () { #{{{ } $feed->{feedurl}=pop @urls; } - my $f=eval{XML::Feed->parse(URI->new($feed->{feedurl}))}; + my $res=URI::Fetch->fetch($feed->{feedurl}); + if (! $res) { + $feed->{message}=URI::Fetch->errstr; + $feed->{error}=1; + debug($feed->{message}); + next; + } + if ($res->status == URI::Fetch::URI_GONE()) { + $feed->{message}=gettext("feed not found"); + $feed->{error}=1; + debug($feed->{message}); + next; + } + my $content=$res->content; + my $f=eval{XML::Feed->parse(\$content)}; + if ($@) { + # One common cause of XML::Feed crashing is a feed + # that contains invalid UTF-8 sequences. Convert + # feed to ascii to try to work around. 
+ $feed->{message}=sprintf(gettext("invalid UTF-8 stripped from feed")); + $content=Encode::decode_utf8($content); + $f=eval{XML::Feed->parse(\$content)}; + } if ($@) { $feed->{message}=gettext("feed crashed XML::Feed!")." ($@)"; $feed->{error}=1; @@ -270,15 +297,11 @@ sub aggregate () { #{{{ feed => $feed, title => defined $entry->title ? decode_entities($entry->title) : "untitled", link => $entry->link, - content => $entry->content->body, + content => defined $entry->content->body ? $entry->content->body : "", guid => defined $entry->id ? $entry->id : time."_".$feed->name, ctime => $entry->issued ? ($entry->issued->epoch || time) : time, ); } - - $feed->{message}=sprintf(gettext("processed ok at %s"), - displaytime($feed->{lastupdate})); - $feed->{error}=0; } } #}}} diff --git a/debian/changelog b/debian/changelog index e4b0fc75f..7822a4eb0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,8 +5,11 @@ ikiwiki (1.51) UNRELEASED; urgency=low * Minor template improvements by Alessandro. * In mercurial backend, untaint ipaddr when using it as the user for the commit. Thanks, Alexander Wirt. Closes: #420428 + * Fix aggregator to not warn when a feed contains no body content at all. + * Work around bug #420636 by, if XML::Parser crashes, running the feed + content through Encode::decode_utf8. - -- Joey Hess Sun, 22 Apr 2007 13:43:49 -0400 + -- Joey Hess Mon, 23 Apr 2007 14:31:57 -0400 ikiwiki (1.50) unstable; urgency=low diff --git a/doc/bugs/aggregate_plugin_errors.mdwn b/doc/bugs/aggregate_plugin_errors.mdwn index 5d8b34f53..57a9869f0 100644 --- a/doc/bugs/aggregate_plugin_errors.mdwn +++ b/doc/bugs/aggregate_plugin_errors.mdwn @@ -13,7 +13,21 @@ When I run ikiwiki with "--aggregate" I get this error: Use of uninitialized value in subroutine entry at /usr/share/perl5/IkiWiki/Plugin/aggregate.pm line 414. Use of uninitialized value in subroutine entry at /usr/share/perl5/IkiWiki/Plugin/aggregate.pm line 414. 
-Also, feeds from DokuWiki seem to crash the aggregate plugin completely, it's not a completely valid feed but presumably crashing is still bad. The feed I'm seeing this with is http://www.wirelesscommons.org/feed.php +> Fixed, this occurred when a feed did not include any body content tag. +> --[[Joey]] + +Also, feeds from DokuWiki seem to crash the aggregate plugin completely, +it's not a completely valid feed but presumably crashing is still bad. The +feed I'm seeing this with is http://www.wirelesscommons.org/feed.php + +> This is a bug in XML::Parser. Unfortunately, perl does not have a feed +> parser that handles invalid feeds, and in particular, XML::Parser has +> issues with feeds that claim to be encoded in utf-8 and contain invalid +> utf sequences, as well as other encoding issues. See also [[debbug 380426]]. +> Note though that this invalid feed does not really crash the aggregate plugin, +> it just notes that XML::Parser crashed on it and continues. This is the +> best I can do in ikiwiki. I have filed a bug on XML::Parser about this, +> it's [[debbug 420636]]. I've also put in a workaround, so [[done]]. -- System Information: Debian Release: 3.1 @@ -43,4 +57,4 @@ Also, feeds from DokuWiki seem to crash the aggregate plugin completely, it's no Cheers, ---[[AdamShand]] \ No newline at end of file +--[[AdamShand]] diff --git a/doc/bugs/aggregate_plugin_errors/discussion.mdwn b/doc/bugs/aggregate_plugin_errors/discussion.mdwn index 691195595..3425b6d16 100644 --- a/doc/bugs/aggregate_plugin_errors/discussion.mdwn +++ b/doc/bugs/aggregate_plugin_errors/discussion.mdwn @@ -1,3 +1,6 @@ I have the same problem here when I use a feed from googles shared feed. http://www.google.com/reader/public/atom/user/04715560304044435944/state/com.google/broadcast -john \ No newline at end of file +john + +> I cannot reproduce any problem with this feed. Can you provide details? +> --[[Joey]] -- 2.44.0