From db3b72c4822cf9057460d47654c35f0a5115139e Mon Sep 17 00:00:00 2001 From: joey Date: Sat, 28 Oct 2006 05:07:56 +0000 Subject: [PATCH] instead of over and over. Typical speedup is ~4x. Max possible speedup: 8x. * Add "scan" parameter to hook(), which is used to make the hook be called during the scanning pass, as well as the render pass. The meta and tag plugins need to use the new scan parameter, so will any others that modify %links. * Now that links are calculated in a separate pass, it can also precalculate backlinks in one pass, which is O(N^2) instead of the previous code that was O(N^3). A very nice speedup for wikis with lots (thousands) of pages. --- IkiWiki.pm | 4 +- IkiWiki/Plugin/meta.pm | 2 +- IkiWiki/Plugin/tag.pm | 2 +- IkiWiki/Render.pm | 85 ++++++++++++++++++----------- debian/NEWS | 8 +++ debian/changelog | 17 ++++-- doc/plugins/contrib/googlemaps.mdwn | 2 +- doc/plugins/write.mdwn | 14 +++++ doc/roadmap.mdwn | 2 +- doc/todo/optimisations.mdwn | 18 +----- 10 files changed, 96 insertions(+), 58 deletions(-) diff --git a/IkiWiki.pm b/IkiWiki.pm index 80208ef2b..a6869d454 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -446,10 +446,11 @@ sub linkify ($$$) { #{{{ } #}}} my %preprocessing; -sub preprocess ($$$) { #{{{ +sub preprocess ($$$;$) { #{{{ my $page=shift; # the page the data comes from my $destpage=shift; # the page the data will appear in (different for inline) my $content=shift; + my $scan=shift; my $handle=sub { my $escape=shift; @@ -459,6 +460,7 @@ sub preprocess ($$$) { #{{{ return "[[$command $params]]"; } elsif (exists $hooks{preprocess}{$command}) { + return "" if $scan && ! $hooks{preprocess}{$command}{scan}; # Note: preserve order of params, some plugins may # consider it significant. 
my @params; diff --git a/IkiWiki/Plugin/meta.pm b/IkiWiki/Plugin/meta.pm index 5bcd65837..2e5fd7e76 100644 --- a/IkiWiki/Plugin/meta.pm +++ b/IkiWiki/Plugin/meta.pm @@ -13,7 +13,7 @@ my %author; my %authorurl; sub import { #{{{ - hook(type => "preprocess", id => "meta", call => \&preprocess); + hook(type => "preprocess", id => "meta", call => \&preprocess, scan => 1); hook(type => "filter", id => "meta", call => \&filter); hook(type => "pagetemplate", id => "meta", call => \&pagetemplate); } # }}} diff --git a/IkiWiki/Plugin/tag.pm b/IkiWiki/Plugin/tag.pm index 7a1be6bec..6d22c49fd 100644 --- a/IkiWiki/Plugin/tag.pm +++ b/IkiWiki/Plugin/tag.pm @@ -10,7 +10,7 @@ my %tags; sub import { #{{{ hook(type => "getopt", id => "tag", call => \&getopt); - hook(type => "preprocess", id => "tag", call => \&preprocess); + hook(type => "preprocess", id => "tag", call => \&preprocess, scan => 1); hook(type => "pagetemplate", id => "tag", call => \&pagetemplate); } # }}} diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index 026b3582e..da5a5510b 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -7,27 +7,42 @@ use strict; use IkiWiki; use Encode; +my %backlinks; +my $backlinks_calculated=0; + +sub calculate_backlinks () { #{{{ + %backlinks=(); + foreach my $page (keys %links) { + foreach my $link (@{$links{$page}}) { + my $bestlink=bestlink($page, $link); + if (length $bestlink && $bestlink ne $page) { + $backlinks{$bestlink}{$page}=1; + } + } + } + $backlinks_calculated=1; +} #}}} + sub backlinks ($) { #{{{ my $page=shift; - my @links; - foreach my $p (keys %links) { - next if bestlink($page, $p) eq $page; + calculate_backlinks() unless $backlinks_calculated; - if (grep { length $_ && bestlink($p, $_) eq $page } @{$links{$p}}) { - my $href=abs2rel(htmlpage($p), dirname($page)); + my @links; + return unless $backlinks{$page}; + foreach my $p (keys %{$backlinks{$page}}) { + my $href=abs2rel(htmlpage($p), dirname($page)); - # Trim common dir prefixes from both pages. 
- my $p_trimmed=$p; - my $page_trimmed=$page; - my $dir; - 1 while (($dir)=$page_trimmed=~m!^([^/]+/)!) && - defined $dir && - $p_trimmed=~s/^\Q$dir\E// && - $page_trimmed=~s/^\Q$dir\E//; - - push @links, { url => $href, page => pagetitle($p_trimmed) }; - } + # Trim common dir prefixes from both pages. + my $p_trimmed=$p; + my $page_trimmed=$page; + my $dir; + 1 while (($dir)=$page_trimmed=~m!^([^/]+/)!) && + defined $dir && + $p_trimmed=~s/^\Q$dir\E// && + $page_trimmed=~s/^\Q$dir\E//; + + push @links, { url => $href, page => pagetitle($p_trimmed) }; } return sort { $a->{page} cmp $b->{page} } @links; @@ -128,6 +143,11 @@ sub scan ($) { #{{{ my $srcfile=srcfile($file); my $content=readfile($srcfile); my $page=pagename($file); + will_render($page, htmlpage($page), 1); + + # Always needs to be done, since filters might add links + # to the content. + $content=filter($page, $content); my @links; while ($content =~ /(? $oldpagemtime{$page} || $forcerebuild{$page}) { debug("scanning $file"); + push @changed, $file; scan($file); } } - # render any updated files - foreach my $file (@files) { - my $page=pagename($file); - - if (! exists $oldpagemtime{$page} || - mtime(srcfile($file)) > $oldpagemtime{$page} || - $forcerebuild{$page}) { - debug("rendering $file"); - render($file); - $rendered{$file}=1; - } + # render changed and new pages + foreach my $file (@changed) { + debug("rendering $file"); + render($file); + $rendered{$file}=1; } # if any files were added or removed, check to see if each page @@ -310,9 +328,8 @@ FILE: foreach my $file (@files) { } } - # Handle backlinks; if a page has added/removed links, update the - # pages it links to. Also handles rebuilding dependant pages. 
if (%rendered || @del) { + # rebuild dependant pages foreach my $f (@files) { next if $rendered{$f}; my $p=pagename($f); @@ -330,6 +347,8 @@ FILE: foreach my $file (@files) { } } + # handle backlinks; if a page has added/removed links, + # update the pages it links to my %linkchanged; foreach my $file (keys %rendered, @del) { my $page=pagename($file); @@ -364,7 +383,7 @@ FILE: foreach my $file (@files) { } } - # Remove no longer rendered files. + # remove no longer rendered files foreach my $src (keys %rendered) { my $page=pagename($src); foreach my $file (@{$oldrenderedfiles{$page}}) { diff --git a/debian/NEWS b/debian/NEWS index f3556d622..781a32f59 100644 --- a/debian/NEWS +++ b/debian/NEWS @@ -1,3 +1,11 @@ +ikiwiki (1.32) unstable; urgency=low + + There is a change to the plugin interface in this version. Any plugins that + modify data in %links should pass scan => 1 when registering the hook that + does so. + + -- Joey Hess Sat, 28 Oct 2006 00:13:12 -0400 + ikiwiki (1.29) unstable; urgency=low Wikis need to be rebuilt on upgrade to this version. If you listed your wiki diff --git a/debian/changelog b/debian/changelog index e914d40b3..1f0394502 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,11 +1,18 @@ ikiwiki (1.32) UNRELEASED; urgency=low * Add a separate pass to find page links, and only render each page once, - instead of over and over. This is up to 8 times faster than before! - (This could have introduced some subtle bugs, so it needs to be tested - extensively.) - - -- Joey Hess Fri, 27 Oct 2006 23:21:35 -0400 + instead of over and over. Typical speedup is ~4x. Max possible speedup: + 8x. + * Add "scan" parameter to hook(), which is used to make the hook be called + during the scanning pass, as well as the render pass. The meta and tag + plugins need to use the new scan parameter, so will any others that modify + %links. 
+ * Now that links are calculated in a separate pass, it can also + precalculate backlinks in one pass, which is O(N^2) instead of the + previous code that was O(N^3). A very nice speedup for wikis with lots + (thousands) of pages. + + -- Joey Hess Fri, 27 Oct 2006 23:27:29 -0400 ikiwiki (1.31) unstable; urgency=low diff --git a/doc/plugins/contrib/googlemaps.mdwn b/doc/plugins/contrib/googlemaps.mdwn index 58ccf5adc..30f630a2c 100644 --- a/doc/plugins/contrib/googlemaps.mdwn +++ b/doc/plugins/contrib/googlemaps.mdwn @@ -1,5 +1,5 @@ [[template id=plugin name=googlemaps author="Christian Mock"]] -[[tag special-purpose]] +[[tag type/special-purpose]] [[meta title="googlemaps (third-party plugin)"]] `googlemaps` is a plugin that allows using the [Google Maps API][2] diff --git a/doc/plugins/write.mdwn b/doc/plugins/write.mdwn index 8492b1756..5cace0911 100644 --- a/doc/plugins/write.mdwn +++ b/doc/plugins/write.mdwn @@ -30,6 +30,12 @@ hook, a "id" paramter, which should be a unique string for this plugin, and a "call" parameter, which is a reference to a function to call for the hook. +An optional "scan" parameter, if set to a true value, makes the hook be +called during the preliminary scan that ikiwiki makes of updated pages, +before beginning to render pages. This parameter should be set to true if +the hook modifies data in `%links`. Note that doing so will make the hook +be run twice per page build, so avoid doing it for expensive hooks. + ## Types of hooks In roughly the order they are called. @@ -64,6 +70,14 @@ Runs on the raw source of a page, before anything else touches it, and can make arbitrary changes. The function is passed named parameters `page` and `content` and should return the filtered content. +### scan + + hook(type => "scan", id => "foo", call => \&scan); + +This is identical to a preprocess hook (see below), except that it is +called in the initial pass that scans pages for data that will be used in +later passes.
Scan hooks are the only hook that should modify `%links`. + ### preprocess Adding a [[PreProcessorDirective]] is probably the most common use of a diff --git a/doc/roadmap.mdwn b/doc/roadmap.mdwn index b393f254f..701365a25 100644 --- a/doc/roadmap.mdwn +++ b/doc/roadmap.mdwn @@ -18,7 +18,7 @@ Released 29 April 2006. * [[Tags]] _(status: fair)_ * Should have fully working [[todo/utf8]] support. _(status: good)_ * [[Optimised_rendering|todo/optimisations]] if possible. Deal with other - scalability issues. _(status: something like 9x speedup 1.0!)_ + scalability issues. _(status: should be faster, need to get numbers)_ * Improved [[todo/html]] stylesheets and templates. * Improved scalable [[logo]]. _(status: done)_ * Support for at other revision control systems aside from svn. diff --git a/doc/todo/optimisations.mdwn b/doc/todo/optimisations.mdwn index 13a270b8f..0eb830cd0 100644 --- a/doc/todo/optimisations.mdwn +++ b/doc/todo/optimisations.mdwn @@ -4,18 +4,6 @@ * Look at splitting up CGI.pm. But note that too much splitting can slow perl down. -* The backlinks code turns out to scale badly to wikis with thousands of - pages. The code is O(N^2)! It's called for each page, and it loops - through all the pages to find backlinks. - - Need to find a way to calculate and cache all the backlinks in one pass, - which could be done in at worst O(N), and possibly less (if they're - stored in the index, it could be constant time). But to do this, there - would need to be a way to invalidate or update the cache in these - situations: - - - A page is added. Note that this can change a backlink to point to - the new page instead of the page it pointed to before. - - A page is deleted. This can also change backlinks that pointed to that - page. - - A page is modified. Links added/removed. +* The backlinks calculation code is still O(N^2) on the number of pages.
+ If backlinks info were stored in the index file, it would go down to + constant time for iterative builds, though still N^2 for rebuilds. -- 2.44.0