sipb.mit.edu Git - ikiwiki.git/commitdiff
Optimize away most expensive file prune calls, when refreshing
author Joey Hess <joey@gnu.kitenet.net>
Fri, 9 Oct 2009 00:27:56 +0000 (20:27 -0400)
committer Joey Hess <joey@gnu.kitenet.net>
Fri, 9 Oct 2009 00:27:56 +0000 (20:27 -0400)
Benchmarking refresh of a wiki with 25 thousand pages showed
file_pruned() using most of the time. But, when refreshing, ikiwiki already
knows about nearly all the files. So we can skip calling file_pruned() for
those it knows about. While tricky to do, this sped up a refresh (that
otherwise does no work) by 10-50%.

IkiWiki.pm
IkiWiki/Render.pm
debian/changelog

index 97d84c9deb175b694bd303b10ff75fba6b95bb08..d667e7e10fe43c9162b9dd54a0105c41c80c3afe 100644 (file)
@@ -32,7 +32,6 @@ our $installdir='/usr'; # INSTALLDIR_AUTOREPLACE done by Makefile, DNE
 use Memoize;
 memoize("abs2rel");
 memoize("pagespec_translate");
-memoize("file_pruned");
 memoize("template_file");
 
 sub getsetup () {
@@ -1770,14 +1769,18 @@ sub add_depends ($$) {
        return 1;
 }
 
-sub file_pruned ($$) {
-       require File::Spec;
-       my $file=File::Spec->canonpath(shift);
-       my $base=File::Spec->canonpath(shift);
-       $file =~ s#^\Q$base\E/+##;
+sub file_pruned ($;$) {
+       my $file=shift;
+       if (@_) {
+               require File::Spec;
+               $file=File::Spec->canonpath($file);
+               my $base=File::Spec->canonpath(shift);
+               return if $file eq $base;
+               $file =~ s#^\Q$base\E/+##;
+       }
 
        my $regexp='('.join('|', @{$config{wiki_file_prune_regexps}}).')';
-       return $file =~ m/$regexp/ && $file ne $base;
+       return $file =~ m/$regexp/;
 }
 
 sub define_gettext () {
index 246c2260d7748e9fe2b3522d73035564e0fb6754..a8236b954fbbc334696542e1e61f17ff6f617bc6 100644 (file)
@@ -279,24 +279,26 @@ sub find_src_files () {
        find({
                no_chdir => 1,
                wanted => sub {
-                       $_=decode_utf8($_);
-                       if (file_pruned($_, $config{srcdir})) {
+                       my $file=decode_utf8($_);
+                       $file=~s/^\Q$config{srcdir}\E\/?//;
+                       my $page = pagename($file);
+                       if (! exists $pagesources{$page} &&
+                           file_pruned($file)) {
                                $File::Find::prune=1;
+                               return;
                        }
-                       elsif (! -l $_ && ! -d _) {
-                               my ($f)=/$config{wiki_file_regexp}/; # untaint
-                               if (! defined $f) {
-                                       warn(sprintf(gettext("skipping bad filename %s"), $_)."\n");
-                               }
-                               else {
-                                       $f=~s/^\Q$config{srcdir}\E\/?//;
-                                       push @files, $f;
-                                       my $pagename = pagename($f);
-                                       if ($pages{$pagename}) {
-                                               debug(sprintf(gettext("%s has multiple possible source pages"), $pagename));
-                                       }
-                                       $pages{$pagename}=1;
+                       return if -l $_ || -d _ || ! length $file;
+
+                       my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint
+                       if (! defined $f) {
+                               warn(sprintf(gettext("skipping bad filename %s"), $file)."\n");
+                       }
+                       else {
+                               push @files, $f;
+                               if ($pages{$page}) {
+                                       debug(sprintf(gettext("%s has multiple possible source pages"), $page));
                                }
+                               $pages{$page}=1;
                        }
                },
        }, $config{srcdir});
@@ -304,27 +306,28 @@ sub find_src_files () {
                find({
                        no_chdir => 1,
                        wanted => sub {
-                               $_=decode_utf8($_);
-                               if (file_pruned($_, $dir)) {
+                               my $file=decode_utf8($_);
+                               $file=~s/^\Q$dir\E\/?//;
+                               my $page=pagename($file);
+                               if (! exists $pagesources{$page} &&
+                                   file_pruned($file)) {
                                        $File::Find::prune=1;
+                                       return;
                                }
-                               elsif (! -l $_ && ! -d _) {
-                                       my ($f)=/$config{wiki_file_regexp}/; # untaint
-                                       if (! defined $f) {
-                                               warn(sprintf(gettext("skipping bad filename %s"), $_)."\n");
-                                       }
-                                       else {
-                                               $f=~s/^\Q$dir\E\/?//;
-                                               # avoid underlaydir
-                                               # override attacks; see
-                                               # security.mdwn
-                                               if (! -l "$config{srcdir}/$f" && 
-                                                   ! -e _) {
-                                                       my $page=pagename($f);
-                                                       if (! $pages{$page}) {
-                                                               push @files, $f;
-                                                               $pages{$page}=1;
-                                                       }
+                               return if -l $_ || -d _ || ! length $file;
+
+                               my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint
+                               if (! defined $f) {
+                                       warn(sprintf(gettext("skipping bad filename %s"), $file)."\n");
+                               }
+                               else {
+                                       # avoid underlaydir override
+                                       # attacks; see security.mdwn
+                                       if (! -l "$config{srcdir}/$f" && 
+                                           ! -e _) {
+                                               if (! $pages{$page}) {
+                                                       push @files, $f;
+                                                       $pages{$page}=1;
                                                }
                                        }
                                }
index ca5409af7949f7a7e6eb900ab697892c57134e25..6c435306537eb82a7d55c3a28b7b8022abba3e07 100644 (file)
@@ -10,6 +10,8 @@ ikiwiki (3.14159266) UNRELEASED; urgency=low
   * mirrorlist: Display nothing if list is empty.
   * Fix a bug that could lead to duplicate links being recorded
     for tags.
+  * Optimize away most expensive file prune calls, when refreshing,
+    by only checking new files.
 
  -- Joey Hess <joeyh@debian.org>  Sun, 27 Sep 2009 17:40:03 -0400