From fe4f176f725b069ac74561600bba982c2d9ad607 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Thu, 18 Jun 2009 15:54:53 +0100 Subject: Optimize the dependencies list On a large wiki you can spend a lot of time reading through large lists of dependencies to see whether files need to be rebuilt (album, with its one-page-per-photo arrangement, suffers particularly badly from this). The dependency list is currently a single pagespec, but it's not used like a normal pagespec - in practice, it's a list of pagespecs joined with the "or" operator. Accordingly, change it to be stored as a list of pagespecs. On a wiki with many tagged photo albums, this reduces the time to refresh after `touch tags/*.mdwn` from about 31 to 25 seconds. Getting the benefit of this change on an existing wiki requires a rebuild. --- IkiWiki.pm | 19 +++++++++++++------ IkiWiki/Render.pm | 24 +++++++++++++----------- ikiwiki-transition | 2 +- t/index.t | 12 ++++++------ 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/IkiWiki.pm b/IkiWiki.pm index 43ffb1fd8..3e94c8a25 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -1507,8 +1507,11 @@ sub loadindex () { $links{$page}=$d->{links}; $oldlinks{$page}=[@{$d->{links}}]; } - if (exists $d->{depends}) { - $depends{$page}=$d->{depends}; + if (exists $d->{dependslist}) { + $depends{$page}=$d->{dependslist}; + } + elsif (exists $d->{depends}) { + $depends{$page}=[$d->{depends}]; } if (exists $d->{state}) { $pagestate{$page}=$d->{state}; @@ -1554,7 +1557,8 @@ sub saveindex () { }; if (exists $depends{$page}) { - $index{page}{$src}{depends} = $depends{$page}; + $index{page}{$src}{depends} = join(" or ", @{$depends{$page}}); + $index{page}{$src}{dependslist} = $depends{$page}; } if (exists $pagestate{$page}) { @@ -1724,14 +1728,17 @@ sub rcs_receive () { sub add_depends ($$) { my $page=shift; my $pagespec=shift; - + return unless pagespec_valid($pagespec); if (! exists $depends{$page}) { - $depends{$page}=$pagespec; + $depends{$page}=[$pagespec]; } else { - $depends{$page}=pagespec_merge($depends{$page}, $pagespec); + foreach my $p (@{$depends{$page}}) { + return 1 if $p eq $pagespec; + } + push @{$depends{$page}}, $pagespec; } return 1; diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index 578142d2e..08d484847 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -455,20 +455,22 @@ sub refresh () { my @changed=(keys %rendered, @del); # rebuild dependant pages - foreach my $f (@$files) { + F: foreach my $f (@$files) { next if $rendered{$f}; my $p=pagename($f); if (exists $depends{$p}) { - # only consider internal files - # if the page explicitly depends on such files - foreach my $file (@changed, $depends{$p}=~/internal\(/ ? @internal : ()) { - next if $f eq $file; - my $page=pagename($file); - if (pagespec_match($page, $depends{$p}, location => $p)) { - debug(sprintf(gettext("building %s, which depends on %s"), $f, $page)); - render($f); - $rendered{$f}=1; - last; + foreach my $d (@{$depends{$p}}) { + # only consider internal files + # if the page explicitly depends on such files + foreach my $file (@changed, $d=~/internal\(/ ? @internal : ()) { + next if $f eq $file; + my $page=pagename($file); + if (pagespec_match($page, $d, location => $p)) { + debug(sprintf(gettext("building %s, which depends on %s"), $f, $page)); + render($f); + $rendered{$f}=1; + next F; + } } } } diff --git a/ikiwiki-transition b/ikiwiki-transition index 398b1a3c8..60cea3d54 100755 --- a/ikiwiki-transition +++ b/ikiwiki-transition @@ -299,7 +299,7 @@ sub oldloadindex { $pagemtime{$page}=$items{mtime}[0]; $oldlinks{$page}=[@{$items{link}}]; $links{$page}=[@{$items{link}}]; - $depends{$page}=$items{depends}[0] if exists $items{depends}; + $depends{$page}=[$items{depends}[0]] if exists $items{depends}; $destsources{$_}=$page foreach @{$items{dest}}; $renderedfiles{$page}=[@{$items{dest}}]; $pagecase{lc $page}=$page; diff --git a/t/index.t b/t/index.t index e79609902..107dac9d0 100755 --- a/t/index.t +++ b/t/index.t @@ -32,9 +32,9 @@ $renderedfiles{"bar.png"}=["bar.png"]; $links{"Foo"}=["bar.png"]; $links{"bar"}=["Foo", "new-page"]; $links{"bar.png"}=[]; -$depends{"Foo"}=""; -$depends{"bar"}="foo*"; -$depends{"bar.png"}=""; +$depends{"Foo"}=[]; +$depends{"bar"}=["foo*"]; +$depends{"bar.png"}=[]; $pagestate{"bar"}{meta}{title}="a page about bar"; $pagestate{"bar"}{meta}{moo}="mooooo"; # only loaded plugins save state, so this should not be saved out @@ -80,9 +80,9 @@ is_deeply(\%links, { "bar.png" => [], }, "%links loaded correctly"); is_deeply(\%depends, { - Foo => "", - bar => "foo*", - "bar.png" => "", + Foo => [], + bar => ["foo*"], + "bar.png" => [], }, "%depends loaded correctly"); is_deeply(\%pagestate, { bar => { -- cgit v1.2.3 From b6fcb1cb0ef27e5a63184440675d465fad652acf Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Thu, 18 Jun 2009 15:55:55 +0100 Subject: calendar, inline, map: don't pre-join dependencies The new dependency handling works better (eliminates more duplicates) if dependencies are split up. On the same wiki mentioned in the previous commit, this saves about a second (i.e. 4%) on the same test. --- IkiWiki/Plugin/calendar.pm | 4 +++- IkiWiki/Plugin/inline.pm | 4 +++- IkiWiki/Plugin/map.pm | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/IkiWiki/Plugin/calendar.pm b/IkiWiki/Plugin/calendar.pm index c25893f72..5d16dff75 100644 --- a/IkiWiki/Plugin/calendar.pm +++ b/IkiWiki/Plugin/calendar.pm @@ -212,7 +212,9 @@ EOF add_depends($params{page}, $params{pages}); # Explicitly add all currently linked pages as dependencies, so # that if they are removed, the calendar will be sure to be updated. - add_depends($params{page}, join(" or ", @list)); + foreach my $p (@list) { + add_depends($params{page}, $p); + } return $calendar; } diff --git a/IkiWiki/Plugin/inline.pm b/IkiWiki/Plugin/inline.pm index 3a2f4b7bc..a501566b5 100644 --- a/IkiWiki/Plugin/inline.pm +++ b/IkiWiki/Plugin/inline.pm @@ -251,7 +251,9 @@ sub preprocess_inline (@) { # Explicitly add all currently displayed pages as dependencies, so # that if they are removed or otherwise changed, the inline will be # sure to be updated. - add_depends($params{page}, join(" or ", $#list >= $#feedlist ? @list : @feedlist)); + foreach my $p ($#list >= $#feedlist ? @list : @feedlist) { + add_depends($params{page}, $p); + } if ($feeds && exists $params{feedpages}) { @feedlist=pagespec_match_list(\@feedlist, $params{feedpages}, location => $params{page}); diff --git a/IkiWiki/Plugin/map.pm b/IkiWiki/Plugin/map.pm index 826dbbd66..54146dc46 100644 --- a/IkiWiki/Plugin/map.pm +++ b/IkiWiki/Plugin/map.pm @@ -73,7 +73,9 @@ sub preprocess (@) { add_depends($params{page}, $params{pages}); # Explicitly add all currently shown pages, to detect when pages # are removed. - add_depends($params{page}, join(" or ", keys %mapitems)); + foreach my $item (keys %mapitems) { + add_depends($params{page}, $item); + } # Create the map. my $parent=""; -- cgit v1.2.3 From 2cf26fd709efa802b93109935b4203590d549810 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 24 Aug 2009 22:18:46 +0100 Subject: Don't bother to save {depends} to the index As per Joey's review. --- IkiWiki.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/IkiWiki.pm b/IkiWiki.pm index 3e94c8a25..734b167cf 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -1557,7 +1557,6 @@ sub saveindex () { }; if (exists $depends{$page}) { - $index{page}{$src}{depends} = join(" or ", @{$depends{$page}}); $index{page}{$src}{dependslist} = $depends{$page}; } -- cgit v1.2.3 From 7227c2debfeef94b35f7d81f42900aa01820caa3 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 24 Aug 2009 23:01:42 +0100 Subject: Use a hash to de-duplicate dependencies --- IkiWiki.pm | 19 ++++++------------- IkiWiki/Render.pm | 2 +- ikiwiki-transition | 2 +- t/index.t | 12 ++++++------ 4 files changed, 14 insertions(+), 21 deletions(-) diff --git a/IkiWiki.pm b/IkiWiki.pm index 734b167cf..21a74adce 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -1508,10 +1508,12 @@ sub loadindex () { $oldlinks{$page}=[@{$d->{links}}]; } if (exists $d->{dependslist}) { - $depends{$page}=$d->{dependslist}; + $depends{$page}={ + map { $_ => 1 } @{$d->{dependslist}} + }; } elsif (exists $d->{depends}) { - $depends{$page}=[$d->{depends}]; + $depends{$page}={$d->{depends} => 1}; } if (exists $d->{state}) { $pagestate{$page}=$d->{state}; @@ -1557,7 +1559,7 @@ sub saveindex () { }; if (exists $depends{$page}) { - $index{page}{$src}{dependslist} = $depends{$page}; + $index{page}{$src}{dependslist} = [ keys %{$depends{$page}} ]; } if (exists $pagestate{$page}) { @@ -1730,16 +1732,7 @@ sub add_depends ($$) { return unless pagespec_valid($pagespec); - if (! exists $depends{$page}) { - $depends{$page}=[$pagespec]; - } - else { - foreach my $p (@{$depends{$page}}) { - return 1 if $p eq $pagespec; - } - push @{$depends{$page}}, $pagespec; - } - + $depends{$page}{$pagespec} = 1; return 1; } diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index 08d484847..b9f1d7754 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -459,7 +459,7 @@ sub refresh () { next if $rendered{$f}; my $p=pagename($f); if (exists $depends{$p}) { - foreach my $d (@{$depends{$p}}) { + foreach my $d (keys %{$depends{$p}}) { # only consider internal files # if the page explicitly depends on such files foreach my $file (@changed, $d=~/internal\(/ ? @internal : ()) { diff --git a/ikiwiki-transition b/ikiwiki-transition index 60cea3d54..795ab31cb 100755 --- a/ikiwiki-transition +++ b/ikiwiki-transition @@ -299,7 +299,7 @@ sub oldloadindex { $pagemtime{$page}=$items{mtime}[0]; $oldlinks{$page}=[@{$items{link}}]; $links{$page}=[@{$items{link}}]; - $depends{$page}=[$items{depends}[0]] if exists $items{depends}; + $depends{$page}={ $items{depends}[0] => 1 } if exists $items{depends}; $destsources{$_}=$page foreach @{$items{dest}}; $renderedfiles{$page}=[@{$items{dest}}]; $pagecase{lc $page}=$page; diff --git a/t/index.t b/t/index.t index 107dac9d0..2f23524a7 100755 --- a/t/index.t +++ b/t/index.t @@ -32,9 +32,9 @@ $renderedfiles{"bar.png"}=["bar.png"]; $links{"Foo"}=["bar.png"]; $links{"bar"}=["Foo", "new-page"]; $links{"bar.png"}=[]; -$depends{"Foo"}=[]; -$depends{"bar"}=["foo*"]; -$depends{"bar.png"}=[]; +$depends{"Foo"}={}; +$depends{"bar"}={"foo*" => 1}; +$depends{"bar.png"}={}; $pagestate{"bar"}{meta}{title}="a page about bar"; $pagestate{"bar"}{meta}{moo}="mooooo"; # only loaded plugins save state, so this should not be saved out @@ -80,9 +80,9 @@ is_deeply(\%links, { "bar.png" => [], }, "%links loaded correctly"); is_deeply(\%depends, { - Foo => [], - bar => ["foo*"], - "bar.png" => [], + Foo => {}, + bar => {"foo*" => 1}, + "bar.png" => {}, }, "%depends loaded correctly"); is_deeply(\%pagestate, { bar => { -- cgit v1.2.3 From e4cd168ebedd95585290c97ff42234344bfed46c Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 24 Aug 2009 23:16:15 +0100 Subject: Allow add_depends to take an arrayref --- IkiWiki.pm | 9 +++++++++ IkiWiki/Plugin/calendar.pm | 4 +--- IkiWiki/Plugin/inline.pm | 4 +--- IkiWiki/Plugin/map.pm | 4 +--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/IkiWiki.pm b/IkiWiki.pm index 21a74adce..35fee1aa7 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -1730,6 +1730,15 @@ sub add_depends ($$) { my $page=shift; my $pagespec=shift; + if (ref $pagespec eq 'ARRAY') { + foreach my $ps (@$pagespec) { + if (pagespec_valid($ps)) { + $depends{$page}{$ps} = 1; + } + } + return; + } + return unless pagespec_valid($pagespec); $depends{$page}{$pagespec} = 1; diff --git a/IkiWiki/Plugin/calendar.pm b/IkiWiki/Plugin/calendar.pm index 5d16dff75..ce0719404 100644 --- a/IkiWiki/Plugin/calendar.pm +++ b/IkiWiki/Plugin/calendar.pm @@ -212,9 +212,7 @@ EOF add_depends($params{page}, $params{pages}); # Explicitly add all currently linked pages as dependencies, so # that if they are removed, the calendar will be sure to be updated. - foreach my $p (@list) { - add_depends($params{page}, $p); - } + add_depends($params{page}, \@list); return $calendar; } diff --git a/IkiWiki/Plugin/inline.pm b/IkiWiki/Plugin/inline.pm index a501566b5..b566d960f 100644 --- a/IkiWiki/Plugin/inline.pm +++ b/IkiWiki/Plugin/inline.pm @@ -251,9 +251,7 @@ sub preprocess_inline (@) { # Explicitly add all currently displayed pages as dependencies, so # that if they are removed or otherwise changed, the inline will be # sure to be updated. - foreach my $p ($#list >= $#feedlist ? @list : @feedlist) { - add_depends($params{page}, $p); - } + add_depends($params{page}, $#list >= $#feedlist ? \@list : \@feedlist); if ($feeds && exists $params{feedpages}) { @feedlist=pagespec_match_list(\@feedlist, $params{feedpages}, location => $params{page}); diff --git a/IkiWiki/Plugin/map.pm b/IkiWiki/Plugin/map.pm index 54146dc46..cc977024d 100644 --- a/IkiWiki/Plugin/map.pm +++ b/IkiWiki/Plugin/map.pm @@ -73,9 +73,7 @@ sub preprocess (@) { add_depends($params{page}, $params{pages}); # Explicitly add all currently shown pages, to detect when pages # are removed. - foreach my $item (keys %mapitems) { - add_depends($params{page}, $item); - } + add_depends($params{page}, [keys %mapitems]); # Create the map. my $parent=""; -- cgit v1.2.3 From 275dd5c4ea748261741236a62ad763c775a651c5 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Tue, 25 Aug 2009 00:02:27 +0100 Subject: IkiWiki::pagename: memoize results This relies for its correctness on %config, %hooks and pagetype() not changing. --- IkiWiki.pm | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/IkiWiki.pm b/IkiWiki.pm index 35fee1aa7..8a8695768 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -661,9 +661,15 @@ sub pagetype ($) { return; } +my %pagename_cache; + sub pagename ($) { my $file=shift; + if (exists $pagename_cache{$file}) { + return $pagename_cache{$file}; + } + my $type=pagetype($file); my $page=$file; $page=~s/\Q.$type\E*$// @@ -672,6 +678,8 @@ sub pagename ($) { if ($config{indexpages} && $page=~/(.*)\/index$/) { $page=$1; } + + $pagename_cache{$file} = $page; return $page; } -- cgit v1.2.3 From df8ab3406d67dd7d97fc45324ebb0e30c30dc9b0 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Tue, 25 Aug 2009 00:21:19 +0100 Subject: use pagespec_match_list --- IkiWiki/Render.pm | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index b9f1d7754..fb28b6e3b 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -462,15 +462,17 @@ sub refresh () { foreach my $d (keys %{$depends{$p}}) { # only consider internal files # if the page explicitly depends on such files - foreach my $file (@changed, $d=~/internal\(/ ? @internal : ()) { - next if $f eq $file; - my $page=pagename($file); - if (pagespec_match($page, $d, location => $p)) { - debug(sprintf(gettext("building %s, which depends on %s"), $f, $page)); - render($f); - $rendered{$f}=1; - next F; - } + my @pages = map { + pagename($_) + } grep { + $_ ne $f + } (@changed, $d =~ /internal\(/ ? @internal : ()); + @pages = pagespec_match_list(\@pages, $d, location => $p); + if (@pages) { + debug(sprintf(gettext("building %s, which depends on %s"), $f, $pages[0])); + render($f); + $rendered{$f}=1; + next F; } } } -- cgit v1.2.3 From bc6e50a075b164100d3144a13633647613a3dc8e Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Sat, 1 Aug 2009 12:43:04 +0100 Subject: Mark "should optimise pagespecs" as done --- doc/todo/should_optimise_pagespecs.mdwn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/todo/should_optimise_pagespecs.mdwn b/doc/todo/should_optimise_pagespecs.mdwn index 1594dcee7..5ed24d333 100644 --- a/doc/todo/should_optimise_pagespecs.mdwn +++ b/doc/todo/should_optimise_pagespecs.mdwn @@ -90,6 +90,8 @@ I can think about reducung the size of my wiki source and making it available on >> rather than a single pagespec. This does turn out to be faster, although >> not as much as I'd like. --[[smcv]] +>>> [[Merged|done]] --[[smcv]] + >>> I just wanted to note that there is a whole long discussion of dependencies and pagespecs on the [[todo/tracking_bugs_with_dependencies]] page. -- [[Will]] >>>> Yeah, I had a look at that (as the only other mention of `pagespec_merge`). -- cgit v1.2.3