From 49bf877701d89d613dcf5c2d85bd08876a636dba Mon Sep 17 00:00:00 2001 From: joey Date: Sat, 28 Oct 2006 03:27:10 +0000 Subject: * Add a separate pass to find page links, and only render each page once, instead of over and over. This is up to 8 times faster than before! (This could have introduced some subtle bugs, so it needs to be tested extensively.) --- IkiWiki/Render.pm | 64 ++++++++++++++++++++++++++------------------- debian/changelog | 9 +++++++ doc/roadmap.mdwn | 5 ++-- doc/todo/optimisations.mdwn | 36 ++++++++++++------------- 4 files changed, 64 insertions(+), 50 deletions(-) diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index deec539ae..026b3582e 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -13,6 +13,7 @@ sub backlinks ($) { #{{{ my @links; foreach my $p (keys %links) { next if bestlink($page, $p) eq $page; + if (grep { length $_ && bestlink($p, $_) eq $page } @{$links{$p}}) { my $href=abs2rel(htmlpage($p), dirname($page)); @@ -119,21 +120,25 @@ sub mtime ($) { #{{{ return (stat($file))[9]; } #}}} -sub findlinks ($$) { #{{{ - my $page=shift; - my $content=shift; +sub scan ($) { #{{{ + my $file=shift; - my @links; - while ($content =~ /(? $oldpagemtime{$page} || + $forcerebuild{$page}) { + debug("scanning $file"); + scan($file); + } + } + # render any updated files foreach my $file (@files) { my $page=pagename($file); @@ -278,12 +291,10 @@ sub refresh () { #{{{ } # if any files were added or removed, check to see if each page - # needs an update due to linking to them or inlining them. - # TODO: inefficient; pages may get rendered above and again here; - # problem is the bestlink may have changed and we won't know until - # now + # needs an update due to linking to them or inlining them if (@add || @del) { FILE: foreach my $file (@files) { + next if $rendered{$file}; my $page=pagename($file); foreach my $f (@add, @del) { my $p=pagename($f); @@ -301,11 +312,9 @@ FILE: foreach my $file (@files) { # Handle backlinks; if a page has added/removed links, update the # pages it links to. Also handles rebuilding dependant pages. - # TODO: inefficient; pages may get rendered above and again here; - # problem is the backlinks could be wrong in the first pass render - # above if (%rendered || @del) { foreach my $f (@files) { + next if $rendered{$f}; my $p=pagename($f); if (exists $depends{$p}) { foreach my $file (keys %rendered, @del) { @@ -347,6 +356,7 @@ FILE: foreach my $file (@files) { foreach my $link (keys %linkchanged) { my $linkfile=$pagesources{$link}; if (defined $linkfile) { + next if $rendered{$linkfile}; debug("rendering $linkfile, to update its backlinks"); render($linkfile); $rendered{$linkfile}=1; diff --git a/debian/changelog b/debian/changelog index 24f0e3886..e914d40b3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +ikiwiki (1.32) UNRELEASED; urgency=low + + * Add a separate pass to find page links, and only render each page once, + instead of over and over. This is up to 8 times faster than before! + (This could have introduced some subtle bugs, so it needs to be tested + extensively.) + + -- Joey Hess Fri, 27 Oct 2006 23:21:35 -0400 + ikiwiki (1.31) unstable; urgency=low * Patch from Pawel Tecza to cp -a the templates in the Makefile. diff --git a/doc/roadmap.mdwn b/doc/roadmap.mdwn index 7a3193308..b393f254f 100644 --- a/doc/roadmap.mdwn +++ b/doc/roadmap.mdwn @@ -14,12 +14,11 @@ Released 29 April 2006. * Unit test suite (with tests of at least core stuff like [[PageSpec]]). _(status: exists, could of course use more tests)_ -* [[Plugins]] _(status: done, interface still not [[quite_stable|todo/firm_up_plugin_interface]])_ +* [[Plugins]] _(status: done)_ * [[Tags]] _(status: fair)_ * Should have fully working [[todo/utf8]] support. _(status: good)_ * [[Optimised_rendering|todo/optimisations]] if possible. Deal with other - scalability issues. _(status: 45%-60%+ speedup since 1.0, much more - possible)_ + scalability issues. _(status: something like 9x speedup 1.0!)_ * Improved [[todo/html]] stylesheets and templates. * Improved scalable [[logo]]. _(status: done)_ * Support for at other revision control systems aside from svn. diff --git a/doc/todo/optimisations.mdwn b/doc/todo/optimisations.mdwn index 4e8118756..13a270b8f 100644 --- a/doc/todo/optimisations.mdwn +++ b/doc/todo/optimisations.mdwn @@ -1,25 +1,21 @@ -* Render each changed page only once. Currently pages are rendered up to 4 - times in worst case (8 times if there's an rss feed). - - The issue is that rendering a page is used to gather info like the links - on the page (and other stuff) that can effect rendering other pages. So it - needs a multi-pass system. But rendering the whole page in each pass is - rather obscene. - - It would be better to have the first pass be a data gathering pass. Such - a pass would still need to load and parse the page contents etc, but - wouldn't need to generate html or write anything to disk. - - One problem with this idea is that it could turn into 2x the work in - cases where ikiwiki currently efficiently renders a page just once. And - caching between the passes to avoid that wouldn't do good things to the - memory footprint. - - Might be best to just do a partial first pass, getting eg, the page links - up-to-date, and then multiple, but generally fewer, rendering passes. - * Don't render blog archive pages unless a page is added/removed. Just changing a page doesn't affect the archives as they show only the title. * Look at splitting up CGI.pm. But note that too much splitting can slow perl down. + +* The backlinks code turns out to scale badly to wikis with thousands of + pages. The code is O(N^2)! It's called for each page, and it loops + through all the pages to find backlinks. + + Need to find a way to calculate and cache all the backlinks in one pass, + which could be done in at worst O(N), and possibly less (if they're + stored in the index, it could be constant time). But to do this, there + would need to be a way to invalidate or update the cache in these + situations: + + - A page is added. Note that this can change a backlink to point to + the new page instead of the page it pointed to before. + - A page is deleted. This can also change backlinks that pointed to that + page. + - A page is modified. Links added/removed. -- cgit v1.2.3