From 2a7721febd6cac1af5e7f4b4949ffe066c62c837 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 5 May 2009 23:40:09 -0400 Subject: Avoid %links accumulating duplicates. (For TOVA) This is sorta an optimisation, and sorta a bug fix. In one test case I have available, it can speed a page build up from 3 minutes to 3 seconds. The root of the problem is that $links{$page} contains arrays of links, rather than hashes of links. And when a link is found, it is just pushed onto the array, without checking for dups. Now, the array is emptied before scanning a page, so there should not be a lot of opportunity for lots of duplicate links to pile up in it. But, in some cases, they can, and if there are hundreds of duplicate links in the array, then scanning it for matching links, as match_link and some other code does, becomes much more expensive than it needs to be. Perhaps the real right fix would be to change the data structure to a hash. But, the list of links is never accessed like that, you always want to iterate through it. I also looked at deduping the list in saveindex, but that does a lot of unnecessary work, and doesn't completely solve the problem. So, finally, I decided to add an add_link function that handles deduping, and make ikiwiki-transition remove the old dup links. --- ikiwiki-transition | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'ikiwiki-transition') diff --git a/ikiwiki-transition b/ikiwiki-transition index 599261a09..f17868d73 100755 --- a/ikiwiki-transition +++ b/ikiwiki-transition @@ -220,6 +220,21 @@ sub moveprefs { IkiWiki::Setup::dump($setup); } +sub deduplinks { + my $dir=shift; + if (! 
defined $dir) { + usage(); + } + $config{wikistatedir}=$dir."/.ikiwiki"; + IkiWiki::loadindex(); + foreach my $page (keys %links) { + my %l; + $l{$_}=1 foreach @{$links{$page}}; + $links{$page}=[keys %l] + } + IkiWiki::saveindex(); +} + sub usage { print STDERR "Usage: ikiwiki-transition type ...\n"; print STDERR "Currently supported transition subcommands:\n"; @@ -229,6 +244,7 @@ sub usage { print STDERR "\tmoveprefs setupfile\n"; print STDERR "\thashpassword srcdir\n"; print STDERR "\tindexdb srcdir\n"; + print STDERR "\tdeduplinks srcdir\n"; exit 1; } @@ -253,6 +269,9 @@ elsif ($mode eq 'setupformat') { elsif ($mode eq 'moveprefs') { moveprefs(@ARGV); } +elsif ($mode eq 'deduplinks') { + deduplinks(@ARGV); +} else { usage(); } -- cgit v1.2.3