summary refs log tree commit diff
diff options
context:
space:
mode:
author Joey Hess <joey@kodama.kitenet.net> 2008-06-03 15:29:54 -0400
committer Joey Hess <joey@kodama.kitenet.net> 2008-06-03 15:29:54 -0400
commit 8a6a5320edc2c8a2ed357463b61f161d5b295fbf (patch)
tree 3c2ad4350d247692a4e043a2c98dd56753ea63da
parent c688863cf171df72b57180df746453e3c584f633 (diff)
search: Converted to use xapian-omega.
Everything is done except for the actual indexing. I plan to do incremental indexing as pages change.
-rw-r--r-- Bundle/IkiWiki/Extras.pm 1
-rw-r--r-- IkiWiki/Plugin/search.pm 124
-rw-r--r-- debian/changelog 1
-rw-r--r-- debian/control 2
-rw-r--r-- doc/features.mdwn 4
-rw-r--r-- doc/ikiwiki.setup 4
-rw-r--r-- doc/plugins/search.mdwn 19
-rw-r--r-- doc/plugins/search/discussion.mdwn 2
-rw-r--r-- doc/todo/different_search_engine.mdwn 2
-rw-r--r-- doc/wikitemplates.mdwn 5
-rw-r--r-- templates/searchform.tmpl 4
-rw-r--r-- templates/searchquery.tmpl 117
12 files changed, 169 insertions, 116 deletions
diff --git a/Bundle/IkiWiki/Extras.pm b/Bundle/IkiWiki/Extras.pm
index f09225d49..9289968e7 100644
--- a/Bundle/IkiWiki/Extras.pm
+++ b/Bundle/IkiWiki/Extras.pm
@@ -16,6 +16,7 @@ perl -MCPAN -e 'install Bundle::IkiWiki::Extras'
=head1 CONTENTS
+Search::Xapian
Authen::Passphrase
RPC::XML
File::MimeInfo
diff --git a/IkiWiki/Plugin/search.pm b/IkiWiki/Plugin/search.pm
index 9bf223cf0..e705d018a 100644
--- a/IkiWiki/Plugin/search.pm
+++ b/IkiWiki/Plugin/search.pm
@@ -1,5 +1,5 @@
#!/usr/bin/perl
-# hyperestraier search engine plugin
+# xapian-omega search engine plugin
package IkiWiki::Plugin::search;
use warnings;
@@ -7,33 +7,32 @@ use strict;
use IkiWiki 2.00;
sub import { #{{{
- hook(type => "getopt", id => "hyperestraier",
- call => \&getopt);
- hook(type => "checkconfig", id => "hyperestraier",
- call => \&checkconfig);
- hook(type => "pagetemplate", id => "hyperestraier",
- call => \&pagetemplate);
- hook(type => "delete", id => "hyperestraier",
- call => \&delete);
- hook(type => "change", id => "hyperestraier",
- call => \&change);
- hook(type => "cgi", id => "hyperestraier",
- call => \&cgi);
+ hook(type => "checkconfig", id => "search", call => \&checkconfig);
+ hook(type => "pagetemplate", id => "search", call => \&pagetemplate);
+ hook(type => "delete", id => "search", call => \&delete);
+ hook(type => "change", id => "search", call => \&change);
+ hook(type => "cgi", id => "search", call => \&cgi);
} # }}}
-sub getopt () { #{{{
- eval q{use Getopt::Long};
- error($@) if $@;
- Getopt::Long::Configure('pass_through');
- GetOptions("estseek=s" => \$config{estseek});
-} #}}}
-
sub checkconfig () { #{{{
foreach my $required (qw(url cgiurl)) {
if (! length $config{$required}) {
error(sprintf(gettext("Must specify %s when using the search plugin"), $required));
}
}
+
+ if (! exists $config{omega_cgi}) {
+ $config{omega_cgi}="/usr/lib/cgi-bin/omega/omega";
+ }
+
+ if (! -e $config{wikistatedir}."/xapian" || $config{rebuild}) {
+ writefile("omega.conf", $config{wikistatedir}."/xapian",
+ "database_dir .\n".
+ "template_dir ./templates\n");
+ writefile("query", $config{wikistatedir}."/xapian/templates",
+ IkiWiki::misctemplate(gettext("search"),
+ readfile(IkiWiki::template_file("searchquery.tmpl"))));
+ }
} #}}}
my $form;
@@ -55,93 +54,22 @@ sub pagetemplate (@) { #{{{
} #}}}
sub delete (@) { #{{{
- debug(gettext("cleaning hyperestraier search index"));
- estcmd("purge -cl");
- estcfg();
+ debug(gettext("cleaning xapian search index"));
} #}}}
sub change (@) { #{{{
- debug(gettext("updating hyperestraier search index"));
- estcmd("gather -cm -bc -cl -sd",
- map {
- map {
- Encode::encode_utf8($config{destdir}."/".$_)
- } @{$renderedfiles{pagename($_)}};
- } @_
- );
- estcfg();
+ debug(gettext("updating xapian search index"));
} #}}}
sub cgi ($) { #{{{
my $cgi=shift;
- if (defined $cgi->param('phrase') || defined $cgi->param("navi")) {
+ if (defined $cgi->param('P')) {
# only works for GET requests
- chdir("$config{wikistatedir}/hyperestraier") || error("chdir: $!");
- exec("./".IkiWiki::basename($config{cgiurl})) || error("estseek.cgi failed");
- }
-} #}}}
-
-my $configured=0;
-sub estcfg () { #{{{
- return if $configured;
- $configured=1;
-
- my $estdir="$config{wikistatedir}/hyperestraier";
- my $cgi=IkiWiki::basename($config{cgiurl});
- $cgi=~s/\..*$//;
-
- my $newfile="$estdir/$cgi.tmpl.new";
- my $cleanup = sub { unlink($newfile) };
- open(TEMPLATE, ">:utf8", $newfile) || error("open $newfile: $!", $cleanup);
- print TEMPLATE IkiWiki::misctemplate("search",
- "<!--ESTFORM-->\n\n<!--ESTRESULT-->\n\n<!--ESTINFO-->\n\n",
- forcebaseurl => IkiWiki::dirname($config{cgiurl})."/") ||
- error("write $newfile: $!", $cleanup);
- close TEMPLATE || error("save $newfile: $!", $cleanup);
- rename($newfile, "$estdir/$cgi.tmpl") ||
- error("rename $newfile: $!", $cleanup);
-
- $newfile="$estdir/$cgi.conf";
- open(TEMPLATE, ">$newfile") || error("open $newfile: $!", $cleanup);
- my $template=template("estseek.conf");
- eval q{use Cwd 'abs_path'};
- $template->param(
- index => $estdir,
- tmplfile => "$estdir/$cgi.tmpl",
- destdir => abs_path($config{destdir}),
- url => $config{url},
- );
- print TEMPLATE $template->output || error("write $newfile: $!", $cleanup);
- close TEMPLATE || error("save $newfile: $!", $cleanup);
- rename($newfile, "$estdir/$cgi.conf") ||
- error("rename $newfile: $!", $cleanup);
-
- $cgi="$estdir/".IkiWiki::basename($config{cgiurl});
- unlink($cgi);
- my $estseek = defined $config{estseek} ? $config{estseek} : '/usr/lib/estraier/estseek.cgi';
- symlink($estseek, $cgi) || error("symlink $estseek $cgi: $!");
-} # }}}
-
-sub estcmd ($;@) { #{{{
- my @params=split(' ', shift);
- push @params, "-cl", "$config{wikistatedir}/hyperestraier";
- if (@_) {
- push @params, "-";
- }
-
- my $pid=open(CHILD, "|-");
- if ($pid) {
- # parent
- foreach (@_) {
- print CHILD "$_\n";
- }
- close(CHILD) || print STDERR "estcmd @params exited nonzero: $?\n";
- }
- else {
- # child
- open(STDOUT, "/dev/null"); # shut it up (closing won't work)
- exec("estcmd", @params) || error("can't run estcmd");
+ chdir("$config{wikistatedir}/xapian") || error("chdir: $!");
+ $ENV{OMEGA_CONFIG_FILE}="./omega.conf";
+ $ENV{CGIURL}=$config{cgiurl},
+ exec($config{omega_cgi}) || error("$config{omega_cgi} failed: $!");
}
} #}}}
diff --git a/debian/changelog b/debian/changelog
index 02796394b..d80f78062 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,6 +4,7 @@ ikiwiki (2.49) UNRELEASED; urgency=low
* ikiwiki-mass-rebuild: Don't trust $! when setting $)
* inline: The optimisation in 2.41 broke nested inlines. Detect those
and avoid overoptimising.
+ * search: Converted to use xapian-omega.
-- Joey Hess <joeyh@debian.org> Fri, 30 May 2008 19:08:54 -0400
diff --git a/debian/control b/debian/control
index b71cbed6f..af281a74e 100644
--- a/debian/control
+++ b/debian/control
@@ -14,7 +14,7 @@ Package: ikiwiki
Architecture: all
Depends: ${perl:Depends}, markdown | libtext-markdown-perl, libhtml-scrubber-perl, libhtml-template-perl, libhtml-parser-perl, liburi-perl
Recommends: gcc | c-compiler, libc6-dev | libc-dev, subversion | git-core (>= 1:1.5.0) | tla | bzr (>= 0.91) | mercurial | monotone (>= 0.38), libxml-simple-perl, libnet-openid-consumer-perl, liblwpx-paranoidagent-perl, libtimedate-perl, libcgi-formbuilder-perl (>= 3.05), libcgi-session-perl (>= 4.14-1), libmail-sendmail-perl, libauthen-passphrase-perl
-Suggests: viewvc | gitweb | viewcvs, hyperestraier, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl
+Suggests: viewvc | gitweb | viewcvs, libsearch-xapian-perl, xapian-omega, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl
Conflicts: ikiwiki-plugin-table
Replaces: ikiwiki-plugin-table
Provides: ikiwiki-plugin-table
diff --git a/doc/features.mdwn b/doc/features.mdwn
index 1d762bed4..df963ab4f 100644
--- a/doc/features.mdwn
+++ b/doc/features.mdwn
@@ -158,8 +158,8 @@ Well, sorta. Rather than implementing YA history browser, it can link to
### Full text search
-ikiwiki can use the [[HyperEstraier]] search engine to add powerful
-full text search capabilities to your wiki.
+ikiwiki can use the xapian search engine to add powerful
+full text [[plugins/search]] capabilities to your wiki.
### [[w3mmode]]
diff --git a/doc/ikiwiki.setup b/doc/ikiwiki.setup
index db806a8c4..03d04176d 100644
--- a/doc/ikiwiki.setup
+++ b/doc/ikiwiki.setup
@@ -156,9 +156,9 @@ use IkiWiki::Setup::Standard {
# base page.
#tagbase => "tag",
- # For use with the search plugin if your estseek.cgi is located
+ # For use with the search plugin if the omega cgi is located
# somewhere else.
- #estseek => "/usr/lib/estraier/estseek.cgi",
+ #omega_cgi => "/usr/lib/cgi-bin/omega/omega",
# For use with the openid plugin, to give an url to a page users
# can use to signup for an OpenID.
diff --git a/doc/plugins/search.mdwn b/doc/plugins/search.mdwn
index 7b32714f4..4c1b50fcd 100644
--- a/doc/plugins/search.mdwn
+++ b/doc/plugins/search.mdwn
@@ -1,12 +1,17 @@
[[template id=plugin name=search author="[[Joey]]"]]
[[tag type/useful]]
-This plugin is included in ikiwiki, but is not enabled by default. It adds
-full text search to ikiwiki, using the [[HyperEstraier]] engine.
+This plugin adds full text search to ikiwiki, using the
+[xapian](http://xapian.org/) engine and its
+[omega](http://xapian.org/docs/omega/overview.html) frontend.
-It's possible to configure HyperEstraier via one of ikiwiki's
-[[templates|wikitemplates]], but for most users, no configuration should be
-needed aside from enabling the plugin.
+Ikiwiki will handle indexing new and changed page contents, using the
+[[cpan Search::Xapian]] perl modules. Note that it indexes page contents
+before they are preprocessed and converted to html, as this tends to
+produce less noisy search results. Also, since it only indexes page
+contents, files copied by the [[rawhtml]] plugin will not be indexed, nor
+will other types of data files.
-This plugin has a configuration option. To change the path to estseek.cgi,
-set `--estseek=/path/to/estseek.cgi`
+There is one setting you may need to use in the config file. `omega_cgi`
+should point to the location of the omega cgi program. The default location
+is `/usr/lib/cgi-bin/omega/omega`.
diff --git a/doc/plugins/search/discussion.mdwn b/doc/plugins/search/discussion.mdwn
index 494d0a38a..6b5714c42 100644
--- a/doc/plugins/search/discussion.mdwn
+++ b/doc/plugins/search/discussion.mdwn
@@ -42,3 +42,5 @@ Now I did a `rm -rf ~wiki/wiki/.ikiwiki/hyperestraier` and re-ran
`--rebuild`ing once more, I'm back to the previous error message.
--[[tschwinge]]
+
+I guess this is fixed now that it uses xapian. :-) --[[Joey]]
diff --git a/doc/todo/different_search_engine.mdwn b/doc/todo/different_search_engine.mdwn
index 81ca47547..3737fb140 100644
--- a/doc/todo/different_search_engine.mdwn
+++ b/doc/todo/different_search_engine.mdwn
@@ -1,3 +1,5 @@
+[[done]], using xapian-omega! --[[Joey]]
+
After using it for a while, my feeling is that [[hyperestraier]], as used in
the [[plugins/search]] plugin, is not robust enough for ikiwiki. It doesn't
upgrade well, and it has a habit of sig-11 on certain input from time to
diff --git a/doc/wikitemplates.mdwn b/doc/wikitemplates.mdwn
index f095cb035..b03fc10a1 100644
--- a/doc/wikitemplates.mdwn
+++ b/doc/wikitemplates.mdwn
@@ -21,15 +21,14 @@ located in /usr/share/ikiwiki/templates by default.
* `inlinepage.tmpl` - Used for adding a page inline in a blog
page.
* `archivepage.tmpl` - Used for listing a page in a blog archive page.
-* `estseek.conf` - Not a html template, this is actually a template for
- a config file for the [[HyperEstraier]] search engine. If you like you
- can read the [[HyperEstraier]] docs and configure it using this.
* `blogpost.tmpl` - Used for a form to add a post to a blog (and a rss/atom links)
* `feedlink.tmpl` - Used to add rss/atom links if blogpost.tmpl is not used.
* `aggregatepost.tmpl` - Used by the [[plugins/aggregate]] plugin to create
a page for a post.
* `searchform.tmpl` - Used by the [[plugins/search]] plugin to add a search
form to wiki pages.
+* `searchquery.tmpl` - This is an omega template, used by the
+ [[plugins/search]] plugin.
The [[plugins/pagetemplate]] plugin can allow individual pages to use a
different template than `page.tmpl`.
diff --git a/templates/searchform.tmpl b/templates/searchform.tmpl
index 7c4fdb026..d49cf22d3 100644
--- a/templates/searchform.tmpl
+++ b/templates/searchform.tmpl
@@ -1,7 +1,5 @@
<form method="get" action="<TMPL_VAR SEARCHACTION>" id="searchform">
<div>
-<input type="text" name="phrase" value="" size="16" />
-<input type="hidden" name="enc" value="UTF-8" />
-<input type="hidden" name="do" value="hyperestraier" />
+<input type="text" name="P" value="" size="16" />
</div>
</form>
diff --git a/templates/searchquery.tmpl b/templates/searchquery.tmpl
new file mode 100644
index 000000000..dd12d2970
--- /dev/null
+++ b/templates/searchquery.tmpl
@@ -0,0 +1,117 @@
+$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France}
+${
+$def{PREV,
+$if{$ne{$topdoc,0},<INPUT TYPE=image NAME="&lt;" ALT="&lt;"
+SRC="/images/xapian-omega/prev.png" BORDER=0 HEIGHT=30 WIDTH=30>,
+<IMG ALT="" SRC="/images/xapian-omega/prevoff.png" HEIGHT=30 WIDTH=30>}
+}
+
+$def{NEXT,
+$if{$ne{$last,$msize},<INPUT TYPE=image NAME="&gt;" ALT="&gt;"
+SRC="/images/xapian-omega/next.png" BORDER=0 HEIGHT=30 WIDTH=30>,
+<IMG ALT="" SRC="/images/xapian-omega/nextoff.png" HEIGHT=30 WIDTH=30>}
+}
+
+$def{P,<INPUT TYPE=image NAME="$1" VALUE="$1" SRC="/images/xapian-omega/page-$2.png" BORDER=0$opt{a} ALT="$1">}
+$def{PAGE,$if{$gt{$1,9},$if{$gt{$1,99},$P{$1,$div{$1,100}}}$P{$1,$mod{$div{$1,10},10}}}$P{$1,$mod{$1,10}}}
+
+$def{S,<IMG SRC="/images/xapian-omega/page-$2s.png"$opt{a} ALT=$1>}
+$def{SPAGE,$if{$gt{$1,9},$if{$gt{$1,99},$S{$1,$div{$1,100}}}$S{$1,$mod{$div{$1,10},10}}}$S{$1,$mod{$1,10}}}
+}
+
+$def{PREV,$if{$ne{$topdoc,0},<INPUT TYPE=submit NAME="&lt;" VALUE="Previous">}}
+
+$def{PAGE,<INPUT TYPE=submit NAME="[" VALUE="$1">}
+
+$def{SPAGE,<INPUT TYPE=submit NAME="[" VALUE="$1" DISABLED=disabled>}
+
+$def{NEXT,$if{$ne{$last,$msize},<INPUT TYPE=submit NAME="&gt;" VALUE="Next">}}
+
+<p>
+
+<FORM NAME=P METHOD=GET
+ACTION="$html{$env{CGIURL}}" TARGET="_top">
+<center>
+<INPUT NAME=P VALUE="$html{$query}" SIZE=65>
+<INPUT TYPE=SUBMIT VALUE="Search">
+<hr>
+<SELECT NAME=DEFAULTOP>
+<OPTION VALUE=or $if{$eq{$defaultop,or},SELECTED}>Matching any words
+<OPTION VALUE=and $if{$eq{$defaultop,and},SELECTED}>Matching all words
+</SELECT>
+$if{$opt{topterms},
+ <div title="Suggested terms to add to your query"
+ style="text-align:left;background:#cfc;border:1px solid green;padding:2px;font:11px verdana$. arial$. helvetica$. sans-serif;">
+ $map{$topterms,<span style="white-space:nowrap"><INPUT TYPE=checkbox NAME=X VALUE="$prettyterm{$_}" onClick="C(this)">$prettyterm{$_}</span> }
+ <BR><NOSCRIPT><INPUT TYPE=hidden NAME=ADD VALUE=1></NOSCRIPT>
+ </div>
+}
+$or{$html{$error},
+$if{$eq{$msize,0},
+$if{$query,No documents match your query,
+<hr>Searching $nice{$dbsize} documents
+},
+$if{$not{$msizeexact},
+ $nice{$add{$topdoc,1}}-$nice{$last} of about $nice{$msize} matches,
+ $if{$and{$eq{$last,$msize},$eq{$topdoc,0}},
+ All $nice{$msize} matches,
+ $nice{$add{$topdoc,1}}$if{$ne{$add{$topdoc,1},$last},-$nice{$last}} of exactly $nice{$msize} matches}
+}
+<hr>
+</center>
+$list{$map{$queryterms,$list{$html{$uniq{$unstem{$_}}},<b>,</b>/<b>,</b>}:&nbsp;$nice{$freq{$_}}},Term frequencies: ,$. ,}
+<br><small>Search took $time seconds</small>
+<table>
+$hitlist{<tr><td valign=top>
+${<IMG SRC="/images/xapian-omega/score-$div{$percentage,10}.png" ALT="$percentage%" HEIGHT=16 WIDTH=32>}
+<div title="$percentage%" style='float:left;width:60px;height:10px;border:1px solid black;margin-top:4px;'>
+<div style='width:$div{$mul{$percentage,6},10}px; height:10px; background-color: red;'>
+</div></div>
+<div style='float:left;margin-top:2px;font-size:x-small;'>
+<span title="$html{$date{$field{modtime},%Y-%m-%d %H:%M:%S}}">
+Modified:<br><b>$html{$date{$field{modtime},%Y-%m-%d}}</b></span><br>
+$if{$field{language},Language: <b>$html{$field{language}}</b><br>}
+$if{$field{size},<span title="$html{$field{size}} bytes">Size: <b>$html{$filesize{$field{size}}}</b></span><br>}
+</div>
+</td>
+<td><B><A HREF="$field{url}">$html{$or{$field{caption},$field{title},$field{url},Untitled}}</A></B><BR>
+<small>$highlight{$field{sample},$terms}$if{$field{sample},...}</small><br>
+<A HREF="$field{url}">$html{$field{url}}</A><br>
+<small>
+$percentage% relevant$. matching:
+<i>$list{$map{$terms,$html{$prettyterm{$_}}},$. ,</i> and <i>}</i></small>${for lynx:}<p></td></tr>}
+</table>
+
+<br><center>
+
+${suppress next, prev, and page links if there's only one page}
+$if{$ne{$lastpage,1},
+$set{a,$if{$opt{pagelink_height}, HEIGHT=$opt{pagelink_height}}$if{$opt{pagelink_width}, WIDTH=$opt{pagelink_width}}}
+
+${1-W ... X-(this)-Y ...}
+$set{w,$min{3,$add{$thispage,-1}}}
+$set{x,$max{$add{$opt{w},1},$add{$thispage,-3}}}
+$set{y,$min{$lastpage,$add{$thispage,8}}}
+$PREV
+$map{$range{1,$opt{w}},$PAGE{$_}}
+$if{$ne{$add{$opt{w},1},$opt{x}},...}
+$map{$range{$opt{x},$add{$thispage,-1}},$PAGE{$_}}
+$SPAGE{$thispage}
+$map{$range{$add{$thispage,1},$opt{y}},$PAGE{$_}}
+$if{$ne{$opt{y},$lastpage},...}
+$NEXT
+}
+}}
+</center><br>
+$if{$dbname,<INPUT TYPE=hidden NAME=DB VALUE="$html{$dbname}">}
+$if{$ne{$topdoc,0},<INPUT TYPE=hidden NAME=TOPDOC VALUE=$topdoc>}
+$if{$ne{$hitsperpage,10},<INPUT TYPE=hidden NAME=HITSPERPAGE VALUE=$hitsperpage>}
+$if{$fmt,<INPUT TYPE=hidden NAME=FMT VALUE="$html{$fmt}">}
+$if{$cgi{COLLAPSE},<INPUT TYPE=hidden NAME=COLLAPSE VALUE="$html{$cgi{COLLAPSE}}">}
+$if{$queryterms,<INPUT TYPE=hidden NAME=xP VALUE="$html{$queryterms}">}
+<INPUT TYPE=hidden NAME=xDB VALUE="$html{$dbname}">
+<INPUT TYPE=hidden NAME=xFILTERS VALUE="$html{$filters}">
+$list{$relevants,<INPUT TYPE=hidden NAME=R VALUE=",.,">}
+$if{$cgi{THRESHOLD},<INPUT TYPE=hidden NAME=THRESHOLD VALUE="$html{$cgi{THRESHOLD}}">}
+</FORM>
+<hr><div align=right><i><small><a href="http://www.xapian.org/">$html{$version}</a></small></i></div>