diff options
author | Joey Hess <joey@kodama.kitenet.net> | 2008-06-03 15:29:54 -0400 |
---|---|---|
committer | Joey Hess <joey@kodama.kitenet.net> | 2008-06-03 15:29:54 -0400 |
commit | 8a6a5320edc2c8a2ed357463b61f161d5b295fbf (patch) | |
tree | 3c2ad4350d247692a4e043a2c98dd56753ea63da | |
parent | c688863cf171df72b57180df746453e3c584f633 (diff) |
search: Converted to use xapian-omega.
Everything is done except for the actual indexing. I plan to do incremental
indexing as pages change.
-rw-r--r-- | Bundle/IkiWiki/Extras.pm | 1 | ||||
-rw-r--r-- | IkiWiki/Plugin/search.pm | 124 | ||||
-rw-r--r-- | debian/changelog | 1 | ||||
-rw-r--r-- | debian/control | 2 | ||||
-rw-r--r-- | doc/features.mdwn | 4 | ||||
-rw-r--r-- | doc/ikiwiki.setup | 4 | ||||
-rw-r--r-- | doc/plugins/search.mdwn | 19 | ||||
-rw-r--r-- | doc/plugins/search/discussion.mdwn | 2 | ||||
-rw-r--r-- | doc/todo/different_search_engine.mdwn | 2 | ||||
-rw-r--r-- | doc/wikitemplates.mdwn | 5 | ||||
-rw-r--r-- | templates/searchform.tmpl | 4 | ||||
-rw-r--r-- | templates/searchquery.tmpl | 117 |
12 files changed, 169 insertions, 116 deletions
diff --git a/Bundle/IkiWiki/Extras.pm b/Bundle/IkiWiki/Extras.pm index f09225d49..9289968e7 100644 --- a/Bundle/IkiWiki/Extras.pm +++ b/Bundle/IkiWiki/Extras.pm @@ -16,6 +16,7 @@ perl -MCPAN -e 'install Bundle::IkiWiki::Extras' =head1 CONTENTS +Search::Xapian Authen::Passphrase RPC::XML File::MimeInfo diff --git a/IkiWiki/Plugin/search.pm b/IkiWiki/Plugin/search.pm index 9bf223cf0..e705d018a 100644 --- a/IkiWiki/Plugin/search.pm +++ b/IkiWiki/Plugin/search.pm @@ -1,5 +1,5 @@ #!/usr/bin/perl -# hyperestraier search engine plugin +# xapian-omega search engine plugin package IkiWiki::Plugin::search; use warnings; @@ -7,33 +7,32 @@ use strict; use IkiWiki 2.00; sub import { #{{{ - hook(type => "getopt", id => "hyperestraier", - call => \&getopt); - hook(type => "checkconfig", id => "hyperestraier", - call => \&checkconfig); - hook(type => "pagetemplate", id => "hyperestraier", - call => \&pagetemplate); - hook(type => "delete", id => "hyperestraier", - call => \&delete); - hook(type => "change", id => "hyperestraier", - call => \&change); - hook(type => "cgi", id => "hyperestraier", - call => \&cgi); + hook(type => "checkconfig", id => "search", call => \&checkconfig); + hook(type => "pagetemplate", id => "search", call => \&pagetemplate); + hook(type => "delete", id => "search", call => \&delete); + hook(type => "change", id => "search", call => \&change); + hook(type => "cgi", id => "search", call => \&cgi); } # }}} -sub getopt () { #{{{ - eval q{use Getopt::Long}; - error($@) if $@; - Getopt::Long::Configure('pass_through'); - GetOptions("estseek=s" => \$config{estseek}); -} #}}} - sub checkconfig () { #{{{ foreach my $required (qw(url cgiurl)) { if (! length $config{$required}) { error(sprintf(gettext("Must specify %s when using the search plugin"), $required)); } } + + if (! exists $config{omega_cgi}) { + $config{omega_cgi}="/usr/lib/cgi-bin/omega/omega"; + } + + if (! -e $config{wikistatedir}."/xapian" || $config{rebuild}) { + writefile("omega.conf", $config{wikistatedir}."/xapian", + "database_dir .\n". + "template_dir ./templates\n"); + writefile("query", $config{wikistatedir}."/xapian/templates", + IkiWiki::misctemplate(gettext("search"), + readfile(IkiWiki::template_file("searchquery.tmpl")))); + } } #}}} my $form; @@ -55,93 +54,22 @@ sub pagetemplate (@) { #{{{ } #}}} sub delete (@) { #{{{ - debug(gettext("cleaning hyperestraier search index")); - estcmd("purge -cl"); - estcfg(); + debug(gettext("cleaning xapian search index")); } #}}} sub change (@) { #{{{ - debug(gettext("updating hyperestraier search index")); - estcmd("gather -cm -bc -cl -sd", - map { - map { - Encode::encode_utf8($config{destdir}."/".$_) - } @{$renderedfiles{pagename($_)}}; - } @_ - ); - estcfg(); + debug(gettext("updating xapian search index")); } #}}} sub cgi ($) { #{{{ my $cgi=shift; - if (defined $cgi->param('phrase') || defined $cgi->param("navi")) { + if (defined $cgi->param('P')) { # only works for GET requests - chdir("$config{wikistatedir}/hyperestraier") || error("chdir: $!"); - exec("./".IkiWiki::basename($config{cgiurl})) || error("estseek.cgi failed"); - } -} #}}} - -my $configured=0; -sub estcfg () { #{{{ - return if $configured; - $configured=1; - - my $estdir="$config{wikistatedir}/hyperestraier"; - my $cgi=IkiWiki::basename($config{cgiurl}); - $cgi=~s/\..*$//; - - my $newfile="$estdir/$cgi.tmpl.new"; - my $cleanup = sub { unlink($newfile) }; - open(TEMPLATE, ">:utf8", $newfile) || error("open $newfile: $!", $cleanup); - print TEMPLATE IkiWiki::misctemplate("search", - "<!--ESTFORM-->\n\n<!--ESTRESULT-->\n\n<!--ESTINFO-->\n\n", - forcebaseurl => IkiWiki::dirname($config{cgiurl})."/") || - error("write $newfile: $!", $cleanup); - close TEMPLATE || error("save $newfile: $!", $cleanup); - rename($newfile, "$estdir/$cgi.tmpl") || - error("rename $newfile: $!", $cleanup); - - $newfile="$estdir/$cgi.conf"; - open(TEMPLATE, ">$newfile") || error("open $newfile: $!", $cleanup); - my $template=template("estseek.conf"); - eval q{use Cwd 'abs_path'}; - $template->param( - index => $estdir, - tmplfile => "$estdir/$cgi.tmpl", - destdir => abs_path($config{destdir}), - url => $config{url}, - ); - print TEMPLATE $template->output || error("write $newfile: $!", $cleanup); - close TEMPLATE || error("save $newfile: $!", $cleanup); - rename($newfile, "$estdir/$cgi.conf") || - error("rename $newfile: $!", $cleanup); - - $cgi="$estdir/".IkiWiki::basename($config{cgiurl}); - unlink($cgi); - my $estseek = defined $config{estseek} ? $config{estseek} : '/usr/lib/estraier/estseek.cgi'; - symlink($estseek, $cgi) || error("symlink $estseek $cgi: $!"); -} # }}} - -sub estcmd ($;@) { #{{{ - my @params=split(' ', shift); - push @params, "-cl", "$config{wikistatedir}/hyperestraier"; - if (@_) { - push @params, "-"; - } - - my $pid=open(CHILD, "|-"); - if ($pid) { - # parent - foreach (@_) { - print CHILD "$_\n"; - } - close(CHILD) || print STDERR "estcmd @params exited nonzero: $?\n"; - } - else { - # child - open(STDOUT, "/dev/null"); # shut it up (closing won't work) - exec("estcmd", @params) || error("can't run estcmd"); + chdir("$config{wikistatedir}/xapian") || error("chdir: $!"); + $ENV{OMEGA_CONFIG_FILE}="./omega.conf"; + $ENV{CGIURL}=$config{cgiurl}, + exec($config{omega_cgi}) || error("$config{omega_cgi} failed: $!"); } } #}}} diff --git a/debian/changelog b/debian/changelog index 02796394b..d80f78062 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,6 +4,7 @@ ikiwiki (2.49) UNRELEASED; urgency=low * ikiwiki-mass-rebuild: Don't trust $! when setting $) * inline: The optimisation in 2.41 broke nested inlines. Detect those and avoid overoptimising. + * search: Converted to use xapian-omega. -- Joey Hess <joeyh@debian.org> Fri, 30 May 2008 19:08:54 -0400 diff --git a/debian/control b/debian/control index b71cbed6f..af281a74e 100644 --- a/debian/control +++ b/debian/control @@ -14,7 +14,7 @@ Package: ikiwiki Architecture: all Depends: ${perl:Depends}, markdown | libtext-markdown-perl, libhtml-scrubber-perl, libhtml-template-perl, libhtml-parser-perl, liburi-perl Recommends: gcc | c-compiler, libc6-dev | libc-dev, subversion | git-core (>= 1:1.5.0) | tla | bzr (>= 0.91) | mercurial | monotone (>= 0.38), libxml-simple-perl, libnet-openid-consumer-perl, liblwpx-paranoidagent-perl, libtimedate-perl, libcgi-formbuilder-perl (>= 3.05), libcgi-session-perl (>= 4.14-1), libmail-sendmail-perl, libauthen-passphrase-perl -Suggests: viewvc | gitweb | viewcvs, hyperestraier, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl +Suggests: viewvc | gitweb | viewcvs, libsearch-xapian-perl, xapian-omega, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl Conflicts: ikiwiki-plugin-table Replaces: ikiwiki-plugin-table Provides: ikiwiki-plugin-table diff --git a/doc/features.mdwn b/doc/features.mdwn index 1d762bed4..df963ab4f 100644 --- a/doc/features.mdwn +++ b/doc/features.mdwn @@ -158,8 +158,8 @@ Well, sorta. Rather than implementing YA history browser, it can link to ### Full text search -ikiwiki can use the [[HyperEstraier]] search engine to add powerful -full text search capabilities to your wiki. +ikiwiki can use the xapian search engine to add powerful +full text [[plugins/search]] capabilities to your wiki. ### [[w3mmode]] diff --git a/doc/ikiwiki.setup b/doc/ikiwiki.setup index db806a8c4..03d04176d 100644 --- a/doc/ikiwiki.setup +++ b/doc/ikiwiki.setup @@ -156,9 +156,9 @@ use IkiWiki::Setup::Standard { # base page. #tagbase => "tag", - # For use with the search plugin if your estseek.cgi is located + # For use with the search plugin if the omega cgi is located # somewhere else. - #estseek => "/usr/lib/estraier/estseek.cgi", + #omega_cgi => "/usr/lib/cgi-bin/omega/omega", # For use with the openid plugin, to give an url to a page users # can use to signup for an OpenID. diff --git a/doc/plugins/search.mdwn b/doc/plugins/search.mdwn index 7b32714f4..4c1b50fcd 100644 --- a/doc/plugins/search.mdwn +++ b/doc/plugins/search.mdwn @@ -1,12 +1,17 @@ [[template id=plugin name=search author="[[Joey]]"]] [[tag type/useful]] -This plugin is included in ikiwiki, but is not enabled by default. It adds -full text search to ikiwiki, using the [[HyperEstraier]] engine. +This plugin adds full text search to ikiwiki, using the +[xapian](http://xapian.org/) engine and its +[omega](http://xapian.org/docs/omega/overview.html) frontend. -It's possible to configure HyperEstraier via one of ikiwiki's -[[templates|wikitemplates]], but for most users, no configuration should be -needed aside from enabling the plugin. +Ikiwiki will handle indexing new and changed page contents, using the +[[cpan Search::Xapian]] perl modules. Note that it indexes page contents +before they are preprocessed and converted to html, as this tends to +produce less noisy search results. Also, since it only indexes page +contents, files copied by the [[rawhtml]] plugin will not be indexed, nor +will other types of data files. -This plugin has a configuration option. To change the path to estseek.cgi, -set `--estseek=/path/to/estseek.cgi` +There is one setting you may need to use in the config file. `omega_cgi` +should point to the location of the omega cgi program. The default location +is `/usr/lib/cgi-bin/omega/omega`. diff --git a/doc/plugins/search/discussion.mdwn b/doc/plugins/search/discussion.mdwn index 494d0a38a..6b5714c42 100644 --- a/doc/plugins/search/discussion.mdwn +++ b/doc/plugins/search/discussion.mdwn @@ -42,3 +42,5 @@ Now I did a `rm -rf ~wiki/wiki/.ikiwiki/hyperestraier` and re-ran `--rebuild`ing once more, I'm back to the previous error message. --[[tschwinge]] + +I guess this is fixed now that it uses xapian. :-) --[[Joey]] diff --git a/doc/todo/different_search_engine.mdwn b/doc/todo/different_search_engine.mdwn index 81ca47547..3737fb140 100644 --- a/doc/todo/different_search_engine.mdwn +++ b/doc/todo/different_search_engine.mdwn @@ -1,3 +1,5 @@ +[[done]], using xapian-omega! --[[Joey]] + After using it for a while, my feeling is that [[hyperestraier]], as used in the [[plugins/search]] plugin, is not robust enough for ikiwiki. It doesn't upgrade well, and it has a habit of sig-11 on certain input from time to diff --git a/doc/wikitemplates.mdwn b/doc/wikitemplates.mdwn index f095cb035..b03fc10a1 100644 --- a/doc/wikitemplates.mdwn +++ b/doc/wikitemplates.mdwn @@ -21,15 +21,14 @@ located in /usr/share/ikiwiki/templates by default. * `inlinepage.tmpl` - Used for adding a page inline in a blog page. * `archivepage.tmpl` - Used for listing a page in a blog archive page. -* `estseek.conf` - Not a html template, this is actually a template for - a config file for the [[HyperEstraier]] search engine. If you like you - can read the [[HyperEstraier]] docs and configure it using this. * `blogpost.tmpl` - Used for a form to add a post to a blog (and a rss/atom links) * `feedlink.tmpl` - Used to add rss/atom links if blogpost.tmpl is not used. * `aggregatepost.tmpl` - Used by the [[plugins/aggregate]] plugin to create a page for a post. * `searchform.tmpl` - Used by the [[plugins/search]] plugin to add a search form to wiki pages. +* `searchquery.tmpl` - This is an omega template, used by the + [[plugins/search]] plugin. The [[plugins/pagetemplate]] plugin can allow individual pages to use a different template than `page.tmpl`. diff --git a/templates/searchform.tmpl b/templates/searchform.tmpl index 7c4fdb026..d49cf22d3 100644 --- a/templates/searchform.tmpl +++ b/templates/searchform.tmpl @@ -1,7 +1,5 @@ <form method="get" action="<TMPL_VAR SEARCHACTION>" id="searchform"> <div> -<input type="text" name="phrase" value="" size="16" /> -<input type="hidden" name="enc" value="UTF-8" /> -<input type="hidden" name="do" value="hyperestraier" /> +<input type="text" name="P" value="" size="16" /> </div> </form> diff --git a/templates/searchquery.tmpl b/templates/searchquery.tmpl new file mode 100644 index 000000000..dd12d2970 --- /dev/null +++ b/templates/searchquery.tmpl @@ -0,0 +1,117 @@ +$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France} +${ +$def{PREV, +$if{$ne{$topdoc,0},<INPUT TYPE=image NAME="<" ALT="<" +SRC="/images/xapian-omega/prev.png" BORDER=0 HEIGHT=30 WIDTH=30>, +<IMG ALT="" SRC="/images/xapian-omega/prevoff.png" HEIGHT=30 WIDTH=30>} +} + +$def{NEXT, +$if{$ne{$last,$msize},<INPUT TYPE=image NAME=">" ALT=">" +SRC="/images/xapian-omega/next.png" BORDER=0 HEIGHT=30 WIDTH=30>, +<IMG ALT="" SRC="/images/xapian-omega/nextoff.png" HEIGHT=30 WIDTH=30>} +} + +$def{P,<INPUT TYPE=image NAME="$1" VALUE="$1" SRC="/images/xapian-omega/page-$2.png" BORDER=0$opt{a} ALT="$1">} +$def{PAGE,$if{$gt{$1,9},$if{$gt{$1,99},$P{$1,$div{$1,100}}}$P{$1,$mod{$div{$1,10},10}}}$P{$1,$mod{$1,10}}} + +$def{S,<IMG SRC="/images/xapian-omega/page-$2s.png"$opt{a} ALT=$1>} +$def{SPAGE,$if{$gt{$1,9},$if{$gt{$1,99},$S{$1,$div{$1,100}}}$S{$1,$mod{$div{$1,10},10}}}$S{$1,$mod{$1,10}}} +} + +$def{PREV,$if{$ne{$topdoc,0},<INPUT TYPE=submit NAME="<" VALUE="Previous">}} + +$def{PAGE,<INPUT TYPE=submit NAME="[" VALUE="$1">} + +$def{SPAGE,<INPUT TYPE=submit NAME="[" VALUE="$1" DISABLED=disabled>} + +$def{NEXT,$if{$ne{$last,$msize},<INPUT TYPE=submit NAME=">" VALUE="Next">}} + +<p> + +<FORM NAME=P METHOD=GET +ACTION="$html{$env{CGIURL}}" TARGET="_top"> +<center> +<INPUT NAME=P VALUE="$html{$query}" SIZE=65> +<INPUT TYPE=SUBMIT VALUE="Search"> +<hr> +<SELECT NAME=DEFAULTOP> +<OPTION VALUE=or $if{$eq{$defaultop,or},SELECTED}>Matching any words +<OPTION VALUE=and $if{$eq{$defaultop,and},SELECTED}>Matching all words +</SELECT> +$if{$opt{topterms}, + <div title="Suggested terms to add to your query" + style="text-align:left;background:#cfc;border:1px solid green;padding:2px;font:11px verdana$. arial$. helvetica$. sans-serif;"> + $map{$topterms,<span style="white-space:nowrap"><INPUT TYPE=checkbox NAME=X VALUE="$prettyterm{$_}" onClick="C(this)">$prettyterm{$_}</span> } + <BR><NOSCRIPT><INPUT TYPE=hidden NAME=ADD VALUE=1></NOSCRIPT> + </div> +} +$or{$html{$error}, +$if{$eq{$msize,0}, +$if{$query,No documents match your query, +<hr>Searching $nice{$dbsize} documents +}, +$if{$not{$msizeexact}, + $nice{$add{$topdoc,1}}-$nice{$last} of about $nice{$msize} matches, + $if{$and{$eq{$last,$msize},$eq{$topdoc,0}}, + All $nice{$msize} matches, + $nice{$add{$topdoc,1}}$if{$ne{$add{$topdoc,1},$last},-$nice{$last}} of exactly $nice{$msize} matches} +} +<hr> +</center> +$list{$map{$queryterms,$list{$html{$uniq{$unstem{$_}}},<b>,</b>/<b>,</b>}: $nice{$freq{$_}}},Term frequencies: ,$. ,} +<br><small>Search took $time seconds</small> +<table> +$hitlist{<tr><td valign=top> +${<IMG SRC="/images/xapian-omega/score-$div{$percentage,10}.png" ALT="$percentage%" HEIGHT=16 WIDTH=32>} +<div title="$percentage%" style='float:left;width:60px;height:10px;border:1px solid black;margin-top:4px;'> +<div style='width:$div{$mul{$percentage,6},10}px; height:10px; background-color: red;'> +</div></div> +<div style='float:left;margin-top:2px;font-size:x-small;'> +<span title="$html{$date{$field{modtime},%Y-%m-%d %H:%M:%S}}"> +Modified:<br><b>$html{$date{$field{modtime},%Y-%m-%d}}</b></span><br> +$if{$field{language},Language: <b>$html{$field{language}}</b><br>} +$if{$field{size},<span title="$html{$field{size}} bytes">Size: <b>$html{$filesize{$field{size}}}</b></span><br>} +</div> +</td> +<td><B><A HREF="$field{url}">$html{$or{$field{caption},$field{title},$field{url},Untitled}}</A></B><BR> +<small>$highlight{$field{sample},$terms}$if{$field{sample},...}</small><br> +<A HREF="$field{url}">$html{$field{url}}</A><br> +<small> +$percentage% relevant$. matching: +<i>$list{$map{$terms,$html{$prettyterm{$_}}},$. ,</i> and <i>}</i></small>${for lynx:}<p></td></tr>} +</table> + +<br><center> + +${suppress next, prev, and page links if there's only one page} +$if{$ne{$lastpage,1}, +$set{a,$if{$opt{pagelink_height}, HEIGHT=$opt{pagelink_height}}$if{$opt{pagelink_width}, WIDTH=$opt{pagelink_width}}} + +${1-W ... X-(this)-Y ...} +$set{w,$min{3,$add{$thispage,-1}}} +$set{x,$max{$add{$opt{w},1},$add{$thispage,-3}}} +$set{y,$min{$lastpage,$add{$thispage,8}}} +$PREV +$map{$range{1,$opt{w}},$PAGE{$_}} +$if{$ne{$add{$opt{w},1},$opt{x}},...} +$map{$range{$opt{x},$add{$thispage,-1}},$PAGE{$_}} +$SPAGE{$thispage} +$map{$range{$add{$thispage,1},$opt{y}},$PAGE{$_}} +$if{$ne{$opt{y},$lastpage},...} +$NEXT +} +}} +</center><br> +$if{$dbname,<INPUT TYPE=hidden NAME=DB VALUE="$html{$dbname}">} +$if{$ne{$topdoc,0},<INPUT TYPE=hidden NAME=TOPDOC VALUE=$topdoc>} +$if{$ne{$hitsperpage,10},<INPUT TYPE=hidden NAME=HITSPERPAGE VALUE=$hitsperpage>} +$if{$fmt,<INPUT TYPE=hidden NAME=FMT VALUE="$html{$fmt}">} +$if{$cgi{COLLAPSE},<INPUT TYPE=hidden NAME=COLLAPSE VALUE="$html{$cgi{COLLAPSE}}">} +$if{$queryterms,<INPUT TYPE=hidden NAME=xP VALUE="$html{$queryterms}">} +<INPUT TYPE=hidden NAME=xDB VALUE="$html{$dbname}"> +<INPUT TYPE=hidden NAME=xFILTERS VALUE="$html{$filters}"> +$list{$relevants,<INPUT TYPE=hidden NAME=R VALUE=",.,">} +$if{$cgi{THRESHOLD},<INPUT TYPE=hidden NAME=THRESHOLD VALUE="$html{$cgi{THRESHOLD}}">} +</FORM> +<hr><div align=right><i><small><a href="http://www.xapian.org/">$html{$version}</a></small></i></div> |