From daa04ae43612f2c48c6ec3e1c5dfa0ee6fda6653 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Thu, 6 Nov 2008 17:28:04 +0100 Subject: po: fix link to homepage in special case ... by wrapping IkiWiki::urlto in order to workaround hard-coded /index.$config{htmlext}, which is wrong when usedirs=0 and po_link_to=current and translatable homepage Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 5 ----- 1 file changed, 5 deletions(-) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 923ccd63a..4262640bc 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -254,11 +254,6 @@ in the backlinks. `po_link_to = current`: seems to work nicely -### parentlinks - -When `usedirs` is disabled and the home page is translatable, the -parent link to the wiki home page is broken (`/index.html`). - Translation quality assurance ----------------------------- -- cgit v1.2.3 From 61a01d33889f28577aafbbe803f046c5ce817bb9 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Fri, 7 Nov 2008 16:33:28 +0100 Subject: po: write detailed specification of wished backlinks behaviour Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 4262640bc..4f6212ff4 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -247,12 +247,32 @@ be fixed by something like [[todo/using_meta_titles_for_parentlinks]]. ### backlinks -`po_link_to = negotiated`: if a given translatable `sourcepage.mdwn` -links to \[[destpage]], `sourcepage.LL.po` also link to \[[destpage]], -and the latter has the master page *and* all its translations listed -in the backlinks. +#### `po_link_to = negotiated` -`po_link_to = current`: seems to work nicely +If a given translatable `sourcepage.mdwn` links to \[[destpage]], +`sourcepage.LL.po` also link to \[[destpage]], and the latter has the +master page *and* all its translations listed in the backlinks. On the +other hand, translations of `destpage` get none of these backlinks. +What would be nice is: + +- if a translatable page links to another translatable page: the + master destpage gets a backlink to the master sourcepage, and every + slave destpage gets a backlink to the corresponding slave sourcepage +- if a translatable page links to a non-translatable page: the + destpage gets a backlink to the master sourcepage only +- if a non-translatable page links to a translatable page: every + master or slave destpage gets a backlink to the sourcepage +- if a non-translatable page links to another non-translatable page: + the default behavious is nice, don't change it + +#### `po_link_to = current` + +At first glance, backlinks seem to work nicely, but a more thorough +look is needed. + +#### `po_link_to = default` + +FIXME Translation quality assurance ----------------------------- -- cgit v1.2.3 From 34ab884242fd23139b7d4ccc9ab368468d501186 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Fri, 7 Nov 2008 21:27:00 +0100 Subject: po: implemented linking/backlinks specification for po_link_to=negotiated Signed-off-by: intrigeri --- IkiWiki/Plugin/po.pm | 38 ++++++++++++++++++++++++++++++++++++++ doc/plugins/po.mdwn | 7 +------ 2 files changed, 39 insertions(+), 6 deletions(-) (limited to 'doc/plugins') diff --git a/IkiWiki/Plugin/po.pm b/IkiWiki/Plugin/po.pm index 643621a91..acc133042 100644 --- a/IkiWiki/Plugin/po.pm +++ b/IkiWiki/Plugin/po.pm @@ -17,6 +17,7 @@ use File::Copy; use File::Spec; use File::Temp; use Memoize; +use UNIVERSAL; my %translations; my @origneedsbuild; @@ -36,6 +37,7 @@ sub import { #{{{ hook(type => "getsetup", id => "po", call => \&getsetup); hook(type => "checkconfig", id => "po", call => \&checkconfig); hook(type => "needsbuild", id => "po", call => \&needsbuild); + hook(type => "scan", id => "po", call => \&scan, last =>1); hook(type => "filter", id => "po", call => \&filter); hook(type => "htmlize", id => "po", call => \&htmlize); hook(type => "pagetemplate", id => "po", call => \&pagetemplate, last => 1); @@ -218,6 +220,42 @@ sub needsbuild () { #{{{ } } #}}} +sub scan (@) { #{{{ + my %params=@_; + my $page=$params{page}; + my $content=$params{content}; + + return unless UNIVERSAL::can("IkiWiki::Plugin::link", "import"); + return unless $config{'po_link_to'} eq 'negotiated'; + + if (istranslation($page)) { + my ($masterpage, $curlang) = ($page =~ /(.*)[.]([a-z]{2})$/); + foreach my $destpage (@{$links{$page}}) { + if (istranslatable($destpage)) { + # replace one occurence of $destpage in $links{$page} + # (we only want to replace the one that was added by + # IkiWiki::Plugin::link::scan, other occurences may be + # there for other reasons) + for (my $i=0; $i<@{$links{$page}}; $i++) { + if (@{$links{$page}}[$i] eq $destpage) { + @{$links{$page}}[$i] = $destpage . '.' . $curlang; + last; + } + } + } + } + } + elsif (! istranslatable($page) && ! istranslation($page)) { + foreach my $destpage (@{$links{$page}}) { + if (istranslatable($destpage)) { + map { + push @{$links{$page}}, $destpage . '.' . $_; + } (keys %{$config{po_slave_languages}}); + } + } + } +} #}}} + sub mytargetpage ($$) { #{{{ my $page=shift; my $ext=shift; diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 4f6212ff4..a010dbb7e 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -249,12 +249,7 @@ be fixed by something like [[todo/using_meta_titles_for_parentlinks]]. #### `po_link_to = negotiated` -If a given translatable `sourcepage.mdwn` links to \[[destpage]], -`sourcepage.LL.po` also link to \[[destpage]], and the latter has the -master page *and* all its translations listed in the backlinks. On the -other hand, translations of `destpage` get none of these backlinks. -What would be nice is: - +This is now implemented: - if a translatable page links to another translatable page: the master destpage gets a backlink to the master sourcepage, and every slave destpage gets a backlink to the corresponding slave sourcepage -- cgit v1.2.3 From a0ac34607919eab8f25e5312366c9c06bcfb791b Mon Sep 17 00:00:00 2001 From: intrigeri Date: Fri, 7 Nov 2008 22:17:54 +0100 Subject: po: finished backlinks implementation Signed-off-by: intrigeri --- IkiWiki/Plugin/po.pm | 1 - doc/plugins/po.mdwn | 24 --------------------- t/po.t | 59 +++++++++++++++++++++++++++++++++++----------------- 3 files changed, 40 insertions(+), 44 deletions(-) (limited to 'doc/plugins') diff --git a/IkiWiki/Plugin/po.pm b/IkiWiki/Plugin/po.pm index acc133042..89bd99470 100644 --- a/IkiWiki/Plugin/po.pm +++ b/IkiWiki/Plugin/po.pm @@ -226,7 +226,6 @@ sub scan (@) { #{{{ my $content=$params{content}; return unless UNIVERSAL::can("IkiWiki::Plugin::link", "import"); - return unless $config{'po_link_to'} eq 'negotiated'; if (istranslation($page)) { my ($masterpage, $curlang) = ($page =~ /(.*)[.]([a-z]{2})$/); diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index a010dbb7e..ddd0f5870 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -245,30 +245,6 @@ does. This is actually a duplicate for [[bugs/pagetitle_function_does_not_respect_meta_titles]], which might be fixed by something like [[todo/using_meta_titles_for_parentlinks]]. -### backlinks - -#### `po_link_to = negotiated` - -This is now implemented: -- if a translatable page links to another translatable page: the - master destpage gets a backlink to the master sourcepage, and every - slave destpage gets a backlink to the corresponding slave sourcepage -- if a translatable page links to a non-translatable page: the - destpage gets a backlink to the master sourcepage only -- if a non-translatable page links to a translatable page: every - master or slave destpage gets a backlink to the sourcepage -- if a non-translatable page links to another non-translatable page: - the default behavious is nice, don't change it - -#### `po_link_to = current` - -At first glance, backlinks seem to work nicely, but a more thorough -look is needed. - -#### `po_link_to = default` - -FIXME - Translation quality assurance ----------------------------- diff --git a/t/po.t b/t/po.t index e9d5ac35f..3074372d3 100755 --- a/t/po.t +++ b/t/po.t @@ -17,7 +17,7 @@ BEGIN { } } -use Test::More tests => 45; +use Test::More tests => 52; BEGIN { use_ok("IkiWiki"); } @@ -89,27 +89,48 @@ ok(! IkiWiki::Plugin::po::istranslation('test3'), "test3 is not a translation"); ok(! IkiWiki::Plugin::po::istranslation('test3'), "test3 is not a translation"); ### links +require IkiWiki::Render; + +sub refresh_n_scan(@) { + my @masterfiles_rel=@_; + foreach my $masterfile_rel (@masterfiles_rel) { + my $masterfile=srcfile($masterfile_rel); + IkiWiki::scan($masterfile_rel); + next unless IkiWiki::Plugin::po::istranslatable(pagename($masterfile_rel)); + my @pofiles=IkiWiki::Plugin::po::pofiles($masterfile); + IkiWiki::Plugin::po::refreshpot($masterfile); + IkiWiki::Plugin::po::refreshpofiles($masterfile, @pofiles); + map IkiWiki::scan(IkiWiki::abs2rel($_, $config{srcdir})), @pofiles; + } +} + writefile('index.mdwn', $config{srcdir}, '[[translatable]] [[nontranslatable]]'); writefile('translatable.mdwn', $config{srcdir}, '[[nontranslatable]]'); writefile('nontranslatable.mdwn', $config{srcdir}, '[[/]] [[translatable]]'); -map IkiWiki::Plugin::po::refreshpot(srcfile($_)), ('index.mdwn', 'translatable.mdwn'); -require IkiWiki::Render; -foreach my $masterfile_rel ('index.mdwn', 'translatable.mdwn') { - my $masterfile=srcfile($masterfile_rel); - my @pofiles=IkiWiki::Plugin::po::pofiles($masterfile); - IkiWiki::Plugin::po::refreshpot($masterfile); - IkiWiki::Plugin::po::refreshpofiles($masterfile, @pofiles); - IkiWiki::scan($masterfile_rel); - map IkiWiki::scan(IkiWiki::abs2rel($_, $config{srcdir})), @pofiles; -} -IkiWiki::scan('nontranslatable.mdwn'); -is_deeply(\@{$links{'index'}}, ['translatable', 'nontranslatable'], 'index'); -is_deeply(\@{$links{'index.es'}}, ['translatable.es', 'nontranslatable'], 'index.es'); -is_deeply(\@{$links{'index.fr'}}, ['translatable.fr', 'nontranslatable'], 'index.fr'); -is_deeply(\@{$links{'translatable'}}, ['nontranslatable'], 'translatable'); -is_deeply(\@{$links{'translatable.es'}}, ['nontranslatable'], 'translatable.es'); -is_deeply(\@{$links{'translatable.fr'}}, ['nontranslatable'], 'translatable.fr'); -is_deeply(\@{$links{'nontranslatable'}}, ['/', 'translatable', 'translatable.fr', 'translatable.es'], 'nontranslatable'); + +$config{po_link_to}='negotiated'; +$msgprefix="links (po_link_to=negotiated)"; +refresh_n_scan('index.mdwn', 'translatable.mdwn', 'nontranslatable.mdwn'); +is_deeply(\@{$links{'index'}}, ['translatable', 'nontranslatable'], "$msgprefix index"); +is_deeply(\@{$links{'index.es'}}, ['translatable.es', 'nontranslatable'], "$msgprefix index.es"); +is_deeply(\@{$links{'index.fr'}}, ['translatable.fr', 'nontranslatable'], "$msgprefix index.fr"); +is_deeply(\@{$links{'translatable'}}, ['nontranslatable'], "$msgprefix translatable"); +is_deeply(\@{$links{'translatable.es'}}, ['nontranslatable'], "$msgprefix translatable.es"); +is_deeply(\@{$links{'translatable.fr'}}, ['nontranslatable'], "$msgprefix translatable.fr"); +is_deeply(\@{$links{'nontranslatable'}}, ['/', 'translatable', 'translatable.fr', 'translatable.es'], "$msgprefix nontranslatable"); + +$config{po_link_to}='current'; +$msgprefix="links (po_link_to=current)"; +refresh_n_scan('index.mdwn', 'translatable.mdwn', 'nontranslatable.mdwn'); +use Data::Dumper; +print Dumper(%links); +is_deeply(\@{$links{'index'}}, ['translatable', 'nontranslatable'], "$msgprefix index"); +is_deeply(\@{$links{'index.es'}}, [ map bestlink('index.es', $_), ('translatable.es', 'nontranslatable')], "$msgprefix index.es"); +is_deeply(\@{$links{'index.fr'}}, [ map bestlink('index.fr', $_), ('translatable.fr', 'nontranslatable')], "$msgprefix index.fr"); +is_deeply(\@{$links{'translatable'}}, [bestlink('translatable', 'nontranslatable')], "$msgprefix translatable"); +is_deeply(\@{$links{'translatable.es'}}, ['nontranslatable'], "$msgprefix translatable.es"); +is_deeply(\@{$links{'translatable.fr'}}, ['nontranslatable'], "$msgprefix translatable.fr"); +is_deeply(\@{$links{'nontranslatable'}}, ['/', 'translatable', 'translatable.fr', 'translatable.es'], "$msgprefix nontranslatable"); ### targetpage $config{usedirs}=0; -- cgit v1.2.3 From 3c6c129100ba7b721fa57a56bba2b7a36739f4fc Mon Sep 17 00:00:00 2001 From: intrigeri Date: Sat, 8 Nov 2008 00:08:44 +0100 Subject: po: started research on gettext/po4a security Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index ddd0f5870..39575fb63 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -217,9 +217,28 @@ Security checks - Can any sort of directives be put in po files that will cause mischief (ie, include other files, run commands, crash gettext, - whatever). + whatever). The [PO file + format](http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files) + should contain the answer. - Any security issues on running po4a on untrusted content? +### Security history + +#### GNU gettext +- [CVE-2004-0966](http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2004-0966) + / [Debian bug #278283](http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=278283): + the autopoint and gettextize scripts in the GNU gettext package + 1.14 and later versions, as used in Trustix Secure Linux 1.5 + through 2.1 and other operating systems, allows local users to + overwrite files via a symlink attack on temporary files. + +#### po4a +- + [CVE-2007-4462](http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2007-4462): + lib/Locale/Po4a/Po.pm in po4a before 0.32 allows local users to + overwrite arbitrary files via a symlink attack on the + gettextization.failed.po temporary file. + gettext/po4a rough corners -------------------------- -- cgit v1.2.3 From 86edd539f4995b49e57086e055b0c9a5571b2ff3 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Sat, 8 Nov 2008 02:13:37 +0100 Subject: po/todo: mostly security research Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 79 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 17 deletions(-) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 39575fb63..6c0d49197 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -6,6 +6,8 @@ gettext, using [po4a](http://po4a.alioth.debian.org/). It depends on the Perl `Locale::Po4a::Po` library (`apt-get install po4a`). +[[!toc]] + Introduction ============ @@ -215,30 +217,71 @@ TODO Security checks --------------- -- Can any sort of directives be put in po files that will - cause mischief (ie, include other files, run commands, crash gettext, - whatever). The [PO file - format](http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files) - should contain the answer. -- Any security issues on running po4a on untrusted content? - ### Security history -#### GNU gettext -- [CVE-2004-0966](http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2004-0966) - / [Debian bug #278283](http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=278283): +The only past security issues I could find in GNU gettext and po4a +are: + +- [CVE-2004-0966](http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2004-0966), + *i.e.* [Debian bug #278283](http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=278283): the autopoint and gettextize scripts in the GNU gettext package 1.14 and later versions, as used in Trustix Secure Linux 1.5 through 2.1 and other operating systems, allows local users to overwrite files via a symlink attack on temporary files. - -#### po4a -- - [CVE-2007-4462](http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2007-4462): - lib/Locale/Po4a/Po.pm in po4a before 0.32 allows local users to +- [CVE-2007-4462](http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2007-4462): + `lib/Locale/Po4a/Po.pm` in po4a before 0.32 allows local users to overwrite arbitrary files via a symlink attack on the gettextization.failed.po temporary file. +**FIXME**: check whether this plugin would have been a possible attack +vector to exploit these vulnerabilities. + +Depending on my mood, the lack of found security issues can either +indicate that there are none, or reveal that no-one ever bothered to +find (and publish) them. + +### PO file features + +Can any sort of directives be put in po files that will cause mischief +(ie, include other files, run commands, crash gettext, whatever)? + +> No [documented](http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files) +> directive is supposed to do so. + +### Running po4a on untrusted content + +Are there any security issues on running po4a on untrusted content? + +> To say the least, this issue is not well covered, at least publicly: +> +> - the documentation does not talk about it; +> - grep'ing the source code for `security` or `trust` gives no answer. +> +> I'll ask their opinion to the po4a maintainers. +> +> I'm not in a position to audit the code, but I had a look anyway: +> +> - no use of `system()`, `exec()` or backticks in `Locale::Po4a`; are +> there any other way to run external programs in Perl? +> - a symlink attack vulnerability was already discovered, so I "hope" +> the code has been checked to find some more already +> - the po4a parts we are using themselves use the following Perl +> modules: `DynaLoader`, `Encode`, `Encode::Guess`, +> `Text::WrapI18N`, `Locale::gettext` (`bindtextdomain`, +> `textdomain`, `gettext`, `dgettext`) +> +> --[[intrigeri]] + +### Fuzzing input + +I was not able to find any public information about gettext or po4a +having been tested with a fuzzing program, such as `zzuf` or `fusil`. +Moreover, some gettext parsers seem to be quite +[easy to crash](http://fusil.hachoir.org/trac/browser/trunk/fuzzers/fusil-gettext), +so it might be useful to bang gettext/po4a's heads against such +a program in order to easily detect some of the most obvious DoS. +[[--intrigeri]] + gettext/po4a rough corners -------------------------- @@ -246,8 +289,10 @@ gettext/po4a rough corners live in different directories): say bla.fr.po has been updated in repo2; pulling repo2 from repo1 seems to trigger a PO update, that changes bla.fr.po in repo1; then pushing repo1 to repo2 triggers - a PO update, that changes bla.fr.po in repo2; etc.; fixed in - `629968fc89bced6727981c0a1138072631751fee`? + a PO update, that changes bla.fr.po in repo2; etc.; quickly fixed in + `629968fc89bced6727981c0a1138072631751fee`, by disabling references + in Pot files. Using `Locale::Po4a::write_if_needed` might be + a cleaner solution. - new translations created in the web interface must get proper charset/encoding gettext metadata, else the next automatic PO update removes any non-ascii chars; possible solution: put such metadata -- cgit v1.2.3 From 41d13673c6aac2cf0e965d023835989edbe82c93 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Sat, 8 Nov 2008 12:21:54 +0100 Subject: po/todo++: support other file formats than markdown Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 6c0d49197..6a2409847 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -309,6 +309,12 @@ does. This is actually a duplicate for [[bugs/pagetitle_function_does_not_respect_meta_titles]], which might be fixed by something like [[todo/using_meta_titles_for_parentlinks]]. +### source files format + +Markdown is supported, great, but what about others? The set of file +formats supported both in ikiwiki and po4a probably is greater than +`{markdown}`. + Translation quality assurance ----------------------------- -- cgit v1.2.3 From 44bb872a9705781e20c1b7af89297240523d6bb9 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Sat, 8 Nov 2008 22:08:50 +0100 Subject: po/todo(security): many research results ... and some questions to Joey (hint: look for your name) Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 110 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 24 deletions(-) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 6a2409847..e88cc3106 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -6,7 +6,7 @@ gettext, using [po4a](http://po4a.alioth.debian.org/). It depends on the Perl `Locale::Po4a::Po` library (`apt-get install po4a`). -[[!toc]] +[[!toc levels=2]] Introduction ============ @@ -246,31 +246,88 @@ Can any sort of directives be put in po files that will cause mischief (ie, include other files, run commands, crash gettext, whatever)? > No [documented](http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files) -> directive is supposed to do so. +> directive is supposed to do so. [[--intrigeri]] ### Running po4a on untrusted content Are there any security issues on running po4a on untrusted content? -> To say the least, this issue is not well covered, at least publicly: -> -> - the documentation does not talk about it; -> - grep'ing the source code for `security` or `trust` gives no answer. -> -> I'll ask their opinion to the po4a maintainers. -> -> I'm not in a position to audit the code, but I had a look anyway: -> -> - no use of `system()`, `exec()` or backticks in `Locale::Po4a`; are -> there any other way to run external programs in Perl? -> - a symlink attack vulnerability was already discovered, so I "hope" -> the code has been checked to find some more already -> - the po4a parts we are using themselves use the following Perl -> modules: `DynaLoader`, `Encode`, `Encode::Guess`, -> `Text::WrapI18N`, `Locale::gettext` (`bindtextdomain`, -> `textdomain`, `gettext`, `dgettext`) -> -> --[[intrigeri]] +To say the least, this issue is not well covered, at least publicly: + +- the documentation does not talk about it; +- grep'ing the source code for `security` or `trust` gives no answer. + +On the other hand, a po4a developer answered my questions in +a convincing manner, stating that processing untrusted content was not +an initial goal, and analysing in detail the possible issues. + +#### Already checked + +- the core (`Po.pm`, `Transtractor.pm`) should be safe +- po4a source code was fully checked for other potential symlink + attacks, after discovery of one such issue +- the only external program run by the core is `diff`, in `Po.pm` (in + parts of its code we don't use) +- `Locale::gettext`: only used to display translated error messages +- Nicolas François "hopes" `DynaLoader` is safe, and has "no reason to + think that `Encode` is not safe" +- Nicolas François has "no reason to think that `Encode::Guess` is not + safe". The po plugin nevertheless avoids using it by defining the + input charset (`file_in_charset`) before asking `Transtractor` to + read any file. NB: this hack depends on po4a internals to stay + the same. + +#### To be checked + +##### Locale::Po4a modules + +- the modules we want to use have to be checked, as not all are safe + (e.g. the LaTeX module's behaviour is changed by commands included + in the content); they may use regexps generated from the content; we + currently only use the `Text` module +- the `Text` module does not run any external program +- check that no module is loaded by `Chooser.pm`, when we tell it to + load the `Text` one +- `nsgmls` is used by `Sgml.pm` + +##### Text::WrapI18N + +`Text::WrapI18N` can cause DoS (see the +[Debian bug #470250](http://bugs.debian.org/470250)), but it is +optional and we do not need the features it provides. + +It is loaded if available by `Locale::Po4a::Common`; looking at the +code, I'm not sure we can prevent this at all, but maybe some symbol +table manipulation tricks could work; overriding +`Locale::Po4a::Common::wrapi18n` may be easier. I'm no expert at all +in this field. Joey? [[--intrigeri]] + +##### Term::ReadKey + +`Term::ReadKey` is not a hard dependency in our case, *i.e.* po4a +works nicely without it. But the po4a Debian package recommends +`libterm-readkey-perl`, so it will probably be installed on most +systems using the po plugin. + +If `$ENV{COLUMNS}` is not set, `Locale::Po4a::Common` uses +`Term::ReadKey::GetTerminalSize()` to get the terminal size. How safe +is this? + +Part of `Term::ReadKey` is written in C. Depending on the runtime +platform, this function use ioctl, environment, or C library function +calls, and may end up running the `resize` command (without +arguments). + +IMHO, using Term::ReadKey has too far reaching implications for us to +be able to guarantee anything wrt. security. Since it is anyway of no +use in our case, I suggest we define `ENV{COLUMNS}` before loading +`Locale::Po4a::Common`, just to be on the safe side. Joey? +[[--intrigeri]] + +### msgmerge + +`refreshpofiles()` runs this external program. A po4a developer +answered he does "not expect any security issues from it". ### Fuzzing input @@ -278,10 +335,13 @@ I was not able to find any public information about gettext or po4a having been tested with a fuzzing program, such as `zzuf` or `fusil`. Moreover, some gettext parsers seem to be quite [easy to crash](http://fusil.hachoir.org/trac/browser/trunk/fuzzers/fusil-gettext), -so it might be useful to bang gettext/po4a's heads against such +so it might be useful to bang msgmerge/po4a's heads against such a program in order to easily detect some of the most obvious DoS. [[--intrigeri]] +> po4a was not fuzzy-tested, but according to one of its developers, +> "it would be really appreciated". [[--intrigeri]] + gettext/po4a rough corners -------------------------- @@ -292,7 +352,8 @@ gettext/po4a rough corners a PO update, that changes bla.fr.po in repo2; etc.; quickly fixed in `629968fc89bced6727981c0a1138072631751fee`, by disabling references in Pot files. Using `Locale::Po4a::write_if_needed` might be - a cleaner solution. + a cleaner solution. (warning: this function runs the external + `diff` program, have to check security) - new translations created in the web interface must get proper charset/encoding gettext metadata, else the next automatic PO update removes any non-ascii chars; possible solution: put such metadata @@ -313,7 +374,8 @@ be fixed by something like [[todo/using_meta_titles_for_parentlinks]]. Markdown is supported, great, but what about others? The set of file formats supported both in ikiwiki and po4a probably is greater than -`{markdown}`. +`{markdown}`. Warning: the po4a modules are the place where one can +expect security issues. Translation quality assurance ----------------------------- -- cgit v1.2.3 From e397888a77b10d96437754779062852f56d96765 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Mon, 10 Nov 2008 23:52:50 +0100 Subject: po/doc: more security research results Apart of the fuzzying part, I'm done with what I can do without help. The "Running po4a on untrusted content" section needs at least a quick glance from an experimented Perl programmer. Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index e88cc3106..09df26394 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -281,14 +281,19 @@ an initial goal, and analysing in detail the possible issues. ##### Locale::Po4a modules -- the modules we want to use have to be checked, as not all are safe - (e.g. the LaTeX module's behaviour is changed by commands included - in the content); they may use regexps generated from the content; we - currently only use the `Text` module -- the `Text` module does not run any external program -- check that no module is loaded by `Chooser.pm`, when we tell it to - load the `Text` one -- `nsgmls` is used by `Sgml.pm` +The modules we want to use have to be checked, as not all are safe +(e.g. the LaTeX module's behaviour is changed by commands included in +the content); they may use regexps generated from the content. + +`Chooser.pm` only loads the plugin we tell it too: currently, this +means the `Text` module only. + +`Text` module (I checked the CVS version): + +- it does not run any external program +- only `do_paragraph()` builds regexp's that expand untrusted + variables; they seem safe to me, but someone more expert than me + will need to check. Joey? ##### Text::WrapI18N @@ -302,6 +307,13 @@ table manipulation tricks could work; overriding `Locale::Po4a::Common::wrapi18n` may be easier. I'm no expert at all in this field. Joey? [[--intrigeri]] +> Update: Nicolas François suggests we add an option to po4a to +> disable it. It would do the trick, but only for people running +> a brand new po4a (probably too late for Lenny). Anyway, this option +> would have to take effect in a `BEGIN` / `eval` that I'm not +> familiar with. I can learn and do it, in case no Perl wizard +> volunteers to provide the po4a patch. [[--intrigeri]] + ##### Term::ReadKey `Term::ReadKey` is not a hard dependency in our case, *i.e.* po4a @@ -324,6 +336,10 @@ use in our case, I suggest we define `ENV{COLUMNS}` before loading `Locale::Po4a::Common`, just to be on the safe side. Joey? [[--intrigeri]] +> Update: adding an option to disable `Text::WrapI18N`, as Nicolas +> François suggested, would as a bonus disable `Term::ReadKey` +> as well. [[--intrigeri]] + ### msgmerge `refreshpofiles()` runs this external program. A po4a developer -- cgit v1.2.3 From f2dc003cfbe08acb9f10a28503d78abefa7d67b3 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Tue, 11 Nov 2008 03:23:02 +0100 Subject: po/doc: first fuzzy-testing results for po4a and msgmerge Chapter #1, in which we learn po4a could help to DoS ikiwiki + po, whereas msgmerge seems reluctant to cooperate. Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 09df26394..2f413e275 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -358,6 +358,89 @@ a program in order to easily detect some of the most obvious DoS. > po4a was not fuzzy-tested, but according to one of its developers, > "it would be really appreciated". [[--intrigeri]] +Test conditions: + +- a 21M file containing 100 concatenated copies of all the files in my + `/usr/share/common-licenses/`; I had no existing PO file or + translated versions at hand, which renders these tests + quite incomplete. +- po4a was the Debian 0.34-2 package; the same tests were also run + after replacing the `Text` module with the CVS one (the core was not + changed in CVS since 0.34-2 was released), without any significant + difference in the results. +- Perl 5.10.0-16 + +#### po4a-gettextize + +`po4a-gettextize` uses more or less the same po4a features as our +`refreshpot` function. + +Without specifying an input charset, zzuf'ed `po4a-gettextize` quickly +errors out, complaining it was not able to detect the input charset; +it leaves no incomplete file on disk. + +So I had to pretend the input was in UTF-8, as does the po plugin. + +Two ways of crashing were revealed by this command-line: + + zzuf -vc -s 0:100 -r 0.1:0.5 \ + po4a-gettextize -f text -o markdown -M utf-8 -L utf-8 \ + -m LICENSES >/dev/null + +They are: + + Malformed UTF-8 character (UTF-16 surrogate 0xdcc9) in substitution iterator at /usr/share/perl5/Locale/Po4a/Po.pm line 1443. + Malformed UTF-8 character (fatal) at /usr/share/perl5/Locale/Po4a/Po.pm line 1443. + +and + + Malformed UTF-8 character (UTF-16 surrogate 0xdcec) in substitution (s///) at /usr/share/perl5/Locale/Po4a/Po.pm line 1443. + Malformed UTF-8 character (fatal) at /usr/share/perl5/Locale/Po4a/Po.pm line 1443. + +Perl seems to exit cleanly, and an incomplete PO file is written on +disk. I not sure whether if this is a bug in Perl or in `Po.pm`. + +#### po4a-translate + +`po4a-translate` uses more or less the same po4a features as our +`filter` function. + +Without specifying an input charset, same behaviour as +`po4a-gettextize`, so let's specify UTF-8 as input charset as of now. + + zzuf -cv \ + po4a-translate -d -f text -o markdown -M utf-8 -L utf-8 \ + -k 0 -m LICENSES -p LICENSES.fr.po -l test.fr + +... prints tons of occurences of the following error, but a complete +translated document is written (obviously with some weird chars +inside): + + Use of uninitialized value in string ne at /usr/share/perl5/Locale/Po4a/TransTractor.pm line 854. + Use of uninitialized value in string ne at /usr/share/perl5/Locale/Po4a/TransTractor.pm line 840. + Use of uninitialized value in pattern match (m//) at /usr/share/perl5/Locale/Po4a/Po.pm line 1002. + +While: + + zzuf -cv -s 0:10 -r 0.001:0.3 \ + po4a-translate -d -f text -o markdown -M utf-8 -L utf-8 \ + -k 0 -m LICENSES -p LICENSES.fr.po -l test.fr + +... seems to lose the fight, at the `readpo(LICENSES.fr.po)` step, +against some kind of infinite loop, deadlock, or any similar beast. +It does not seem to eat memory, though. + +Whatever format module is used does not change anything. This is thus +probably a bug in po4a's core or in a lib it depends on. + +The sub `read`, in `TransTractor.pm`, seems to be a good debugging +starting point. + +#### msgmerge + +`msgmerge` is run in our `refreshpofiles` function. I did not manage +to crash it with `zzuf`. + gettext/po4a rough corners -------------------------- -- cgit v1.2.3 From 6a9dafdc1d946cbf13c0c3798b9ece4a3a4540ad Mon Sep 17 00:00:00 2001 From: intrigeri Date: Tue, 11 Nov 2008 05:05:53 +0100 Subject: po/todo: updated page formats, broken links, documentation Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 2f413e275..932dd1430 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -462,19 +462,32 @@ gettext/po4a rough corners Misc. improvements ------------------ -### page titles +Page titles in links +-------------------- -Use nice page titles from meta plugin in links, as inline already -does. This is actually a duplicate for -[[bugs/pagetitle_function_does_not_respect_meta_titles]], which might -be fixed by something like [[todo/using_meta_titles_for_parentlinks]]. +To use the page titles set with the [meta](plugins/meta) plugin when +rendering links would be very much nicer, than the current +"filename.LL" format. This is actually a duplicate for +[[bugs/pagetitle_function_does_not_respect_meta_titles]]. -### source files format +Page formats +------------ -Markdown is supported, great, but what about others? The set of file -formats supported both in ikiwiki and po4a probably is greater than -`{markdown}`. Warning: the po4a modules are the place where one can -expect security issues. +Markdown is well supported, great, but what about others? + +The [po](plugins/po) uses `Locale::Po4a::Text` for every page format; +this can be expected to work out of the box with most other wiki-like +formats supported by ikiwiki. Some of their ad-hoc syntax might be +parsed in a strange way, but the worst problems I can imagine would be +wrapping issues; e.g. there is code in po4a dedicated to prevent +re-wrapping the underlined Markdown headers. + +While it would be easy to better support formats such as [[html]] or +LaTeX, by using for each one the dedicated po4a module, this can be +problematic from a security point of view. + +**TODO**: test the more popular formats and write proper documentation +about it. Translation quality assurance ----------------------------- @@ -487,3 +500,15 @@ A new `cansave` type of hook would be needed to implement this. Note: committing to the underlying repository is a way to bypass this check. + +Broken links +------------ + +See [[contrib/po]]. + +Documentation +------------- + +Maybe write separate documentation depending on the people it targets: +translators, wiki administrators, hackers. This plugin is maybe +complex enough to deserve this. -- cgit v1.2.3 From 18331e9261c40eb577f7553c3a42232d5a593e82 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Tue, 11 Nov 2008 15:25:23 +0100 Subject: po/todo: added bug report for weird Perl warnings Signed-off-by: intrigeri --- doc/plugins/po.mdwn | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'doc/plugins') diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 932dd1430..2f359bb83 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -459,8 +459,17 @@ gettext/po4a rough corners into the Pot file, and let it propagate; should be fixed in `773de05a7a1ee68d2bed173367cf5e716884945a`, time will tell. -Misc. improvements ------------------- +When committing a translatable page to the repository, msgmerge +happens and then: + + Use of uninitialized value $page in pattern match (m//) at /usr/local/share/perl/5.10.0/IkiWiki.pm line 624. + Use of uninitialized value $p in hash element at /usr/local/share/perl/5.10.0/IkiWiki/Render.pm line 423. + Use of uninitialized value $file in string eq at /usr/local/share/perl/5.10.0/IkiWiki/Render.pm line 444. + Use of uninitialized value $page in pattern match (m//) at /usr/local/share/perl/5.10.0/IkiWiki.pm line 624. + +And then tons of: + Use of uninitialized value $page in pattern match (m//) at /usr/local/share/perl/5.10.0/IkiWiki.pm line 1860. + Use of uninitialized value $page in concatenation (.) or string at /usr/local/share/perl/5.10.0/IkiWiki.pm line 1869. Page titles in links -------------------- -- cgit v1.2.3 From 43a1d36378c085ed9c71e066686907104c91da7e Mon Sep 17 00:00:00 2001 From: intrigeri Date: Tue, 11 Nov 2008 15:27:39 +0100 Subject: po: added HOMEPAGEURL template variable, documented when to use it Hopefully all links should now be consistent with the chosen linking behavior, but who knows... Signed-off-by: intrigeri --- IkiWiki/Plugin/po.pm | 8 ++++++++ doc/plugins/po.mdwn | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'doc/plugins') diff --git a/IkiWiki/Plugin/po.pm b/IkiWiki/Plugin/po.pm index 42a125808..739564c6b 100644 --- a/IkiWiki/Plugin/po.pm +++ b/IkiWiki/Plugin/po.pm @@ -285,6 +285,9 @@ sub pagetemplate (@) { #{{{ if ($template->query(name => "istranslatable")) { $template->param(istranslatable => istranslatable($page)); } + if ($template->query(name => "HOMEPAGEURL")) { + $template->param(homepageurl => homepageurl($page)); + } if ($template->query(name => "otherlanguages")) { $template->param(otherlanguages => [otherlanguagesloop($page)]); map add_depends($page, $_), (values %{otherlanguages($page)}); @@ -705,6 +708,11 @@ sub otherlanguagesloop ($) { #{{{ } @ret; } #}}} +sub homepageurl (;$) { #{{{ + my $page=shift; + + return urlto('', $page); +} #}}} # ,---- # | PageSpec's diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index 2f359bb83..d27b5af1d 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -129,6 +129,10 @@ Usage Templates --------- +When `po_link_to` is not set to `negotiated`, one should replace some +occurrences of `BASEURL` with `HOMEPAGEURL` to get correct links to +the wiki homepage. + The `ISTRANSLATION` and `ISTRANSLATABLE` variables can be used to display things only on translatable or translation pages. -- cgit v1.2.3 From f0e796d9a12d5f78fb89f312f138c1a0bb6f0141 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Tue, 11 Nov 2008 16:04:40 +0100 Subject: po(change): fix uninitialized variables when running IkiWiki::refresh() Signed-off-by: intrigeri --- IkiWiki/Plugin/po.pm | 4 ++++ doc/plugins/po.mdwn | 12 ------------ 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'doc/plugins') diff --git a/IkiWiki/Plugin/po.pm b/IkiWiki/Plugin/po.pm index 739564c6b..a8d9b9cd9 100644 --- a/IkiWiki/Plugin/po.pm +++ b/IkiWiki/Plugin/po.pm @@ -362,6 +362,10 @@ sub change(@) { #{{{ resettranslationscache(); # Trigger a wiki refresh. require IkiWiki::Render; + # without preliminary saveindex/loadindex, refresh() + # complains about a lot of uninitialized variables + IkiWiki::saveindex(); + IkiWiki::loadindex(); IkiWiki::refresh(); IkiWiki::saveindex(); } diff --git a/doc/plugins/po.mdwn b/doc/plugins/po.mdwn index d27b5af1d..544c7ef62 100644 --- a/doc/plugins/po.mdwn +++ b/doc/plugins/po.mdwn @@ -463,18 +463,6 @@ gettext/po4a rough corners into the Pot file, and let it propagate; should be fixed in `773de05a7a1ee68d2bed173367cf5e716884945a`, time will tell. -When committing a translatable page to the repository, msgmerge -happens and then: - - Use of uninitialized value $page in pattern match (m//) at /usr/local/share/perl/5.10.0/IkiWiki.pm line 624. - Use of uninitialized value $p in hash element at /usr/local/share/perl/5.10.0/IkiWiki/Render.pm line 423. - Use of uninitialized value $file in string eq at /usr/local/share/perl/5.10.0/IkiWiki/Render.pm line 444. - Use of uninitialized value $page in pattern match (m//) at /usr/local/share/perl/5.10.0/IkiWiki.pm line 624. - -And then tons of: - Use of uninitialized value $page in pattern match (m//) at /usr/local/share/perl/5.10.0/IkiWiki.pm line 1860. - Use of uninitialized value $page in concatenation (.) or string at /usr/local/share/perl/5.10.0/IkiWiki.pm line 1869. - Page titles in links -------------------- -- cgit v1.2.3