summaryrefslogtreecommitdiff
path: root/perl
diff options
context:
space:
mode:
Diffstat (limited to 'perl')
-rw-r--r--perl/Locale/Po4a/Text.pm260
1 files changed, 260 insertions, 0 deletions
diff --git a/perl/Locale/Po4a/Text.pm b/perl/Locale/Po4a/Text.pm
new file mode 100644
index 0000000..836d2c1
--- /dev/null
+++ b/perl/Locale/Po4a/Text.pm
@@ -0,0 +1,260 @@
+#!/usr/bin/perl -w
+
+# Po4a::Text.pm
+#
+# extract and translate translatable strings from text documents
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=head1 NAME
+
+Locale::Po4a::Text - Convert text documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (po for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Text is a module to help the translation of text documents into
+other [human] languages.
+
+Paragraphs are split on empty lines (or lines containing only spaces or
+tabulations).
+
+If a paragraph contains a line starting with a space (or tabulation), this
+paragraph won't be rewrapped.
+
+=cut
+
+package Locale::Po4a::Text;
+
+use 5.006;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+These are this module's particular options:
+
+=over
+
+=item B<nobullets>
+
+Deactivate detection of bullets.
+
+By default, when a bullet is detected, the bullet paragraph is not considered
+as a verbatim paragraph (with the no-wrap flag in the PO file), but the module
+rewraps this paragraph in the generated PO file and in the translation.
+
+=cut
+
+my $bullets = 1;
+
+=item B<debianchangelog>
+
+Handle the header and footer of
+released versions, which only contain non-translatable information.
+
+=cut
+
+my $debianchangelog = 0;
+
+=item B<markdown>
+
+Handle some special markup in Markdown-formatted texts.
+
+=back
+
+=cut
+
+# Markdown handling is opt-in: initialize() switches it on when the
+# "markdown" option is given.  Starting at 0 keeps that option meaningful.
+my $markdown = 0;
+
+# initialize(%options)
+#
+# Set up this parser from the options it was given.
+#
+# Keys of %options that are looked at (only their definedness is tested,
+# never their value):
+#   nobullets        - turn bullet detection off
+#   debianchangelog  - turn Debian changelog header/trailer handling on
+#   markdown         - turn Markdown markup handling on
+#
+# The three flags are file-scoped variables, not per-object fields.  Each of
+# them is therefore assigned on every call, so that an option passed to one
+# parser does not silently stay active for the next parser created in the
+# same process, and so that each feature is enabled only when requested.
+sub initialize {
+    my $self = shift;
+    my %options = @_;
+
+    $self->{options}{'nobullets'}='';
+
+    $bullets         = defined $options{'nobullets'}       ? 0 : 1;
+    $debianchangelog = defined $options{'debianchangelog'} ? 1 : 0;
+    $markdown        = defined $options{'markdown'}        ? 1 : 0;
+}
+
+# parse()
+#
+# Read the input line by line with shiftline(), group the lines into
+# paragraphs and hand every finished paragraph to do_paragraph() together
+# with a flag telling whether it may be rewrapped ($wrapped_mode).
+#
+# A pending paragraph is closed by:
+#   - a Debian changelog header or trailer line (only when the
+#     debianchangelog flag is set); that line is passed to pushline() as is;
+#   - a line that is empty or holds only whitespace;
+#   - a line made only of '=', '_' or '-' characters; the line is appended
+#     to the paragraph, which is then processed with wrapping disabled;
+#   - when the markdown flag is set, a line starting with '#' or '>',
+#     containing '<' or '>', or starting or ending with '"""'; handled like
+#     the previous case.
+# Whatever is still pending when the input is exhausted is processed last.
+sub parse {
+    my $self = shift;
+    my ($line,$ref);
+    my $paragraph="";
+    my $wrapped_mode = 1;
+    # A changelog header is only looked for at the start of the input and
+    # after a trailer has been seen.
+    my $expect_header = 1;
+    ($line,$ref)=$self->shiftline();
+    while (defined($line)) {
+        chomp($line);
+        $self->{ref}="$ref";
+        if ($debianchangelog and
+            $expect_header and
+            $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\) # src, version
+                      \s+([-+0-9a-z.]+); # distribution
+                      \s*urgency\s*\=\s*(.*\S)\s*$/ix) { #
+            # Found header
+            do_paragraph($self,$paragraph,$wrapped_mode);
+            $paragraph="";
+            $self->pushline("$line\n");
+            $expect_header=0;
+        } elsif ($debianchangelog and
+                 # Debian changelog trailers put two spaces between the
+                 # e-mail address and the date; accept any run of
+                 # whitespace there instead of exactly one space.
+                 $line =~ m/^ \-\- (.*) <(.*)>\s+((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
+            # Found trailer
+            do_paragraph($self,$paragraph,$wrapped_mode);
+            $paragraph="";
+            $self->pushline("$line\n");
+            $expect_header=1;
+        } elsif ($line =~ /^\s*$/) {
+            # Break paragraphs on lines containing only spaces
+            do_paragraph($self,$paragraph,$wrapped_mode);
+            $self->pushline("\n") unless (   $wrapped_mode == 0
+                                          or $paragraph eq "");
+            $paragraph="";
+            $wrapped_mode = 1;
+            $self->pushline($line."\n");
+        } elsif (   $line =~ /^=*$/
+                 or $line =~ /^_*$/
+                 or $line =~ /^-*$/) {
+            # A line made only of '=', '_' or '-' ends the paragraph; it is
+            # kept as the paragraph's last line and wrapping is disabled.
+            $wrapped_mode = 0;
+            $paragraph .= $line."\n";
+            do_paragraph($self,$paragraph,$wrapped_mode);
+            $paragraph="";
+            $wrapped_mode = 1;
+        } elsif ($markdown and
+                 (   $line =~ m/^#/            # headline
+                  or $line =~ m/^>/            # blockquote
+                  or $line =~ m/[<>]/          # maybe html
+                  or $line =~ m/^"""/          # textblock inside macro end
+                  or $line =~ m/"""$/)) {      # textblock inside macro begin
+            # Found Markdown markup: end the paragraph with this line and
+            # process it with wrapping disabled.
+            $wrapped_mode = 0;
+            $paragraph .= $line."\n";
+            do_paragraph($self,$paragraph,$wrapped_mode);
+            $paragraph="";
+            $wrapped_mode = 1;
+        } else {
+            if ($line =~ /^\s/) {
+                # A line starting with a space indicates a non-wrap
+                # paragraph
+                $wrapped_mode = 0;
+            }
+            $paragraph .= $line."\n";
+        }
+        # Paragraphs starting with a '*' bullet or with a number followed
+        # by '.' or ')' and a space, and paragraphs containing three or
+        # more consecutive spaces or tabs, are flagged as non-wrapped.
+        $wrapped_mode = 0 if (   $paragraph =~ m/^(\*|[0-9]+[.)] )/s
+                              or $paragraph =~ m/[ \t][ \t][ \t]/s);
+        ($line,$ref)=$self->shiftline();
+    }
+    if (length $paragraph) {
+        do_paragraph($self,$paragraph,$wrapped_mode);
+    }
+}
+
+# do_paragraph($self, $paragraph, $wrap)
+#
+# Translate one paragraph and push the result to the output.
+#
+# When bullet detection is active, leading bullet items are peeled off the
+# paragraph one at a time.  An item looks like
+#
+#     | * blah blah
+#     |<spaces> blah
+#     |          ^-- continuation lines aligned with the item text
+#
+# and the recognised markers are
+#
+#     - blah         o blah         + blah         * blah
+#     1. blah        1) blah        (1) blah
+#
+# Each item is translated on its own with wrapping enabled, then re-emitted
+# behind its marker with continuation lines indented to the same column.
+# Whatever cannot be handled as a bullet item is translated as plain text,
+# with $wrap passed through as the "wrap" setting.
+sub do_paragraph {
+    my ($self, $paragraph, $wrap) = @_;
+    return if $paragraph eq "";
+
+    if ($bullets) {
+        BULLET_ITEM:
+        while ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
+            my ($lead, $marker, $item, $rest) = ($1, $2, $4, $5);
+            # Continuation lines are expected at the column where the
+            # item text starts: leading indent plus the marker width.
+            my $hang = $lead.(' ' x length $marker);
+
+            # Absorb continuation lines into the item, unless what is left
+            # contains the hanging indent followed by a marker.
+            while ($rest !~ m/$hang(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
+                   and $rest =~ s/^$hang(\S[^\n]*\n)//s) {
+                $item .= $1;
+            }
+
+            # TODO: detect if a line starts with the same bullet
+
+            # A run of three or more blanks between words: do not treat
+            # this as a bullet item.
+            last BULLET_ITEM if $item =~ m/\S[ \t][ \t][ \t]+\S/s;
+
+            # Pattern for a following item using the same kind of marker;
+            # the first number in it may differ from item to item.
+            my $next_item = quotemeta($lead.$marker);
+            $next_item =~ s/[0-9]+/\\d\+/;
+
+            # Go on only when the item used up the paragraph or is directly
+            # followed by another item of the same kind.
+            last BULLET_ITEM
+                unless ($rest eq '' or $rest =~ m/^$next_item\S/s);
+
+            my $trans = $self->translate($item,
+                                         $self->{ref},
+                                         "Bullet: '$lead$marker'",
+                                         "wrap" => 1,
+                                         "wrapcol" => - (length $hang));
+            $trans =~ s/^/$lead$marker/s;
+            $trans =~ s/\n(.)/\n$hang$1/sg;
+            $self->pushline( $trans."\n" );
+
+            return if $rest eq '';
+
+            # Another bullet: go round again with the remaining text.
+            $paragraph = $rest;
+        }
+    }
+    # TODO: detect indented paragraphs
+
+    my $plain = $self->translate($paragraph,
+                                 $self->{ref},
+                                 "Plain text",
+                                 "wrap" => $wrap);
+    $self->pushline($plain);
+}
+
+1;
+
+=head1 STATUS OF THIS MODULE
+
+Tested successfully on simple text files and NEWS.Debian files.
+
+=head1 AUTHORS
+
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright 2005,2007 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL (see the COPYING file).