- #!/usr/bin/perl -w
- # Po4a::Text.pm
- #
- # extract and translate translatable strings from text documents
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc.,
- # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- #
- ########################################################################
- =encoding UTF-8
- =head1 NAME
- Locale::Po4a::Text - Convert text documents from/to PO files
- =head1 DESCRIPTION
- The po4a (po for anything) project goal is to ease translations (and more
- interestingly, the maintenance of translations) using gettext tools on
- areas where they were not expected like documentation.
- Locale::Po4a::Text is a module to help the translation of text documents into
- other [human] languages.
- Paragraphs are split on empty lines (or lines containing only spaces or
- tabulations).
- If a paragraph contains a line starting with a space (or tabulation), this
- paragraph won't be rewrapped.
- =cut
- package Locale::Po4a::Text;
- use 5.006;
- use strict;
- use warnings;
- require Exporter;
- use vars qw(@ISA @EXPORT);
- @ISA = qw(Locale::Po4a::TransTractor);
- @EXPORT = qw();
- use Locale::Po4a::TransTractor;
- use Locale::Po4a::Common;
- =head1 OPTIONS ACCEPTED BY THIS MODULE
- These are this module's particular options:
- =over
- =item B<nobullets>
- Deactivate detection of bullets.
- By default, when a bullet is detected, the bullet paragraph is not considered
- as a verbatim paragraph (with the no-wrap flag in the PO file), but the module
- rewraps this paragraph in the generated PO file and in the translation.
- =cut
- my $bullets = 1; # File-scoped flag (not per-object); initialize() sets it to 0 when the "nobullets" option is defined.
- =item B<tabs>=I<mode>
- Specify how tabulations shall be handled. The I<mode> can be any of:
- =over
- =item B<split>
- Lines with tabulations introduce breaks in the current paragraph.
- =item B<verbatim>
- Paragraphs containing tabulations will not be re-wrapped.
- =back
- By default, tabulations are considered as spaces.
- =cut
- my $tabs = ""; # File-scoped setting (not per-object); initialize() copies the value of the "tabs" option here, "" being the default.
- =item B<breaks>=I<regex>
- A regular expression matching lines which introduce breaks.
- The regular expression will be anchored so that the whole line must match.
- =cut
- my $breaks; # File-scoped setting (not per-object); stays undef unless initialize() receives the "breaks" option. parse() matches it anchored, as m/^$breaks$/.
- =item B<debianchangelog>
- Handle the header and footer of
- released versions, which only contain non-translatable information.
- =cut
- my $debianchangelog = 0; # File-scoped flag (not per-object); initialize() sets it to 1 when the "debianchangelog" option is defined. parse() then pushes changelog header and trailer lines through unchanged.
- =item B<fortunes>
- Handle the fortunes format, which separates fortunes with a line which
- consists of '%' or '%%', and uses '%%' as the beginning of a comment.
- =cut
- my $fortunes = 0; # File-scoped flag (not per-object); initialize() sets it to 1 when the "fortunes" option is defined. parse() then treats lines matching /^%%?\s*$/ as fortune separators.
- =item B<markdown>
- Handle some special markup in Markdown-formatted texts.
- =cut
- my $markdown = 0; # File-scoped flag (not per-object); initialize() sets it to 1 when the "markdown" option is defined.
- =item B<asciidoc>
- Handle documents in the asciidoc format.
- =cut
- my $asciidoc = 0; # File-scoped flag (not per-object); initialize() sets it to 1 when the "asciidoc" option is defined. It guards the AsciiDoc-specific branches of parse().
- =back
- =cut
# initialize(%options)
#
# Validate the options given to this module and record the resulting
# settings.
#
# Arguments:
#   $self    - the parser object; its {options} hash is (re)populated.
#   %options - option name => value pairs supplied by the caller.
#
# Effects:
#   * $self->{options}{NAME} exists for every option this module knows.
#     It holds the caller's value when the option was passed, and the
#     placeholder value 1 otherwise.
#   * The file-scoped settings ($bullets, $tabs, $breaks, $debianchangelog,
#     $fortunes, $markdown, $asciidoc) are reset to their declared defaults
#     and then updated from %options.
#
# Dies (through wrap_mod, with the message "Unknown option: %s") when
# %options contains a name this module does not know. The file-scoped
# settings are left untouched in that case.
sub initialize {
    my $self    = shift;
    my %options = @_;

    # Register every option name this module accepts. The value 1 is only
    # a placeholder: the loop below overwrites it for each option that the
    # caller actually passed.
    foreach my $known (qw(asciidoc breaks debianchangelog debug fortunes
                          markdown nobullets tabs verbose)) {
        $self->{options}{$known} = 1;
    }

    foreach my $opt (keys %options) {
        die wrap_mod("po4a::text",
                     dgettext("po4a", "Unknown option: %s"), $opt)
            unless exists $self->{options}{$opt};
        $self->{options}{$opt} = $options{$opt};
    }

    # The settings below live in file-scoped lexicals, so they outlive any
    # single parser object. Restore the defaults they were declared with
    # before applying this call's options; otherwise an option enabled for
    # one document would silently stay enabled for every later document
    # handled by the same process.
    $bullets         = 1;
    $tabs            = "";
    $breaks          = undef;
    $debianchangelog = 0;
    $fortunes        = 0;
    $markdown        = 0;
    $asciidoc        = 0;

    # Flag-style options are switched on by mere presence: any defined
    # value (including 0 or the empty string) enables them.
    $bullets = 0 if defined $options{'nobullets'};

    # Valued options are copied verbatim.
    $tabs   = $options{'tabs'}   if defined $options{'tabs'};
    $breaks = $options{'breaks'} if defined $options{'breaks'};

    $debianchangelog = 1 if defined $options{'debianchangelog'};
    $fortunes        = 1 if defined $options{'fortunes'};
    $markdown        = 1 if defined $options{'markdown'};
    $asciidoc=1 if (defined $options{'asciidoc'});
}
- sub parse {
- my $self = shift;
- my ($line,$ref);
- my $paragraph="";
- my $wrapped_mode = 1;
- my $expect_header = 1;
- my $end_of_paragraph = 0;
- ($line,$ref)=$self->shiftline();
- my $file = $ref;
- $file =~ s/:[0-9]+$//;
- while (defined($line)) {
- $ref =~ m/^(.*):[0-9]+$/;
- if ($1 ne $file) {
- $file = $1;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
- $expect_header = 1;
- }
- chomp($line);
- $self->{ref}="$ref";
- if ($debianchangelog and
- $expect_header and
- $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\) # src, version
- \s+([-+0-9a-z.]+); # distribution
- \s*urgency\s*\=\s*(.*\S)\s*$/ix) { #
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $self->pushline("$line\n");
- $expect_header=0;
- } elsif ($debianchangelog and
- $line =~ m/^ \-\- (.*) <(.*)> ((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
- # Found trailer
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $self->pushline("$line\n");
- $expect_header=1;
- } elsif ($fortunes and
- $line =~ m/^%%?\s*$/) {
- # Found end of fortune
- do_paragraph($self,$paragraph,$wrapped_mode);
- $self->pushline("\n") unless ( $wrapped_mode == 0
- or $paragraph eq "");
- $paragraph="";
- $wrapped_mode = 1;
- $self->pushline("$line\n");
- } elsif ( (defined $self->{verbatim})
- and ($self->{verbatim} == 2)) {
- # Untranslated blocks
- $self->pushline($line."\n");
- if ($asciidoc and
- ($line =~ m/^(\/{4,}|~{4,})$/)) {
- undef $self->{verbatim};
- undef $self->{type};
- $wrapped_mode = 1;
- }
- } elsif ( ($line =~ /^\s*$/)
- or ( defined $breaks
- and $line =~ m/^$breaks$/)) {
- # Break paragraphs on lines containing only spaces
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1 unless defined($self->{verbatim});
- $self->pushline($line."\n");
- } elsif ($asciidoc and (not defined($self->{verbatim})) and
- ($line =~ m/^(\+|--)$/)) {
- # List Item Continuation or List Block
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $self->pushline($line."\n");
- } elsif ($asciidoc and (not defined($self->{verbatim})) and
- ($line =~ m/^(={4,}|-{4,}|~{4,}|\^{4,}|\+{4,})$/) and
- (defined($paragraph) )and
- ($paragraph =~ m/^[^\n]*\n$/s) and
- (length($paragraph) == (length($line)+1))) {
- # Found title
- $wrapped_mode = 0;
- my $level = $line;
- $level =~ s/^(.).*$/$1/;
- my $t = $self->translate($paragraph,
- $self->{ref},
- "Title $level",
- "wrap" => 0);
- $self->pushline($t);
- $paragraph="";
- $wrapped_mode = 1;
- $self->pushline(($level x (length($t)-1))."\n");
- } elsif ($asciidoc and
- ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/)) {
- my $titlelevel1 = $1;
- my $titlespaces = $2;
- my $title = $3;
- my $titlelevel2 = $4||"";
- # Found one line title
- do_paragraph($self,$paragraph,$wrapped_mode);
- $wrapped_mode = 0;
- $paragraph="";
- my $t = $self->translate($title,
- $self->{ref},
- "Title $titlelevel1",
- "wrap" => 0);
- $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
- $wrapped_mode = 1;
- } elsif ($asciidoc and
- ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,})$/)) {
- # Found one delimited block
- my $t = $line;
- $t =~ s/^(.).*$/$1/;
- my $type = "delimited block $t";
- if (defined $self->{verbatim} and ($self->{type} ne $type)) {
- $paragraph .= "$line\n";
- } else {
- do_paragraph($self,$paragraph,$wrapped_mode);
- if ( (defined $self->{type})
- and ($self->{type} eq $type)) {
- undef $self->{type};
- undef $self->{verbatim};
- $wrapped_mode = 1;
- } else {
- if ($t eq "\/") {
- # CommentBlock, should not be treated
- $self->{verbatim} = 2;
- } elsif ($t eq "+") {
- # PassthroughBlock
- $wrapped_mode = 0;
- $self->{verbatim} = 1;
- } elsif ($t eq "-") {
- # ListingBlock
- $wrapped_mode = 0;
- $self->{verbatim} = 1;
- } elsif ($t eq ".") {
- # LiteralBlock
- $wrapped_mode = 0;
- $self->{verbatim} = 1;
- } elsif ($t eq "*") {
- # SidebarBlock
- $wrapped_mode = 1;
- } elsif ($t eq "_") {
- # QuoteBlock
- if ( (defined $self->{type})
- and ($self->{type} eq "verse")) {
- $wrapped_mode = 0;
- $self->{verbatim} = 1;
- } else {
- $wrapped_mode = 1;
- }
- } elsif ($t eq "=") {
- # ExampleBlock
- $wrapped_mode = 1;
- } elsif ($t eq "~") {
- # Filter blocks, TBC: not translated
- $wrapped_mode = 0;
- $self->{verbatim} = 2;
- }
- $self->{type} = $type;
- }
- $paragraph="";
- $self->pushline($line."\n");
- }
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
- # Found BlockId
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
- $self->pushline($line."\n");
- undef $self->{bullet};
- undef $self->{indent};
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($paragraph eq "") and
- ($line =~ m/^((?:NOTE|TIP|IMPORTANT|WARNING|CAUTION):\s+)(.*)$/)) {
- my $type = $1;
- my $text = $2;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph=$text."\n";
- $wrapped_mode = 1;
- $self->pushline($type);
- undef $self->{bullet};
- undef $self->{indent};
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^\[(NOTE|TIP|IMPORTANT|WARNING|CAUTION|verse|quote)\]$/)) {
- my $type = $1;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
- $self->pushline($line."\n");
- if ($type eq "verse") {
- $wrapped_mode = 0;
- }
- undef $self->{bullet};
- undef $self->{indent};
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^\[(verse|quote), +(.*)\]$/)) {
- my $type = $1;
- my $arg = $2;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- my $t = $self->translate($arg,
- $self->{ref},
- "$type",
- "wrap" => 0);
- $self->pushline("[$type, $t]\n");
- $wrapped_mode = 1;
- if ($type eq "verse") {
- $wrapped_mode = 0;
- }
- $self->{type} = $type;
- undef $self->{bullet};
- undef $self->{indent};
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^\[icon="(.*)"\]$/)) {
- my $arg = $1;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- my $t = $self->translate($arg,
- $self->{ref},
- "icon",
- "wrap" => 0);
- $self->pushline("[icon=\"$t\"]\n");
- $wrapped_mode = 1;
- undef $self->{bullet};
- undef $self->{indent};
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
- my $arg = $1;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- my $t = $self->translate($arg,
- $self->{ref},
- "caption",
- "wrap" => 0);
- $self->pushline("[icons=None, caption=\"$t\"]\n");
- $wrapped_mode = 1;
- undef $self->{bullet};
- undef $self->{indent};
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
- my $indent = $1;
- my $label = $2;
- my $labelend = $3;
- # Found labeled list
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
- $self->{bullet} = "";
- $self->{indent} = $indent;
- my $t = $self->translate($label,
- $self->{ref},
- "Labeled list",
- "wrap" => 0);
- $self->pushline("$indent$t$labelend\n");
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
- my $indent = $1;
- my $label = $2;
- my $labelend = $3;
- my $labeltext = $4;
- # Found Horizontal Labeled Lists
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph=$labeltext."\n";
- $wrapped_mode = 1;
- $self->{bullet} = "";
- $self->{indent} = $indent;
- my $t = $self->translate($label,
- $self->{ref},
- "Labeled list",
- "wrap" => 0);
- $self->pushline("$indent$t$labelend");
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
- my $attrname = $1;
- my $attrsep = $2;
- my $attrvalue = $3;
- # Found a Attribute entry
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
- undef $self->{bullet};
- undef $self->{indent};
- my $t = $self->translate($attrvalue,
- $self->{ref},
- "Attribute :$attrname:",
- "wrap" => 0);
- $self->pushline(":$attrname$attrsep$t\n");
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
- my $title = $1;
- # Found block title
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
- undef $self->{bullet};
- undef $self->{indent};
- my $t = $self->translate($title,
- $self->{ref},
- "Block title",
- "wrap" => 0);
- $self->pushline(".$t\n");
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
- my $indent = $1||"";
- my $bullet = $2;
- my $text = $3;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph = $text."\n";
- $self->{indent} = $indent;
- $self->{bullet} = $bullet;
- } elsif ($asciidoc and not defined $self->{verbatim} and
- ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
- my $bullet = $1;
- my $text = $2;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph = $text."\n";
- $self->{indent} = "";
- $self->{bullet} = $bullet;
- } elsif ($asciidoc and not defined $self->{verbatim} and
- (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
- my $indent = $1;
- my $text = $2;
|