iki engine, based on git.ikiwiki.info
summaryrefslogtreecommitdiff
path: root/perl/Locale/Po4a/Text.pm
blob: 676363dd51fe21e60b023f1be78b06d33b039f01 (plain)
  1. #!/usr/bin/perl -w
  2. # Po4a::Text.pm
  3. #
  4. # extract and translate translatable strings from text documents
  5. #
  6. # This program is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 2 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc.,
  19. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. #
  21. ########################################################################
  22. =encoding UTF-8
  23. =head1 NAME
  24. Locale::Po4a::Text - Convert text documents from/to PO files
  25. =head1 DESCRIPTION
  26. The po4a (po for anything) project goal is to ease translations (and more
  27. interestingly, the maintenance of translations) using gettext tools on
  28. areas where they were not expected like documentation.
  29. Locale::Po4a::Text is a module to help the translation of text documents into
  30. other [human] languages.
  31. Paragraphs are split on empty lines (or lines containing only spaces or
  32. tabulations).
  33. If a paragraph contains a line starting with a space (or tabulation), this
  34. paragraph won't be rewrapped.
  35. =cut
  36. package Locale::Po4a::Text;
  37. use 5.006;
  38. use strict;
  39. use warnings;
  40. require Exporter;
  41. use vars qw(@ISA @EXPORT);
  42. @ISA = qw(Locale::Po4a::TransTractor);
  43. @EXPORT = qw();
  44. use Locale::Po4a::TransTractor;
  45. use Locale::Po4a::Common;
  46. =head1 OPTIONS ACCEPTED BY THIS MODULE
  47. These are this module's particular options:
  48. =over
  49. =item B<nobullets>
  50. Deactivate detection of bullets.
  51. By default, when a bullet is detected, the bullet paragraph is not considered
  52. as a verbatim paragraph (with the no-wrap flag in the PO file), but the module
  53. rewraps this paragraph in the generated PO file and in the translation.
  54. =cut
  55. my $bullets = 1; # bullet detection is on by default; initialize() sets this to 0 when 'nobullets' is given
  56. =item B<tabs>=I<mode>
  57. Specify how tabulations shall be handled. The I<mode> can be any of:
  58. =over
  59. =item B<split>
  60. Lines with tabulations introduce breaks in the current paragraph.
  61. =item B<verbatim>
  62. Paragraphs containing tabulations will not be re-wrapped.
  63. =back
  64. By default, tabulations are considered as spaces.
  65. =cut
  66. my $tabs = ""; # empty by default; initialize() copies the value of the 'tabs' option here
  67. =item B<breaks>=I<regex>
  68. A regular expression matching lines which introduce breaks.
  69. The regular expression will be anchored so that the whole line must match.
  70. =cut
  71. my $breaks; # undef unless the 'breaks' option is given; parse() matches lines against it anchored as ^$breaks$
  72. =item B<debianchangelog>
  73. Handle the header and footer of
  74. released versions, which only contain non-translatable information.
  75. =cut
  76. my $debianchangelog = 0; # set to 1 by initialize() when 'debianchangelog' is given; gates the changelog header/trailer checks in parse()
  77. =item B<fortunes>
  78. Handle the fortunes format, which separates fortunes with a line which
  79. consists of '%' or '%%', and uses '%%' as the beginning of a comment.
  80. =cut
  81. my $fortunes = 0; # set to 1 by initialize() when 'fortunes' is given; gates the '%' / '%%' separator check in parse()
  82. =item B<markdown>
  83. Handle some special markup in Markdown-formatted texts.
  84. =cut
  85. my $markdown = 0; # set to 1 by initialize() when the 'markdown' option is given
  86. =item B<asciidoc>
  87. Handle documents in the asciidoc format.
  88. =cut
  89. my $asciidoc = 0; # set to 1 by initialize() when 'asciidoc' is given; gates the asciidoc-specific branches in parse()
  90. =back
  91. =cut
  # Register the options understood by this module and apply the caller's
  # settings to the file-scoped parser flags.
  #
  # Arguments:
  #   $self    - the parser object; its {options} hash is filled in here.
  #   %options - option name => value pairs supplied by the caller.
  #
  # Dies (with a message built by wrap_mod) when an option name is not one of
  # the registered ones.
  #
  # The flags ($bullets, $tabs, $breaks, ...) are file-scoped lexicals shared
  # by every parser object, so they are first put back to their declared
  # defaults. Without that reset, an option enabled for one document would
  # silently stay enabled for every document initialized afterwards in the
  # same process.
  sub initialize {
      my $self    = shift;
      my %options = @_;

      # Options this module knows about; anything else is rejected below.
      foreach my $known (qw(asciidoc breaks debianchangelog debug fortunes
                            markdown nobullets tabs verbose)) {
          $self->{options}{$known} = 1;
      }

      foreach my $opt (keys %options) {
          die wrap_mod("po4a::text",
                       dgettext("po4a", "Unknown option: %s"), $opt)
              unless exists $self->{options}{$opt};
          $self->{options}{$opt} = $options{$opt};
      }

      # Start from the declared defaults so that settings from a previous
      # initialize() call do not leak into this one.
      $bullets         = 1;
      $tabs            = "";
      $breaks          = undef;
      $debianchangelog = 0;
      $fortunes        = 0;
      $markdown        = 0;
      $asciidoc        = 0;

      # Value-carrying options are copied; the others are plain switches that
      # are considered "on" as soon as they are defined.
      $bullets         = 0                  if defined $options{'nobullets'};
      $tabs            = $options{'tabs'}   if defined $options{'tabs'};
      $breaks          = $options{'breaks'} if defined $options{'breaks'};
      $debianchangelog = 1                  if defined $options{'debianchangelog'};
      $fortunes        = 1                  if defined $options{'fortunes'};
      $markdown        = 1                  if defined $options{'markdown'};
      $asciidoc        = 1                  if defined $options{'asciidoc'};
  }
  130. sub parse {
  131. my $self = shift;
  132. my ($line,$ref);
  133. my $paragraph="";
  134. my $wrapped_mode = 1;
  135. my $expect_header = 1;
  136. my $end_of_paragraph = 0;
  137. ($line,$ref)=$self->shiftline();
  138. my $file = $ref;
  139. $file =~ s/:[0-9]+$//;
  140. while (defined($line)) {
  141. $ref =~ m/^(.*):[0-9]+$/;
  142. if ($1 ne $file) {
  143. $file = $1;
  144. do_paragraph($self,$paragraph,$wrapped_mode);
  145. $paragraph="";
  146. $wrapped_mode = 1;
  147. $expect_header = 1;
  148. }
  149. chomp($line);
  150. $self->{ref}="$ref";
  151. if ($debianchangelog and
  152. $expect_header and
  153. $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\) # src, version
  154. \s+([-+0-9a-z.]+); # distribution
  155. \s*urgency\s*\=\s*(.*\S)\s*$/ix) { #
  156. do_paragraph($self,$paragraph,$wrapped_mode);
  157. $paragraph="";
  158. $self->pushline("$line\n");
  159. $expect_header=0;
  160. } elsif ($debianchangelog and
  161. $line =~ m/^ \-\- (.*) <(.*)> ((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
  162. # Found trailer
  163. do_paragraph($self,$paragraph,$wrapped_mode);
  164. $paragraph="";
  165. $self->pushline("$line\n");
  166. $expect_header=1;
  167. } elsif ($fortunes and
  168. $line =~ m/^%%?\s*$/) {
  169. # Found end of fortune
  170. do_paragraph($self,$paragraph,$wrapped_mode);
  171. $self->pushline("\n") unless ( $wrapped_mode == 0
  172. or $paragraph eq "");
  173. $paragraph="";
  174. $wrapped_mode = 1;
  175. $self->pushline("$line\n");
  176. } elsif ( (defined $self->{verbatim})
  177. and ($self->{verbatim} == 2)) {
  178. # Untranslated blocks
  179. $self->pushline($line."\n");
  180. if ($asciidoc and
  181. ($line =~ m/^(\/{4,}|~{4,})$/)) {
  182. undef $self->{verbatim};
  183. undef $self->{type};
  184. $wrapped_mode = 1;
  185. }
  186. } elsif ( ($line =~ /^\s*$/)
  187. or ( defined $breaks
  188. and $line =~ m/^$breaks$/)) {
  189. # Break paragraphs on lines containing only spaces
  190. do_paragraph($self,$paragraph,$wrapped_mode);
  191. $paragraph="";
  192. $wrapped_mode = 1 unless defined($self->{verbatim});
  193. $self->pushline($line."\n");
  194. } elsif ($asciidoc and (not defined($self->{verbatim})) and
  195. ($line =~ m/^(\+|--)$/)) {
  196. # List Item Continuation or List Block
  197. do_paragraph($self,$paragraph,$wrapped_mode);
  198. $paragraph="";
  199. $self->pushline($line."\n");
  200. } elsif ($asciidoc and (not defined($self->{verbatim})) and
  201. ($line =~ m/^(={4,}|-{4,}|~{4,}|\^{4,}|\+{4,})$/) and
  202. (defined($paragraph) )and
  203. ($paragraph =~ m/^[^\n]*\n$/s) and
  204. (length($paragraph) == (length($line)+1))) {
  205. # Found title
  206. $wrapped_mode = 0;
  207. my $level = $line;
  208. $level =~ s/^(.).*$/$1/;
  209. my $t = $self->translate($paragraph,
  210. $self->{ref},
  211. "Title $level",
  212. "wrap" => 0);
  213. $self->pushline($t);
  214. $paragraph="";
  215. $wrapped_mode = 1;
  216. $self->pushline(($level x (length($t)-1))."\n");
  217. } elsif ($asciidoc and
  218. ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/)) {
  219. my $titlelevel1 = $1;
  220. my $titlespaces = $2;
  221. my $title = $3;
  222. my $titlelevel2 = $4||"";
  223. # Found one line title
  224. do_paragraph($self,$paragraph,$wrapped_mode);
  225. $wrapped_mode = 0;
  226. $paragraph="";
  227. my $t = $self->translate($title,
  228. $self->{ref},
  229. "Title $titlelevel1",
  230. "wrap" => 0);
  231. $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
  232. $wrapped_mode = 1;
  233. } elsif ($asciidoc and
  234. ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,})$/)) {
  235. # Found one delimited block
  236. my $t = $line;
  237. $t =~ s/^(.).*$/$1/;
  238. my $type = "delimited block $t";
  239. if (defined $self->{verbatim} and ($self->{type} ne $type)) {
  240. $paragraph .= "$line\n";
  241. } else {
  242. do_paragraph($self,$paragraph,$wrapped_mode);
  243. if ( (defined $self->{type})
  244. and ($self->{type} eq $type)) {
  245. undef $self->{type};
  246. undef $self->{verbatim};
  247. $wrapped_mode = 1;
  248. } else {
  249. if ($t eq "\/") {
  250. # CommentBlock, should not be treated
  251. $self->{verbatim} = 2;
  252. } elsif ($t eq "+") {
  253. # PassthroughBlock
  254. $wrapped_mode = 0;
  255. $self->{verbatim} = 1;
  256. } elsif ($t eq "-") {
  257. # ListingBlock
  258. $wrapped_mode = 0;
  259. $self->{verbatim} = 1;
  260. } elsif ($t eq ".") {
  261. # LiteralBlock
  262. $wrapped_mode = 0;
  263. $self->{verbatim} = 1;
  264. } elsif ($t eq "*") {
  265. # SidebarBlock
  266. $wrapped_mode = 1;
  267. } elsif ($t eq "_") {
  268. # QuoteBlock
  269. if ( (defined $self->{type})
  270. and ($self->{type} eq "verse")) {
  271. $wrapped_mode = 0;
  272. $self->{verbatim} = 1;
  273. } else {
  274. $wrapped_mode = 1;
  275. }
  276. } elsif ($t eq "=") {
  277. # ExampleBlock
  278. $wrapped_mode = 1;
  279. } elsif ($t eq "~") {
  280. # Filter blocks, TBC: not translated
  281. $wrapped_mode = 0;
  282. $self->{verbatim} = 2;
  283. }
  284. $self->{type} = $type;
  285. }
  286. $paragraph="";
  287. $self->pushline($line."\n");
  288. }
  289. } elsif ($asciidoc and not defined $self->{verbatim} and
  290. ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
  291. # Found BlockId
  292. do_paragraph($self,$paragraph,$wrapped_mode);
  293. $paragraph="";
  294. $wrapped_mode = 1;
  295. $self->pushline($line."\n");
  296. undef $self->{bullet};
  297. undef $self->{indent};
  298. } elsif ($asciidoc and not defined $self->{verbatim} and
  299. ($paragraph eq "") and
  300. ($line =~ m/^((?:NOTE|TIP|IMPORTANT|WARNING|CAUTION):\s+)(.*)$/)) {
  301. my $type = $1;
  302. my $text = $2;
  303. do_paragraph($self,$paragraph,$wrapped_mode);
  304. $paragraph=$text."\n";
  305. $wrapped_mode = 1;
  306. $self->pushline($type);
  307. undef $self->{bullet};
  308. undef $self->{indent};
  309. } elsif ($asciidoc and not defined $self->{verbatim} and
  310. ($line =~ m/^\[(NOTE|TIP|IMPORTANT|WARNING|CAUTION|verse|quote)\]$/)) {
  311. my $type = $1;
  312. do_paragraph($self,$paragraph,$wrapped_mode);
  313. $paragraph="";
  314. $wrapped_mode = 1;
  315. $self->pushline($line."\n");
  316. if ($type eq "verse") {
  317. $wrapped_mode = 0;
  318. }
  319. undef $self->{bullet};
  320. undef $self->{indent};
  321. } elsif ($asciidoc and not defined $self->{verbatim} and
  322. ($line =~ m/^\[(verse|quote), +(.*)\]$/)) {
  323. my $type = $1;
  324. my $arg = $2;
  325. do_paragraph($self,$paragraph,$wrapped_mode);
  326. $paragraph="";
  327. my $t = $self->translate($arg,
  328. $self->{ref},
  329. "$type",
  330. "wrap" => 0);
  331. $self->pushline("[$type, $t]\n");
  332. $wrapped_mode = 1;
  333. if ($type eq "verse") {
  334. $wrapped_mode = 0;
  335. }
  336. $self->{type} = $type;
  337. undef $self->{bullet};
  338. undef $self->{indent};
  339. } elsif ($asciidoc and not defined $self->{verbatim} and
  340. ($line =~ m/^\[icon="(.*)"\]$/)) {
  341. my $arg = $1;
  342. do_paragraph($self,$paragraph,$wrapped_mode);
  343. $paragraph="";
  344. my $t = $self->translate($arg,
  345. $self->{ref},
  346. "icon",
  347. "wrap" => 0);
  348. $self->pushline("[icon=\"$t\"]\n");
  349. $wrapped_mode = 1;
  350. undef $self->{bullet};
  351. undef $self->{indent};
  352. } elsif ($asciidoc and not defined $self->{verbatim} and
  353. ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
  354. my $arg = $1;
  355. do_paragraph($self,$paragraph,$wrapped_mode);
  356. $paragraph="";
  357. my $t = $self->translate($arg,
  358. $self->{ref},
  359. "caption",
  360. "wrap" => 0);
  361. $self->pushline("[icons=None, caption=\"$t\"]\n");
  362. $wrapped_mode = 1;
  363. undef $self->{bullet};
  364. undef $self->{indent};
  365. } elsif ($asciidoc and not defined $self->{verbatim} and
  366. ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
  367. my $indent = $1;
  368. my $label = $2;
  369. my $labelend = $3;
  370. # Found labeled list
  371. do_paragraph($self,$paragraph,$wrapped_mode);
  372. $paragraph="";
  373. $wrapped_mode = 1;
  374. $self->{bullet} = "";
  375. $self->{indent} = $indent;
  376. my $t = $self->translate($label,
  377. $self->{ref},
  378. "Labeled list",
  379. "wrap" => 0);
  380. $self->pushline("$indent$t$labelend\n");
  381. } elsif ($asciidoc and not defined $self->{verbatim} and
  382. ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
  383. my $indent = $1;
  384. my $label = $2;
  385. my $labelend = $3;
  386. my $labeltext = $4;
  387. # Found Horizontal Labeled Lists
  388. do_paragraph($self,$paragraph,$wrapped_mode);
  389. $paragraph=$labeltext."\n";
  390. $wrapped_mode = 1;
  391. $self->{bullet} = "";
  392. $self->{indent} = $indent;
  393. my $t = $self->translate($label,
  394. $self->{ref},
  395. "Labeled list",
  396. "wrap" => 0);
  397. $self->pushline("$indent$t$labelend");
  398. } elsif ($asciidoc and not defined $self->{verbatim} and
  399. ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
  400. my $attrname = $1;
  401. my $attrsep = $2;
  402. my $attrvalue = $3;
  403. # Found a Attribute entry
  404. do_paragraph($self,$paragraph,$wrapped_mode);
  405. $paragraph="";
  406. $wrapped_mode = 1;
  407. undef $self->{bullet};
  408. undef $self->{indent};
  409. my $t = $self->translate($attrvalue,
  410. $self->{ref},
  411. "Attribute :$attrname:",
  412. "wrap" => 0);
  413. $self->pushline(":$attrname$attrsep$t\n");
  414. } elsif ($asciidoc and not defined $self->{verbatim} and
  415. ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
  416. my $title = $1;
  417. # Found block title
  418. do_paragraph($self,$paragraph,$wrapped_mode);
  419. $paragraph="";
  420. $wrapped_mode = 1;
  421. undef $self->{bullet};
  422. undef $self->{indent};
  423. my $t = $self->translate($title,
  424. $self->{ref},
  425. "Block title",
  426. "wrap" => 0);
  427. $self->pushline(".$t\n");
  428. } elsif ($asciidoc and not defined $self->{verbatim} and
  429. ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
  430. my $indent = $1||"";
  431. my $bullet = $2;
  432. my $text = $3;
  433. do_paragraph($self,$paragraph,$wrapped_mode);
  434. $paragraph = $text."\n";
  435. $self->{indent} = $indent;
  436. $self->{bullet} = $bullet;
  437. } elsif ($asciidoc and not defined $self->{verbatim} and
  438. ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
  439. my $bullet = $1;
  440. my $text = $2;
  441. do_paragraph($self,$paragraph,$wrapped_mode);
  442. $paragraph = $text."\n";
  443. $self->{indent} = "";
  444. $self->{bullet} = $bullet;
  445. } elsif ($asciidoc and not defined $self->{verbatim} and
  446. (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
  447. my $indent = $1;
  448. my $text = $2;