summaryrefslogtreecommitdiff
path: root/Locale/Po4a/Text.pm
blob: 02cae98a99197c3ece2bda154dc6674bf6de6e4d (plain)
  1. #!/usr/bin/perl -w
  2. # Po4a::Text.pm
  3. #
  4. # extract and translate translatable strings from text documents
  5. #
  6. # This program is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 2 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc.,
  19. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. #
  21. ########################################################################
  22. =encoding UTF-8
  23. =head1 NAME
  24. Locale::Po4a::Text - convert text documents from/to PO files
  25. =head1 DESCRIPTION
  26. The po4a (PO for anything) project goal is to ease translations (and more
  27. interestingly, the maintenance of translations) using gettext tools on
  28. areas where they were not expected like documentation.
  29. Locale::Po4a::Text is a module to help the translation of text documents into
  30. other [human] languages.
  31. Paragraphs are split on empty lines (or lines containing only spaces or
  32. tabulations).
  33. If a paragraph contains a line starting with a space (or tabulation), this
  34. paragraph won't be rewrapped.
  35. =cut
  36. package Locale::Po4a::Text;
  37. use 5.006;
  38. use strict;
  39. use warnings;
  40. require Exporter;
  41. use vars qw(@ISA @EXPORT);
  42. @ISA = qw(Locale::Po4a::TransTractor);
  43. @EXPORT = qw();
  44. use Locale::Po4a::TransTractor;
  45. use Locale::Po4a::Common;
  46. =head1 OPTIONS ACCEPTED BY THIS MODULE
  47. These are this module's particular options:
  48. =over
  49. =item B<nobullets>
  50. Deactivate detection of bullets.
  51. By default, when a bullet is detected, the bullet paragraph is not considered
  52. as a verbatim paragraph (with the no-wrap flag in the PO file), but the module
  53. rewraps this paragraph in the generated PO file and in the translation.
  54. =cut
  55. my $bullets = 1;
  56. =item B<tabs=>I<mode>
  57. Specify how tabulations shall be handled. The I<mode> can be any of:
  58. =over
  59. =item B<split>
  60. Lines with tabulations introduce breaks in the current paragraph.
  61. =item B<verbatim>
  62. Paragraphs containing tabulations will not be re-wrapped.
  63. =back
  64. By default, tabulations are considered as spaces.
  65. =cut
  66. my $tabs = "";
  67. =item B<breaks=>I<regex>
  68. A regular expression matching lines which introduce breaks.
  69. The regular expression will be anchored so that the whole line must match.
  70. =cut
  71. my $breaks;
  72. =item B<debianchangelog>
  73. Handle the header and footer of
  74. released versions, which only contain non-translatable information.
  75. =cut
  76. my $debianchangelog = 0;
  77. =item B<fortunes>
  78. Handle the fortunes format, which separates fortunes with a line
  79. consisting of '%' or '%%', and uses '%%' as the beginning of a comment.
  80. =cut
  81. my $fortunes = 0;
  82. =item B<markdown>
  83. Handle some special markup in Markdown-formatted texts.
  84. =cut
  85. my $markdown = 0;
  86. =item B<asciidoc>
  87. Handle documents in the AsciiDoc format.
  88. =cut
  89. my $asciidoc = 0;
  90. =back
  91. =cut
  # Register the options understood by this module and record the ones
  # given by the caller.
  #
  # Arguments: the object, followed by a flat list of option => value pairs.
  # Dies (message built with wrap_mod/dgettext) on an option name that is
  # not registered in $self->{options}.
  #
  # NOTE: the parsing switches ($bullets, $tabs, $breaks, $debianchangelog,
  # $fortunes, $markdown, $asciidoc) are file-scoped variables, not
  # per-object fields: they are only ever switched on / overwritten here,
  # never reset to their defaults.
  sub initialize {
      my ($self, %options) = @_;

      # An option is "known" when its key exists in $self->{options}.
      my @known = qw(
          asciidoc breaks debianchangelog debug fortunes
          markdown nobullets tabs verbose
      );
      @{ $self->{options} }{@known} = (1) x @known;

      # Reject unknown options, store the value of the known ones.
      for my $name (keys %options) {
          unless (exists $self->{options}{$name}) {
              die wrap_mod("po4a::text",
                           dgettext("po4a", "Unknown option: %s"), $name);
          }
          $self->{options}{$name} = $options{$name};
      }

      # Mirror the options into the file-scoped switches used by parse()
      # and do_paragraph().  For the boolean switches only the presence of
      # a defined value matters, not the value itself.
      $bullets         = 0                  if defined $options{'nobullets'};
      $tabs            = $options{'tabs'}   if defined $options{'tabs'};
      $breaks          = $options{'breaks'} if defined $options{'breaks'};
      $debianchangelog = 1                  if defined $options{'debianchangelog'};
      $fortunes        = 1                  if defined $options{'fortunes'};
      $markdown        = 1                  if defined $options{'markdown'};
      $asciidoc=1 if (defined $options{'asciidoc'});
  }
  # Parse the input document and emit the translated document.
  #
  # Lines are fetched one at a time with $self->shiftline(), which yields
  # the line and a "file:lineno" reference.  Each line is either
  #   - recognised as markup (Debian changelog header/trailer, fortune
  #     separator, AsciiDoc or Markdown constructs, blank line, ...): the
  #     paragraph accumulated so far is flushed through do_paragraph() and
  #     the markup is written with $self->pushline(), after translating the
  #     human-readable part of it if there is one; or
  #   - appended to the current paragraph.
  #
  # State kept on the object while parsing: {ref} (reference of the current
  # line), {verbatim} (1: no-wrap block, 2: block copied untranslated),
  # {type} (comment passed to translate() by do_paragraph), {bullet} and
  # {indent} (prefix re-added by do_paragraph to the translated paragraph).
  #
  # The order of the elsif chain matters: the first matching rule wins.
  sub parse {
      my $self = shift;
      my ($line,$ref);
      # Text of the paragraph being accumulated.
      my $paragraph="";
      # 1 when the current paragraph may be rewrapped, 0 for no-wrap.
      my $wrapped_mode = 1;
      # debianchangelog: true while a version header line is acceptable.
      my $expect_header = 1;
      # Set by a rule to force a flush right after the current line.
      my $end_of_paragraph = 0;
      ($line,$ref)=$self->shiftline();
      my $file = $ref;
      $file =~ s/:[0-9]+$// if defined($line);

      while (defined($line)) {
          # Detect a change of input file.  The capture is only trusted when
          # the reference really has the "file:lineno" shape; a failed match
          # would otherwise leave an undefined or stale $1 behind.
          my ($current_file) = ($ref =~ m/^(.*):[0-9]+$/);
          if (defined $current_file and $current_file ne $file) {
              $file = $current_file;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $expect_header = 1;
          }

          chomp($line);
          $self->{ref}="$ref";
          if ($debianchangelog and
              $expect_header and
              $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\)  # src, version
                        \s+([-+0-9a-z.]+);                    # distribution
                        \s*urgency\s*\=\s*(.*\S)\s*$/ix) {
              # Found a changelog header: copied as is, not translated.
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $self->pushline("$line\n");
              $expect_header=0;
          } elsif ($debianchangelog and
                   $line =~ m/^ \-\- (.*) <(.*)> {1,2}((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
              # Found trailer.  The Debian format puts two spaces between
              # the e-mail address and the date; one is tolerated as well.
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $self->pushline("$line\n");
              $expect_header=1;
          } elsif ($fortunes and
                   $line =~ m/^%%?\s*$/) {
              # Found end of fortune
              do_paragraph($self,$paragraph,$wrapped_mode);
              $self->pushline("\n") unless (   $wrapped_mode == 0
                                            or $paragraph eq "");
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline("$line\n");
          } elsif (    (defined $self->{verbatim})
                   and ($self->{verbatim} == 2)) {
              # Untranslated blocks: copy lines until the closing delimiter
              $self->pushline($line."\n");
              if ($asciidoc and
                  ($line =~ m/^(\/{4,}|~{4,})$/)) {
                  undef $self->{verbatim};
                  undef $self->{type};
                  $wrapped_mode = 1;
              }
          } elsif (   ($line =~ /^\s*$/)
                   or (    defined $breaks
                       and $line =~ m/^(?:$breaks)$/)) {
              # Break paragraphs on lines containing only spaces, or on
              # lines matched by the user-supplied "breaks" regex.  The
              # regex is grouped so that the anchors apply to the whole
              # expression even when it contains a top-level alternation.
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1 unless defined($self->{verbatim});
              $self->pushline($line."\n");
          } elsif ($asciidoc and (not defined($self->{verbatim})) and
                   ($line =~ m/^(\+|--)$/)) {
              # List Item Continuation or List Block
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $self->pushline($line."\n");
          } elsif ($asciidoc and (not defined($self->{verbatim})) and
                   ($line =~ m/^(={4,}|-{4,}|~{4,}|\^{4,}|\+{4,})$/) and
                   (defined($paragraph) )and
                   ($paragraph =~ m/^[^\n]*\n$/s) and
                   (length($paragraph) == (length($line)+1))) {
              # Found title: a single-line paragraph underlined by a line
              # of the same length.  The underline is regenerated to fit
              # the length of the translated title.
              $wrapped_mode = 0;
              my $level = $line;
              $level =~ s/^(.).*$/$1/;
              $paragraph =~ s/\n$//s;
              my $t = $self->translate($paragraph,
                                       $self->{ref},
                                       "Title $level",
                                       "wrap" => 0);
              $self->pushline($t."\n");
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline(($level x (length($t)))."\n");
          } elsif ($asciidoc and
                   ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/)) {
              my $titlelevel1 = $1;
              my $titlespaces = $2;
              my $title = $3;
              my $titlelevel2 = $4||"";
              # Found one line title
              do_paragraph($self,$paragraph,$wrapped_mode);
              $wrapped_mode = 0;
              $paragraph="";
              my $t = $self->translate($title,
                                       $self->{ref},
                                       "Title $titlelevel1",
                                       "wrap" => 0);
              $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
              $wrapped_mode = 1;
          } elsif ($asciidoc and
                   ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,})$/)) {
              # Found one delimited block
              my $t = $line;
              $t =~ s/^(.).*$/$1/;
              my $type = "delimited block $t";
              if (defined $self->{verbatim} and ($self->{type} ne $type)) {
                  # A delimiter of another kind inside a verbatim block is
                  # ordinary content.
                  $paragraph .= "$line\n";
              } else {
                  do_paragraph($self,$paragraph,$wrapped_mode);
                  if (    (defined $self->{type})
                      and ($self->{type} eq $type)) {
                      # Closing delimiter of the current block
                      undef $self->{type};
                      undef $self->{verbatim};
                      $wrapped_mode = 1;
                  } else {
                      # Opening delimiter
                      if ($t eq "\/") {
                          # CommentBlock, should not be treated
                          $self->{verbatim} = 2;
                      } elsif ($t eq "+") {
                          # PassthroughBlock
                          $wrapped_mode = 0;
                          $self->{verbatim} = 1;
                      } elsif ($t eq "-") {
                          # ListingBlock
                          $wrapped_mode = 0;
                          $self->{verbatim} = 1;
                      } elsif ($t eq ".") {
                          # LiteralBlock
                          $wrapped_mode = 0;
                          $self->{verbatim} = 1;
                      } elsif ($t eq "*") {
                          # SidebarBlock
                          $wrapped_mode = 1;
                      } elsif ($t eq "_") {
                          # QuoteBlock
                          if (    (defined $self->{type})
                              and ($self->{type} eq "verse")) {
                              $wrapped_mode = 0;
                              $self->{verbatim} = 1;
                          } else {
                              $wrapped_mode = 1;
                          }
                      } elsif ($t eq "=") {
                          # ExampleBlock
                          $wrapped_mode = 1;
                      } elsif ($t eq "~") {
                          # Filter blocks, TBC: not translated
                          $wrapped_mode = 0;
                          $self->{verbatim} = 2;
                      }
                      $self->{type} = $type;
                  }
                  $paragraph="";
                  $self->pushline($line."\n");
              }
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
              # Found BlockId
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline($line."\n");
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($paragraph eq "") and
                   ($line =~ m/^((?:NOTE|TIP|IMPORTANT|WARNING|CAUTION):\s+)(.*)$/)) {
              # Found an admonition paragraph: the label is copied, the
              # text after it starts a new paragraph.
              my $type = $1;
              my $text = $2;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph=$text."\n";
              $wrapped_mode = 1;
              $self->pushline($type);
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[(NOTE|TIP|IMPORTANT|WARNING|CAUTION|verse|quote)\]$/)) {
              # Found a block style line without argument
              my $type = $1;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline($line."\n");
              if ($type eq "verse") {
                  $wrapped_mode = 0;
              }
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[(verse|quote), +(.*)\]$/)) {
              # Found a verse/quote style line; its argument is translated
              my $type = $1;
              my $arg = $2;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              my $t = $self->translate($arg,
                                       $self->{ref},
                                       "$type",
                                       "wrap" => 0);
              $self->pushline("[$type, $t]\n");
              $wrapped_mode = 1;
              if ($type eq "verse") {
                  $wrapped_mode = 0;
              }
              $self->{type} = $type;
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[icon="(.*)"\]$/)) {
              # Found an icon attribute; its value is translated
              my $arg = $1;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              my $t = $self->translate($arg,
                                       $self->{ref},
                                       "icon",
                                       "wrap" => 0);
              $self->pushline("[icon=\"$t\"]\n");
              $wrapped_mode = 1;
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
              # Found a caption attribute; its value is translated
              my $arg = $1;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              my $t = $self->translate($arg,
                                       $self->{ref},
                                       "caption",
                                       "wrap" => 0);
              $self->pushline("[icons=None, caption=\"$t\"]\n");
              $wrapped_mode = 1;
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
              my $indent = $1;
              my $label = $2;
              my $labelend = $3;
              # Found labeled list
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->{bullet} = "";
              $self->{indent} = $indent;
              my $t = $self->translate($label,
                                       $self->{ref},
                                       "Labeled list",
                                       "wrap" => 0);
              $self->pushline("$indent$t$labelend\n");
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
              my $indent = $1;
              my $label = $2;
              my $labelend = $3;
              my $labeltext = $4;
              # Found Horizontal Labeled Lists: the label is translated
              # now, the text after it starts a new paragraph.
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph=$labeltext."\n";
              $wrapped_mode = 1;
              $self->{bullet} = "";
              $self->{indent} = $indent;
              my $t = $self->translate($label,
                                       $self->{ref},
                                       "Labeled list",
                                       "wrap" => 0);
              $self->pushline("$indent$t$labelend");
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
              my $attrname = $1;
              my $attrsep = $2;
              my $attrvalue = $3;
              # Found an Attribute entry; only its value is translated
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              undef $self->{bullet};
              undef $self->{indent};
              my $t = $self->translate($attrvalue,
                                       $self->{ref},
                                       "Attribute :$attrname:",
                                       "wrap" => 0);
              $self->pushline(":$attrname$attrsep$t\n");
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
              my $title = $1;
              # Found block title
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              undef $self->{bullet};
              undef $self->{indent};
              my $t = $self->translate($title,
                                       $self->{ref},
                                       "Block title",
                                       "wrap" => 0);
              $self->pushline(".$t\n");
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
              # Found a bulleted or numbered list item: remember the bullet
              # and indentation so that do_paragraph can put them back.
              my $indent = $1||"";
              my $bullet = $2;
              my $text = $3;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph = $text."\n";
              $self->{indent} = $indent;
              $self->{bullet} = $bullet;
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
              # Found an item introduced by "> ", "N> " or "<N> "
              my $bullet = $1;
              my $text = $2;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph = $text."\n";
              $self->{indent} = "";
              $self->{bullet} = $bullet;
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
              # Indented line while a bullet/label is active
              my $indent = $1;
              my $text = $2;
              if (not defined $self->{indent}) {
                  $paragraph .= $text."\n";
                  $self->{indent} = $indent;
              } elsif (length($paragraph) and (length($self->{bullet}) + length($self->{indent}) == length($indent))) {
                  # Aligned with the text of the item: same paragraph
                  $paragraph .= $text."\n";
              } else {
                  # Different alignment: start a new paragraph
                  do_paragraph($self,$paragraph,$wrapped_mode);
                  $paragraph = $text."\n";
                  $self->{indent} = $indent;
                  $self->{bullet} = "";
              }
          } elsif ($markdown and
                   (not defined($self->{verbatim})) and
                   ($line =~ m/^(={4,}|-{4,})$/) and
                   (defined($paragraph) )and
                   ($paragraph =~ m/^[^\n]*\n$/s) and
                   (length($paragraph) == (length($line)+1))) {
              # XXX: There can be any number of underlining according
              #      to the documentation. This detection, which avoids
              #      translating the formatting, is only supported if
              #      the underlining has the same size as the header text.
              # Found title.  The paragraph is translated with its trailing
              # newline, hence the "- 1" when regenerating the underline.
              $wrapped_mode = 0;
              my $level = $line;
              $level =~ s/^(.).*$/$1/;
              my $t = $self->translate($paragraph,
                                       $self->{ref},
                                       "Title $level",
                                       "wrap" => 0);
              $self->pushline($t);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline(($level x (length($t)-1))."\n");
          } elsif ($markdown and
                   ($line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/)) {
              my $titlelevel1 = $1;
              my $titlespaces = $2;
              my $title = $3;
              my $titlelevel2 = $4||"";
              # Found one line title
              do_paragraph($self,$paragraph,$wrapped_mode);
              $wrapped_mode = 0;
              $paragraph="";
              my $t = $self->translate($title,
                                       $self->{ref},
                                       "Title $titlelevel1",
                                       "wrap" => 0);
              $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
              $wrapped_mode = 1;
          } elsif ($markdown and
                   ($paragraph eq "") and
                   ($line =~ /^((\*\s*){3,}|(-\s*){3,}|(_\s*){3,})$/)) {
              # Horizontal rule
              $wrapped_mode = 1;
              $self->pushline($line."\n");
          } elsif ($line =~ /^-- $/) {
              # Break paragraphs on email signature hint
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline($line."\n");
          } elsif (   $line =~ /^=+$/
                   or $line =~ /^_+$/
                   or $line =~ /^-+$/) {
              # Underline-like line: it ends the paragraph, which is kept
              # unwrapped together with this line.
              $wrapped_mode = 0;
              $paragraph .= $line."\n";
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
          } elsif ($markdown and
                   (   $line =~ /^\s*\[\[\!\S+\s*$/     # macro begin
                    or $line =~ /^\s*"""\s*\]\]\s*$/)) { # """ textblock inside macro end
              # Avoid translating Markdown lines containing only markup
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline("$line\n");
          } elsif ($markdown and
                   (   $line =~ /^#/                            # headline
                    or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/)) { # sole macro
              # Preserve some Markdown markup as a single line
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="$line\n";
              $wrapped_mode = 0;
              $end_of_paragraph = 1;
          } elsif ($markdown and
                   (   $line =~ /^"""/)) { # """ textblock inside macro end
              # Markdown markup needing separation _before_ this line
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="$line\n";
              $wrapped_mode = 1;
          } elsif ($tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s) {
              # tabs=split: first line with a tabulation after tab-free text
              $wrapped_mode = 0;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph = "$line\n";
              $wrapped_mode = 0;
          } elsif ($tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s) {
              # tabs=split: first tab-free line after text with tabulations
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph = "$line\n";
              $wrapped_mode = 1;
          } else {
              # Ordinary line: it joins the current paragraph
              if ($line =~ /^\s/) {
                  # A line starting by a space indicates a non-wrap
                  # paragraph
                  $wrapped_mode = 0;
              }
              if ($markdown and
                  (   $line =~ /\S {2,}$/ # explicit newline (two or more trailing spaces)
                   or $line =~ /"""$/)) { # """ textblock inside macro begin
                  # Markdown markup needing separation _after_ this line
                  $end_of_paragraph = 1;
              } else {
                  undef $self->{bullet};
                  undef $self->{indent};
              }
              if ($fortunes) {
                  # Drop fortune comments
                  $line =~ s/%%(.*)$//;
              }
              # TODO: comments
              $paragraph .= $line."\n";
          }

          # paragraphs starting by a bullet, or numbered
          # or paragraphs with a line containing three or more consecutive
          # spaces or tabulations
          # are considered as verbatim paragraphs
          $wrapped_mode = 0 if (   $paragraph =~ m/^(\*|[0-9]+[.)] )/s
                                or $paragraph =~ m/[ \t][ \t][ \t]/s);
          $wrapped_mode = 0 if (    $tabs eq "verbatim"
                                and $paragraph =~ m/\t/s);
          if ($markdown) {
              # Some Markdown markup can (or might) not survive wrapping
              $wrapped_mode = 0 if (
                     $paragraph =~ /^>/ms                    # blockquote
                  or $paragraph =~ /^( {8}|\t)/ms            # monospaced
                  or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms   # Xapian macro
                  or $paragraph =~ /<(?![a-z]+[:@])/ms       # maybe html (tags but not wiki <URI>)
                  or $paragraph =~ /^[^<]+>/ms               # maybe html (tag with vertical space)
                  or $paragraph =~ /\S {2,}$/ms              # explicit newline
                  or $paragraph =~ /\[\[\!\S[^\]]+$/ms       # macro begin
              );
          }
          if ($end_of_paragraph) {
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $end_of_paragraph = 0;
          }
          ($line,$ref)=$self->shiftline();
      }

      # Flush whatever is left once the input is exhausted
      if (length $paragraph) {
          do_paragraph($self,$paragraph,$wrapped_mode);
      }
  }
  # Translate one paragraph and push the result to the output.
  #
  # Arguments:
  #   $self      - the parser object (translate() and pushline() are called
  #                on it; {type}, {verbatim}, {bullet}, {indent}, {ref} are
  #                read)
  #   $paragraph - text of the paragraph; nothing is done when it is empty
  #   $wrap      - true when the paragraph may be rewrapped
  #   $type      - optional comment for translate(); defaults to
  #                $self->{type}, then to "Plain text"
  sub do_paragraph {
      my $self      = shift;
      my $paragraph = shift;
      my $wrap      = shift;
      my $type      = shift || $self->{type} || "Plain text";

      return if ($paragraph eq "");

      if ($bullets and not $wrap and not defined $self->{verbatim}) {
          # Detect bullets
          # |        * blah blah
          # |<spaces>  blah
          # |          ^-- aligned
          # <empty line>
          #
          # Other bullets supported:
          # - blah         o blah         + blah
          # 1. blah        1) blah       (1) blah
          #
          # Each turn of the loop handles one leading bullet item; the loop
          # is left as soon as the text does not look like a clean item, and
          # what remains in $paragraph is handled as a plain paragraph below.
          BULLET_ITEM:
          while ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
              my ($lead, $marker, $item, $rest) = ($1, $2, $4, $5);
              # Indentation expected on the continuation lines of the item
              my $hang = $lead . (' ' x length $marker);

              # Gather the continuation lines aligned with the item text
              while ($rest !~ m/$hang(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                     and $rest =~ s/^$hang(\S[^\n]*\n)//s) {
                  $item .= $1;
              }

              # TODO: detect if a line starts with the same bullet
              # Runs of spaces inside the text: leave the paragraph alone
              last BULLET_ITEM if ($item =~ m/\S[ \t][ \t][ \t]+\S/s);

              # What follows must be nothing, or another item of the same
              # kind (any number accepted for numbered bullets)
              my $marker_regex = quotemeta($lead.$marker);
              $marker_regex =~ s/[0-9]+/\\d\+/;
              last BULLET_ITEM
                  unless ($rest eq '' or $rest =~ m/^$marker_regex\S/s);

              my $translated = $self->translate($item,
                                                $self->{ref},
                                                "Bullet: '".$lead.$marker."'",
                                                "wrap" => 1,
                                                "wrapcol" => - (length $hang));
              $translated =~ s/^/$lead$marker/s;
              $translated =~ s/\n(.)/\n$hang$1/sg;
              $self->pushline( $translated."\n" );

              return if ($rest eq '');

              # Another bullet
              $paragraph = $rest;
          }
      }

      # Plain paragraph.  When wrapping, the trailing newlines are kept out
      # of the translated string and appended again afterwards.
      my $end = "";
      if ($wrap) {
          $paragraph =~ s/^(.*?)(\n*)$/$1/s;
          $end = $2 || "";
      }
      my $output = $self->translate($paragraph,
                                    $self->{ref},
                                    $type,
                                    "wrap" => $wrap);

      # Put back the bullet and indentation recorded by the parser
      if (defined $self->{bullet}) {
          my $marker = $self->{bullet};
          my $lead   = $self->{indent};
          my $hang   = $lead.(' ' x length($marker));
          $output =~ s/^/$lead$marker/s;
          $output =~ s/\n(.)/\n$hang$1/sg;
      }
      $self->pushline( $output.$end );
  }
  678. 1;
  679. =head1 STATUS OF THIS MODULE
  680. Tested successfully on simple text files and NEWS.Debian files.
  681. =head1 AUTHORS
  682. Nicolas François <nicolas.francois@centraliens.net>
  683. =head1 COPYRIGHT AND LICENSE
  684. Copyright 2005-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
  685. This program is free software; you may redistribute it and/or modify it
  686. under the terms of GPL (see the COPYING file).