summaryrefslogtreecommitdiff
path: root/perl/Locale/Po4a/Text.pm
blob: 676363dd51fe21e60b023f1be78b06d33b039f01 (plain)
  1. #!/usr/bin/perl -w
  2. # Po4a::Text.pm
  3. #
  4. # extract and translate translatable strings from text documents
  5. #
  6. # This program is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 2 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc.,
  19. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. #
  21. ########################################################################
  22. =encoding UTF-8
  23. =head1 NAME
  24. Locale::Po4a::Text - Convert text documents from/to PO files
  25. =head1 DESCRIPTION
  26. The po4a (po for anything) project goal is to ease translations (and more
  27. interestingly, the maintenance of translations) using gettext tools on
  28. areas where they were not expected like documentation.
  29. Locale::Po4a::Text is a module to help the translation of text documents into
  30. other [human] languages.
  31. Paragraphs are split on empty lines (or lines containing only spaces or
  32. tabulations).
  33. If a paragraph contains a line starting with a space (or tabulation), this
  34. paragraph won't be rewrapped.
  35. =cut
  36. package Locale::Po4a::Text;
  37. use 5.006;
  38. use strict;
  39. use warnings;
  40. require Exporter;
  41. use vars qw(@ISA @EXPORT);
  42. @ISA = qw(Locale::Po4a::TransTractor);
  43. @EXPORT = qw();
  44. use Locale::Po4a::TransTractor;
  45. use Locale::Po4a::Common;
  46. =head1 OPTIONS ACCEPTED BY THIS MODULE
  47. These are this module's particular options:
  48. =over
  49. =item B<nobullets>
  50. Deactivate detection of bullets.
  51. By default, when a bullet is detected, the bullet paragraph is not considered
  52. as a verbatim paragraph (with the no-wrap flag in the PO file), but the module
  53. rewraps this paragraph in the generated PO file and in the translation.
  54. =cut
  55. my $bullets = 1;
  56. =item B<tabs>=I<mode>
  57. Specify how tabulations shall be handled. The I<mode> can be any of:
  58. =over
  59. =item B<split>
  60. Lines with tabulations introduce breaks in the current paragraph.
  61. =item B<verbatim>
  62. Paragraphs containing tabulations will not be re-wrapped.
  63. =back
  64. By default, tabulations are considered as spaces.
  65. =cut
  66. my $tabs = "";
  67. =item B<breaks>=I<regex>
  68. A regular expression matching lines which introduce breaks.
  69. The regular expression will be anchored so that the whole line must match.
  70. =cut
  71. my $breaks;
  72. =item B<debianchangelog>
  73. Handle the header and footer of
  74. released versions, which only contain non-translatable information.
  75. =cut
  76. my $debianchangelog = 0;
  77. =item B<fortunes>
  78. Handle the fortunes format, which separates fortunes with a line which
  79. consists of '%' or '%%', and uses '%%' as the beginning of a comment.
  80. =cut
  81. my $fortunes = 0;
  82. =item B<markdown>
  83. Handle some special markup in Markdown-formatted texts.
  84. =cut
  85. my $markdown = 0;
  86. =item B<asciidoc>
  87. Handle documents in the asciidoc format.
  88. =cut
  89. my $asciidoc = 0;
  90. =back
  91. =cut
  # Record the options given to this parser and derive the file-scoped
  # switches ($bullets, $tabs, $breaks, $debianchangelog, $fortunes,
  # $markdown, $asciidoc) that parse() and do_paragraph() consult.
  #
  # Arguments: the invocant, followed by a list of option => value pairs.
  # Dies (through wrap_mod) on the first option name that is not one of the
  # names registered below.
  sub initialize {
      my $self = shift;
      my %options = @_;

      # Register every option name this module accepts.
      foreach my $known (qw(asciidoc breaks debianchangelog debug fortunes
                            markdown nobullets tabs verbose)) {
          $self->{options}{$known} = 1;
      }

      foreach my $opt (keys %options) {
          die wrap_mod("po4a::text",
                       dgettext("po4a", "Unknown option: %s"), $opt)
              unless exists $self->{options}{$opt};
          $self->{options}{$opt} = $options{$opt};
      }

      # The switches below are shared by the whole file, so they must be
      # recomputed from scratch on every call: otherwise an option enabled
      # for one parser would silently stay enabled for every parser
      # initialized afterwards in the same process.
      $bullets         = defined $options{'nobullets'} ? 0 : 1;
      $tabs            = defined $options{'tabs'} ? $options{'tabs'} : "";
      $breaks          = $options{'breaks'};
      $debianchangelog = defined $options{'debianchangelog'} ? 1 : 0;
      $fortunes        = defined $options{'fortunes'} ? 1 : 0;
      $markdown        = defined $options{'markdown'} ? 1 : 0;
      $asciidoc        = defined $options{'asciidoc'} ? 1 : 0;
  }
  # Read the input line by line (via $self->shiftline), group the lines into
  # paragraphs and hand each finished paragraph to do_paragraph(); lines that
  # are pure markup are copied to the output with $self->pushline.
  #
  # Which constructs are recognized depends on the file-scoped switches
  # $debianchangelog, $fortunes, $asciidoc, $markdown, $tabs and $breaks.
  # Parser state kept on the object: {ref}, {verbatim}, {type}, {bullet},
  # {indent}.
  sub parse {
      my $self = shift;
      my ($line,$ref);
      my $paragraph="";
      my $wrapped_mode = 1;
      my $expect_header = 1;
      my $end_of_paragraph = 0;
      ($line,$ref)=$self->shiftline();
      # Empty input: nothing to parse, and $ref would be undefined below.
      return unless defined($line);
      my $file = $ref;
      $file =~ s/:[0-9]+$//;
      while (defined($line)) {
          # Flush the pending paragraph when the reference switches to
          # another file.  The capture is taken from the match result, so a
          # reference that does not end in ":<number>" is ignored instead of
          # being compared through a stale $1.
          my ($current_file) = $ref =~ m/^(.*):[0-9]+$/;
          if (defined $current_file and $current_file ne $file) {
              $file = $current_file;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $expect_header = 1;
          }

          chomp($line);
          $self->{ref}="$ref";
          if ($debianchangelog and
              $expect_header and
              $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\)  # src, version
                        \s+([-+0-9a-z.]+);                    # distribution
                        \s*urgency\s*\=\s*(.*\S)\s*$/ix) {
              # Found changelog header: copied untranslated
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $self->pushline("$line\n");
              $expect_header=0;
          } elsif ($debianchangelog and
                   $line =~ m/^ \-\- (.*) <(.*)>[ ]{2}((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
              # Found trailer.  The maintainer address and the date are
              # separated by two spaces in the changelog format.
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $self->pushline("$line\n");
              $expect_header=1;
          } elsif ($fortunes and
                   $line =~ m/^%%?\s*$/) {
              # Found end of fortune
              do_paragraph($self,$paragraph,$wrapped_mode);
              $self->pushline("\n") unless (   $wrapped_mode == 0
                                            or $paragraph eq "");
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline("$line\n");
          } elsif (    (defined $self->{verbatim})
                   and ($self->{verbatim} == 2)) {
              # Untranslated blocks
              $self->pushline($line."\n");
              if ($asciidoc and
                  ($line =~ m/^(\/{4,}|~{4,})$/)) {
                  undef $self->{verbatim};
                  undef $self->{type};
                  $wrapped_mode = 1;
              }
          } elsif (   ($line =~ /^\s*$/)
                   or (    defined $breaks
                       and $line =~ m/^$breaks$/)) {
              # Break paragraphs on lines containing only spaces
              # (or matching the user-supplied "breaks" expression)
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1 unless defined($self->{verbatim});
              $self->pushline($line."\n");
          } elsif ($asciidoc and (not defined($self->{verbatim})) and
                   ($line =~ m/^(\+|--)$/)) {
              # List Item Continuation or List Block
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $self->pushline($line."\n");
          } elsif ($asciidoc and (not defined($self->{verbatim})) and
                   ($line =~ m/^(={4,}|-{4,}|~{4,}|\^{4,}|\+{4,})$/) and
                   (defined($paragraph) )and
                   ($paragraph =~ m/^[^\n]*\n$/s) and
                   (length($paragraph) == (length($line)+1))) {
              # Found title: a single pending line underlined by a line of
              # the same length
              $wrapped_mode = 0;
              my $level = $line;
              $level =~ s/^(.).*$/$1/;
              my $t = $self->translate($paragraph,
                                       $self->{ref},
                                       "Title $level",
                                       "wrap" => 0);
              $self->pushline($t);
              $paragraph="";
              $wrapped_mode = 1;
              # Resize the underline to the translated title
              $self->pushline(($level x (length($t)-1))."\n");
          } elsif ($asciidoc and
                   ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/)) {
              my $titlelevel1 = $1;
              my $titlespaces = $2;
              my $title = $3;
              my $titlelevel2 = $4||"";
              # Found one line title
              do_paragraph($self,$paragraph,$wrapped_mode);
              $wrapped_mode = 0;
              $paragraph="";
              my $t = $self->translate($title,
                                       $self->{ref},
                                       "Title $titlelevel1",
                                       "wrap" => 0);
              $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
              $wrapped_mode = 1;
          } elsif ($asciidoc and
                   ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,})$/)) {
              # Found one delimited block
              my $t = $line;
              $t =~ s/^(.).*$/$1/;
              my $type = "delimited block $t";
              if (defined $self->{verbatim} and ($self->{type} ne $type)) {
                  # Delimiter of another kind inside a verbatim block:
                  # plain content
                  $paragraph .= "$line\n";
              } else {
                  do_paragraph($self,$paragraph,$wrapped_mode);
                  if (    (defined $self->{type})
                      and ($self->{type} eq $type)) {
                      # Closing delimiter of the current block
                      undef $self->{type};
                      undef $self->{verbatim};
                      $wrapped_mode = 1;
                  } else {
                      if ($t eq "\/") {
                          # CommentBlock, should not be treated
                          $self->{verbatim} = 2;
                      } elsif ($t eq "+") {
                          # PassthroughBlock
                          $wrapped_mode = 0;
                          $self->{verbatim} = 1;
                      } elsif ($t eq "-") {
                          # ListingBlock
                          $wrapped_mode = 0;
                          $self->{verbatim} = 1;
                      } elsif ($t eq ".") {
                          # LiteralBlock
                          $wrapped_mode = 0;
                          $self->{verbatim} = 1;
                      } elsif ($t eq "*") {
                          # SidebarBlock
                          $wrapped_mode = 1;
                      } elsif ($t eq "_") {
                          # QuoteBlock
                          if (    (defined $self->{type})
                              and ($self->{type} eq "verse")) {
                              $wrapped_mode = 0;
                              $self->{verbatim} = 1;
                          } else {
                              $wrapped_mode = 1;
                          }
                      } elsif ($t eq "=") {
                          # ExampleBlock
                          $wrapped_mode = 1;
                      } elsif ($t eq "~") {
                          # Filter blocks, TBC: not translated
                          $wrapped_mode = 0;
                          $self->{verbatim} = 2;
                      }
                      $self->{type} = $type;
                  }
                  $paragraph="";
                  $self->pushline($line."\n");
              }
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
              # Found BlockId
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline($line."\n");
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($paragraph eq "") and
                   ($line =~ m/^((?:NOTE|TIP|IMPORTANT|WARNING|CAUTION):\s+)(.*)$/)) {
              # Paragraph introduced by "NOTE: " and the like: the label is
              # copied as is, the rest of the line starts a new paragraph
              my $type = $1;
              my $text = $2;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph=$text."\n";
              $wrapped_mode = 1;
              $self->pushline($type);
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[(NOTE|TIP|IMPORTANT|WARNING|CAUTION|verse|quote)\]$/)) {
              # Style line such as "[NOTE]" or "[verse]": copied as is
              my $type = $1;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline($line."\n");
              if ($type eq "verse") {
                  $wrapped_mode = 0;
              }
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[(verse|quote), +(.*)\]$/)) {
              # "[verse, ...]" or "[quote, ...]": the argument is translated
              my $type = $1;
              my $arg = $2;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              my $t = $self->translate($arg,
                                       $self->{ref},
                                       "$type",
                                       "wrap" => 0);
              $self->pushline("[$type, $t]\n");
              $wrapped_mode = 1;
              if ($type eq "verse") {
                  $wrapped_mode = 0;
              }
              $self->{type} = $type;
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[icon="(.*)"\]$/)) {
              # [icon="..."]: the quoted value is translated
              my $arg = $1;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              my $t = $self->translate($arg,
                                       $self->{ref},
                                       "icon",
                                       "wrap" => 0);
              $self->pushline("[icon=\"$t\"]\n");
              $wrapped_mode = 1;
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
              # [icons=None, caption="..."]: the caption is translated
              my $arg = $1;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              my $t = $self->translate($arg,
                                       $self->{ref},
                                       "caption",
                                       "wrap" => 0);
              $self->pushline("[icons=None, caption=\"$t\"]\n");
              $wrapped_mode = 1;
              undef $self->{bullet};
              undef $self->{indent};
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
              my $indent = $1;
              my $label = $2;
              my $labelend = $3;
              # Found labeled list
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->{bullet} = "";
              $self->{indent} = $indent;
              my $t = $self->translate($label,
                                       $self->{ref},
                                       "Labeled list",
                                       "wrap" => 0);
              $self->pushline("$indent$t$labelend\n");
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
              my $indent = $1;
              my $label = $2;
              my $labelend = $3;
              my $labeltext = $4;
              # Found Horizontal Labeled Lists
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph=$labeltext."\n";
              $wrapped_mode = 1;
              $self->{bullet} = "";
              $self->{indent} = $indent;
              my $t = $self->translate($label,
                                       $self->{ref},
                                       "Labeled list",
                                       "wrap" => 0);
              $self->pushline("$indent$t$labelend");
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
              my $attrname = $1;
              my $attrsep = $2;
              my $attrvalue = $3;
              # Found an Attribute entry
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              undef $self->{bullet};
              undef $self->{indent};
              my $t = $self->translate($attrvalue,
                                       $self->{ref},
                                       "Attribute :$attrname:",
                                       "wrap" => 0);
              $self->pushline(":$attrname$attrsep$t\n");
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
              my $title = $1;
              # Found block title
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              undef $self->{bullet};
              undef $self->{indent};
              my $t = $self->translate($title,
                                       $self->{ref},
                                       "Block title",
                                       "wrap" => 0);
              $self->pushline(".$t\n");
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
              # Bulleted or numbered list item: remember the bullet and its
              # indentation, they are put back by do_paragraph()
              my $indent = $1||"";
              my $bullet = $2;
              my $text = $3;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph = $text."\n";
              $self->{indent} = $indent;
              $self->{bullet} = $bullet;
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
              # Item introduced by "> ", "1> " or "<1> "
              my $bullet = $1;
              my $text = $2;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph = $text."\n";
              $self->{indent} = "";
              $self->{bullet} = $bullet;
          } elsif ($asciidoc and not defined $self->{verbatim} and
                   (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
              # Indented line following a list item
              my $indent = $1;
              my $text = $2;
              if (not defined $self->{indent}) {
                  $paragraph .= $text."\n";
                  $self->{indent} = $indent;
              } elsif (length($paragraph) and (length($self->{bullet}) + length($self->{indent}) == length($indent))) {
                  # Aligned with the text of the item: same paragraph
                  $paragraph .= $text."\n";
              } else {
                  do_paragraph($self,$paragraph,$wrapped_mode);
                  $paragraph = $text."\n";
                  $self->{indent} = $indent;
                  $self->{bullet} = "";
              }
          } elsif ($markdown and
                   (not defined($self->{verbatim})) and
                   ($line =~ m/^(={4,}|-{4,})$/) and
                   (defined($paragraph) )and
                   ($paragraph =~ m/^[^\n]*\n$/s) and
                   (length($paragraph) == (length($line)+1))) {
              # XXX: There can be any number of underlining according
              #      to the documentation. This detection, which avoids
              #      translating the formatting, is only supported if
              #      the underlining has the same size as the header text.
              # Found title
              $wrapped_mode = 0;
              my $level = $line;
              $level =~ s/^(.).*$/$1/;
              my $t = $self->translate($paragraph,
                                       $self->{ref},
                                       "Title $level",
                                       "wrap" => 0);
              $self->pushline($t);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline(($level x (length($t)-1))."\n");
          } elsif ($markdown and
                   ($line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/)) {
              my $titlelevel1 = $1;
              my $titlespaces = $2;
              my $title = $3;
              my $titlelevel2 = $4||"";
              # Found one line title
              do_paragraph($self,$paragraph,$wrapped_mode);
              $wrapped_mode = 0;
              $paragraph="";
              my $t = $self->translate($title,
                                       $self->{ref},
                                       "Title $titlelevel1",
                                       "wrap" => 0);
              $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
              $wrapped_mode = 1;
          } elsif ($markdown and
                   ($paragraph eq "") and
                   ($line =~ /^((\*\s*){3,}|(-\s*){3,}|(_\s*){3,})$/)) {
              # Horizontal rule
              $wrapped_mode = 1;
              $self->pushline($line."\n");
          } elsif ($line =~ /^-- $/) {
              # Break paragraphs on email signature hint
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline($line."\n");
          } elsif (   $line =~ /^=+$/
                   or $line =~ /^_+$/
                   or $line =~ /^-+$/) {
              # Line made only of '=', '_' or '-': it ends the current
              # paragraph, which is then handled without wrapping
              $wrapped_mode = 0;
              $paragraph .= $line."\n";
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
          } elsif ($markdown and
                   (   $line =~ /^\s*\[\[\!\S+\s*$/     # macro begin
                    or $line =~ /^\s*"""\s*\]\]\s*$/)) { # """ textblock inside macro end
              # Avoid translating Markdown lines containing only markup
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $self->pushline("$line\n");
          } elsif ($markdown and
                   (   $line =~ /^#/                            # headline
                    or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/)) { # sole macro
              # Preserve some Markdown markup as a single line
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="$line\n";
              $wrapped_mode = 0;
              $end_of_paragraph = 1;
          } elsif ($markdown and
                   (   $line =~ /^"""/)) { # """ textblock inside macro end
              # Markdown markup needing separation _before_ this line
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="$line\n";
              $wrapped_mode = 1;
          } elsif ($tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s) {
              # First line with a tabulation: start a non-wrapped paragraph
              $wrapped_mode = 0;
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph = "$line\n";
              $wrapped_mode = 0;
          } elsif ($tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s) {
              # First line without tabulation after a tabulated paragraph
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph = "$line\n";
              $wrapped_mode = 1;
          } else {
              if ($line =~ /^\s/) {
                  # A line starting by a space indicates a non-wrap
                  # paragraph
                  $wrapped_mode = 0;
              }
              if ($markdown and
                  (   $line =~ /\S {2,}$/  # explicit newline (two or more trailing spaces)
                   or $line =~ /"""$/)) {  # """ textblock inside macro begin
                  # Markdown markup needing separation _after_ this line
                  $end_of_paragraph = 1;
              } else {
                  undef $self->{bullet};
                  undef $self->{indent};
              }
              if ($fortunes) {
                  # Drop the comment introduced by '%%'
                  $line =~ s/%%(.*)$//;
              }
              # TODO: comments
              $paragraph .= $line."\n";
          }
          # paragraphs starting by a bullet, or numbered
          # or paragraphs with a line containing many consecutive spaces
          # (more than 3)
          # are considered as verbatim paragraphs
          $wrapped_mode = 0 if (   $paragraph =~ m/^(\*|[0-9]+[.)] )/s
                                or $paragraph =~ m/[ \t][ \t][ \t]/s);
          $wrapped_mode = 0 if (    $tabs eq "verbatim"
                                and $paragraph =~ m/\t/s);
          if ($markdown) {
              # Some Markdown markup can (or might) not survive wrapping
              $wrapped_mode = 0 if (
                     $paragraph =~ /^>/ms                     # blockquote
                  or $paragraph =~ /^( {8}|\t)/ms             # monospaced
                  or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms    # Xapian macro
                  or $paragraph =~ /<(?![a-z]+[:@])/ms        # maybe html (tags but not wiki <URI>)
                  or $paragraph =~ /^[^<]+>/ms                # maybe html (tag with vertical space)
                  or $paragraph =~ /\[\[\!\S[^\]]+$/ms        # macro begin
              );
          }
          if ($end_of_paragraph) {
              do_paragraph($self,$paragraph,$wrapped_mode);
              $paragraph="";
              $wrapped_mode = 1;
              $end_of_paragraph = 0;
          }
          ($line,$ref)=$self->shiftline();
      }
      # Flush whatever is still pending at end of input
      if (length $paragraph) {
          do_paragraph($self,$paragraph,$wrapped_mode);
      }
  }
  # Translate one paragraph and push the result to the output.
  #
  # Arguments:
  #   $self      - the parser object (provides translate, pushline and the
  #                {ref}, {type}, {verbatim}, {bullet}, {indent} state)
  #   $paragraph - the paragraph text; nothing is done when it is empty
  #   $wrap      - true if the paragraph may be re-wrapped
  #   $type      - optional type string passed to translate; defaults to
  #                $self->{type}, then to "Plain text"
  sub do_paragraph {
      my ($self, $paragraph, $wrap) = (shift, shift, shift);
      my $type = shift || $self->{type} || "Plain text";
      return if ($paragraph eq "");

  # DEBUG
  #    my $b;
  #    if (defined $self->{bullet}) {
  #        $b = $self->{bullet};
  #    } else {
  #        $b = "UNDEF";
  #    }
  #    $type .= " verbatim: '".($self->{verbatim}||"NONE")."' bullet: '$b' indent: '".($self->{indent}||"NONE")."' type: '".($self->{type}||"NONE")."'";

      if ($bullets and not $wrap and not defined $self->{verbatim}) {
          # Detect bullets
          # |        * blah blah
          # |<spaces>  blah
          # |          ^-- aligned
          # <empty line>
          #
          # Other bullets supported:
          # - blah         o blah         + blah
          # 1. blah       1) blah       (1) blah
          #
          # Each iteration handles the leading bullet item; the loop goes on
          # with the remainder while it starts with the same kind of bullet.
          while ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
              my ($indent1, $bullet, $text, $para) = ($1, $2, $4, $5);
              # Continuation lines are expected under the item's text
              my $indent2 = $indent1.(' ' x length $bullet);
              while (    $para !~ m/$indent2(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                     and $para =~ s/^$indent2(\S[^\n]*\n)//s) {
                  $text .= $1;
              }
              # TODO: detect if a line starts with the same bullet

              # Text with wide gaps between words is not re-wrapped
              last if ($text =~ m/\S[ \t][ \t][ \t]+\S/s);

              # The remainder must be empty or start with the same bullet
              # (any number, for numbered bullets)
              my $bullet_regex = quotemeta($indent1.$bullet);
              $bullet_regex =~ s/[0-9]+/\\d\+/;
              last unless ($para eq '' or $para =~ m/^$bullet_regex\S/s);

              my $trans = $self->translate($text,
                                           $self->{ref},
                                           "Bullet: '$indent1$bullet'",
                                           "wrap" => 1,
                                           "wrapcol" => - (length $indent2));
              $trans =~ s/^/$indent1$bullet/s;
              $trans =~ s/\n(.)/\n$indent2$1/sg;
              $self->pushline( $trans."\n" );

              return if ($para eq '');
              # Another bullet
              $paragraph = $para;
          }
      }

      my $end = "";
      if ($wrap) {
          # Keep the trailing newlines out of the translated string and
          # put them back afterwards
          $paragraph =~ s/^(.*?)(\n*)$/$1/s;
          $end = $2 || "";
      }
      my $t = $self->translate($paragraph,
                               $self->{ref},
                               $type,
                               "wrap" => $wrap);
      if (defined $self->{bullet}) {
          # Put the bullet recorded by parse() back in front of the
          # translation and indent the following lines accordingly
          my $bullet = $self->{bullet};
          my $indent1 = defined $self->{indent} ? $self->{indent} : "";
          my $indent2 = $indent1.(' ' x length($bullet));
          $t =~ s/^/$indent1$bullet/s;
          $t =~ s/\n(.)/\n$indent2$1/sg;
      }
      $self->pushline( $t.$end );
  }
  676. 1;
  677. =head1 STATUS OF THIS MODULE
  678. Tested successfully on simple text files and NEWS.Debian files.
  679. =head1 AUTHORS
  680. Nicolas François <nicolas.francois@centraliens.net>
  681. =head1 COPYRIGHT AND LICENSE
  682. Copyright 2005-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
  683. This program is free software; you may redistribute it and/or modify it
  684. under the terms of GPL (see the COPYING file).