path: root/IkiWiki/Plugin/aggregate.pm
blob: 7d456534202bafa8e151db14e0a7375a66b24930
#!/usr/bin/perl
# Feed aggregation plugin.
package IkiWiki::Plugin::aggregate;

use warnings;
use strict;
use IkiWiki 3.00;
use HTML::Parser;
use HTML::Tagset;
use HTML::Entities;
use POSIX ();		# POSIX::pathconf, used to limit page name length
use Fcntl qw{:flock};	# LOCK_EX, LOCK_NB for the aggregate lock
use open qw{:utf8 :std};

my %feeds;
my %guids;
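
# Register hooks. The cgi hook for the web trigger is only registered
# when aggregate_webtrigger is enabled in the setup file.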
sub import {
	hook(type => "getopt", id => "aggregate", call => \&getopt);
	hook(type => "getsetup", id => "aggregate", call => \&getsetup);
	hook(type => "checkconfig", id => "aggregate", call => \&checkconfig);
	hook(type => "needsbuild", id => "aggregate", call => \&needsbuild);
	hook(type => "preprocess", id => "aggregate", call => \&preprocess);
	hook(type => "delete", id => "aggregate", call => \&delete);
	hook(type => "savestate", id => "aggregate", call => \&savestate);
	hook(type => "htmlize", id => "_aggregated", call => \&htmlize);
	if (exists $config{aggregate_webtrigger} && $config{aggregate_webtrigger}) {
		hook(type => "cgi", id => "aggregate", call => \&cgi);
	}
}
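
# Command-line options: --aggregate forces an aggregation run;
# --aggregateinternal / --no-aggregateinternal toggle aggregation to
# internal pages.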
sub getopt () {
	eval q{use Getopt::Long};
	error($@) if $@;
	Getopt::Long::Configure('pass_through');
	GetOptions(
		"aggregate" => \$config{aggregate},
		"aggregateinternal!" => \$config{aggregateinternal},
	);
}

sub getsetup () {
	return
		plugin => {
			safe => 1,
			rebuild => undef,
		},
		aggregateinternal => {
			type => "boolean",
			example => 1,
			description => "enable aggregation to internal pages?",
			safe => 0, # enabling needs manual transition
			rebuild => 0,
		},
		aggregate_webtrigger => {
			type => "boolean",
			example => 0,
			description => "allow aggregation to be triggered via the web?",
			safe => 1,
			rebuild => 0,
		},
}
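
# Aggregate to internal pages unless the setup file says otherwise, and
# launch aggregation when --aggregate was passed, unless this is a
# post-commit run with the commit hook enabled.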
sub checkconfig () {
	if (! defined $config{aggregateinternal}) {
		$config{aggregateinternal}=1;
	}

	if ($config{aggregate} && ! ($config{post_commit} &&
	                             IkiWiki::commit_hook_enabled())) {
		launchaggregation();
	}
}
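
# Handle ?do=aggregate_webtrigger: aggregate on demand and report
# progress as plain text.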
sub cgi ($) {
	my $cgi=shift;

	if (defined $cgi->param('do') &&
	    $cgi->param("do") eq "aggregate_webtrigger") {
		$|=1;
		print "Content-Type: text/plain\n\n";
		$config{cgi}=0;
		$config{verbose}=1;
		$config{syslog}=0;
		print gettext("Aggregation triggered via web.")."\n\n";
		if (launchaggregation()) {
			IkiWiki::lockwiki();
			IkiWiki::loadindex();
			require IkiWiki::Render;
			IkiWiki::refresh();
			IkiWiki::saveindex();
		}
		else {
			print gettext("Nothing to do right now, all feeds are up-to-date!")."\n";
		}
		exit 0;
	}
}
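
# Check which feeds are due and aggregate them in a forked child, so
# state accumulated while aggregating never leaks into the parent's
# rebuild. Returns true if anything was aggregated.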
sub launchaggregation () {
	# See if any feeds need aggregation.
	loadstate();
	my @feeds=needsaggregate();
	return unless @feeds;
	if (! lockaggregate()) {
		debug("an aggregation process is already running");
		return;
	}
	# force a later rebuild of source pages
	$IkiWiki::forcerebuild{$_->{sourcepage}}=1
		foreach @feeds;

	# Fork a child process to handle the aggregation.
	# The parent process will then handle building the
	# result. This avoids messy code to clear state
	# accumulated while aggregating.
	defined(my $pid = fork) or error("Can't fork: $!");
	if (! $pid) {
		IkiWiki::loadindex();
		# Aggregation happens without the main wiki lock
		# being held. This allows editing pages etc while
		# aggregation is running.
		aggregate(@feeds);
		IkiWiki::lockwiki;
		# Merge changes, since aggregation state may have
		# changed on disk while the aggregation was happening.
		mergestate();
		expire();
		savestate();
		IkiWiki::unlockwiki;
		exit 0;
	}
	waitpid($pid,0);
	if ($?) {
		error "aggregation failed with code $?";
	}

	clearstate();
	unlockaggregate();

	return 1;
}

# Pages with extension _aggregated have plain html markup, pass through.
sub htmlize (@) {
	my %params=@_;
	return $params{content};
}

# Used by ikiwiki-transition aggregateinternal.
sub migrate_to_internal {
	if (! lockaggregate()) {
		error("an aggregation process is currently running");
	}

	IkiWiki::lockwiki();
	loadstate();
	$config{verbose}=1;

	foreach my $data (values %guids) {
		next unless $data->{page};
		next if $data->{expired};

		$config{aggregateinternal} = 0;
		my $oldname = "$config{srcdir}/".htmlfn($data->{page});
		if (! -e $oldname) {
			$oldname = $IkiWiki::Plugin::transient::transientdir."/".htmlfn($data->{page});
		}

		my $oldoutput = $config{destdir}."/".IkiWiki::htmlpage($data->{page});

		$config{aggregateinternal} = 1;
		my $newname = $IkiWiki::Plugin::transient::transientdir."/".htmlfn($data->{page});

		debug "moving $oldname -> $newname";
		if (-e $newname) {
			if (-e $oldname) {
				error("$newname already exists");
			}
			else {
				debug("already renamed to $newname?");
			}
		}
		elsif (-e $oldname) {
			rename($oldname, $newname) || error("$!");
		}
		else {
			debug("$oldname not found");
		}
		if (-e $oldoutput) {
			require IkiWiki::Render;
			debug("removing output file $oldoutput");
			IkiWiki::prune($oldoutput);
		}
	}

	savestate();
	IkiWiki::unlockwiki;

	unlockaggregate();
}
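
# Keep feed state in sync with pages that are about to be rebuilt.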
sub needsbuild (@) {
	my $needsbuild=shift;

	loadstate();

	foreach my $feed (values %feeds) {
		if (exists $pagesources{$feed->{sourcepage}} &&
		    grep { $_ eq $pagesources{$feed->{sourcepage}} } @$needsbuild) {
			# Mark all feeds originating on this page as
			# not yet seen; preprocess will unmark those that
			# still exist.
			markunseen($feed->{sourcepage});
		}
	}

	return $needsbuild;
}
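
# Handle the [[!aggregate]] directive: record or update the feed's
# settings and replace the directive with a short status line.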
sub preprocess (@) {
	my %params=@_;

	foreach my $required (qw{name url}) {
		if (! exists $params{$required}) {
			error sprintf(gettext("missing %s parameter"), $required)
		}
	}

	my $feed={};
	my $name=$params{name};
	if (exists $feeds{$name}) {
		$feed=$feeds{$name};
	}
	else {
		$feeds{$name}=$feed;
	}
	$feed->{name}=$name;
	$feed->{sourcepage}=$params{page};
	$feed->{url}=$params{url};
	my $dir=exists $params{dir} ? $params{dir} : $params{page}."/".titlepage($params{name});
	$dir=~s/^\/+//;
	($dir)=$dir=~/$config{wiki_file_regexp}/;
	$feed->{dir}=$dir;
	$feed->{feedurl}=defined $params{feedurl} ? $params{feedurl} : "";
	$feed->{updateinterval}=defined $params{updateinterval} ? $params{updateinterval} * 60 : 15 * 60;
	$feed->{expireage}=defined $params{expireage} ? $params{expireage} : 0;
	$feed->{expirecount}=defined $params{expirecount} ? $params{expirecount} : 0;
	if (exists $params{template}) {
		$params{template}=~s/[^-_a-zA-Z0-9]+//g;
	}
	else {
		$params{template} = "aggregatepost";
	}
	$feed->{template}=$params{template} . ".tmpl";
	delete $feed->{unseen};
	$feed->{lastupdate}=0 unless defined $feed->{lastupdate};
	$feed->{lasttry}=$feed->{lastupdate} unless defined $feed->{lasttry};
	$feed->{numposts}=0 unless defined $feed->{numposts};
	$feed->{newposts}=0 unless defined $feed->{newposts};
	$feed->{message}=gettext("new feed") unless defined $feed->{message};
	$feed->{error}=0 unless defined $feed->{error};
	$feed->{tags}=[];
	while (@_) {
		my $key=shift;
		my $value=shift;
		if ($key eq 'tag') {
			push @{$feed->{tags}}, $value;
		}
	}

	return "<a href=\"".$feed->{url}."\">".$feed->{name}."</a>: ".
	       ($feed->{error} ? "<em>" : "").$feed->{message}.
	       ($feed->{error} ? "</em>" : "").
	       " (".$feed->{numposts}." ".gettext("posts").
	       ($feed->{newposts} ? "; ".$feed->{newposts}.
	                            " ".gettext("new") : "").
	       ")";
}

sub delete (@) {
	my @files=@_;

	# Remove feed data for removed pages.
	foreach my $file (@files) {
		my $page=pagename($file);
		markunseen($page);
	}
}

sub markunseen ($) {
	my $page=shift;

	foreach my $id (keys %feeds) {
		if ($feeds{$id}->{sourcepage} eq $page) {
			$feeds{$id}->{unseen}=1;
		}
	}
}

my $state_loaded=0;
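
# Load feed and guid state from the aggregate state file; each line is
# a space-separated list of entity-escaped field=value pairs.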
sub loadstate () {
	return if $state_loaded;
	$state_loaded=1;
	if (-e "$config{wikistatedir}/aggregate") {
		open(IN, "<", "$config{wikistatedir}/aggregate") ||
			die "$config{wikistatedir}/aggregate: $!";
		while (<IN>) {
			$_=IkiWiki::possibly_foolish_untaint($_);
			chomp;
			my $data={};
			foreach my $i (split(/ /, $_)) {
				my ($field, $val)=split(/=/, $i, 2);
				if ($field eq "name" || $field eq "feed" ||
				    $field eq "guid" || $field eq "message") {
					$data->{$field}=decode_entities($val, " \t\n");
				}
				elsif ($field eq "tag") {
					push @{$data->{tags}}, $val;
				}
				else {
					$data->{$field}=$val;
				}
			}

			if (exists $data->{name}) {
				$feeds{$data->{name}}=$data;
			}
			elsif (exists $data->{guid}) {
				$guids{$data->{guid}}=$data;
			}
		}

		close IN;
	}
}
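
# Write state out atomically: write aggregate.new, rename it over the
# old state file, then record when the next feed is due in a companion
# aggregatetime file.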
sub savestate () {
	return unless $state_loaded;
	garbage_collect();
	my $newfile="$config{wikistatedir}/aggregate.new";
	my $cleanup = sub { unlink($newfile) };
	open (OUT, ">", $newfile) || error("open $newfile: $!", $cleanup);
	foreach my $data (values %feeds, values %guids) {
		my @line;
		foreach my $field (keys %$data) {
			if ($field eq "name" || $field eq "feed" ||
			    $field eq "guid" || $field eq "message") {
				push @line, "$field=".encode_entities($data->{$field}, " \t\n");
			}
			elsif ($field eq "tags") {
				push @line, "tag=$_" foreach @{$data->{tags}};
			}
			else {
				push @line, "$field=".$data->{$field}
					if defined $data->{$field};
			}
		}
		# "or", not "||": || would bind to the string, which is
		# always true, so the error check would never fire.
		print OUT join(" ", @line)."\n" or error("write $newfile: $!", $cleanup);
	}
	close OUT || error("save $newfile: $!", $cleanup);
	rename($newfile, "$config{wikistatedir}/aggregate") ||
		error("rename $newfile: $!", $cleanup);

	my $timestamp=undef;
	foreach my $feed (keys %feeds) {
		my $t=$feeds{$feed}->{lastupdate}+$feeds{$feed}->{updateinterval};
		if (! defined $timestamp || $timestamp > $t) {
			$timestamp=$t;
		}
	}
	$newfile=~s/\.new$/time/;
	open (OUT, ">", $newfile) || error("open $newfile: $!", $cleanup);
	if (defined $timestamp) {
		print OUT $timestamp."\n";
	}
	close OUT || error("save $newfile: $!", $cleanup);
}

sub garbage_collect () {
	foreach my $name (keys %feeds) {
		# remove any feeds that were not seen while building the pages
		# that used to contain them
		if ($feeds{$name}->{unseen}) {
			delete $feeds{$name};
		}
	}

	foreach my $guid (values %guids) {
		# any guid whose feed is gone should be removed
		if (! exists $feeds{$guid->{feed}}) {
			if (exists $guid->{page}) {
				unlink "$config{srcdir}/".htmlfn($guid->{page});
				unlink $IkiWiki::Plugin::transient::transientdir."/".htmlfn($guid->{page});
			}
			delete $guids{$guid->{guid}};
		}
		# handle expired guids
		elsif ($guid->{expired} && exists $guid->{page}) {
			unlink "$config{srcdir}/".htmlfn($guid->{page});
			unlink $IkiWiki::Plugin::transient::transientdir."/".htmlfn($guid->{page});
			delete $guid->{page};
			delete $guid->{md5};
		}
	}
}

sub mergestate () {
	# Load the current state in from disk, and merge into it
	# values from the state in memory that might have changed
	# during aggregation.
	my %myfeeds=%feeds;
	my %myguids=%guids;
	clearstate();
	loadstate();

	# All that can change in feed state during aggregation is a few
	# fields.
	foreach my $name (keys %myfeeds) {
		if (exists $feeds{$name}) {
			foreach my $field (qw{message lastupdate lasttry
			                      numposts newposts error}) {
				$feeds{$name}->{$field}=$myfeeds{$name}->{$field};
			}
		}
	}

	# New guids can be created during aggregation.
	# Guids have a few fields that may be updated during aggregation.
	# It's also possible that guids were removed from the on-disk state
	# while the aggregation was in process. That would only happen if
	# their feed was also removed, so any removed guids added back here
	# will be garbage collected later.
	foreach my $guid (keys %myguids) {
		if (! exists $guids{$guid}) {
			$guids{$guid}=$myguids{$guid};
		}
		else {
			foreach my $field (qw{md5}) {
				$guids{$guid}->{$field}=$myguids{$guid}->{$field};
			}
		}
	}
}

sub clearstate () {
	%feeds=();
	%guids=();
	$state_loaded=0;
}
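
# Expire old guids per feed: expireage is a maximum age in days,
# expirecount a maximum number of distinct posts to keep, newest first.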
sub expire () {
	foreach my $feed (values %feeds) {
		next unless $feed->{expireage} || $feed->{expirecount};
		my $count=0;
		my %seen;
		foreach my $item (sort { ($IkiWiki::pagectime{$b->{page}} || 0) <=> ($IkiWiki::pagectime{$a->{page}} || 0) }
		                  grep { exists $_->{page} && $_->{feed} eq $feed->{name} }
		                  values %guids) {
			if ($feed->{expireage}) {
				my $days_old = (time - ($IkiWiki::pagectime{$item->{page}} || 0)) / 60 / 60 / 24;
				if ($days_old > $feed->{expireage}) {
					debug(sprintf(gettext("expiring %s (%s days old)"),
						$item->{page}, int($days_old)));
					$item->{expired}=1;
				}
			}
			elsif ($feed->{expirecount} &&
			       $count >= $feed->{expirecount}) {
				debug(sprintf(gettext("expiring %s"), $item->{page}));
				$item->{expired}=1;
			}
			else {
				if (! $seen{$item->{page}}) {
					$seen{$item->{page}}=1;
					$count++;
				}
			}
		}
	}
}
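
# Return the feeds whose update interval has elapsed (all feeds when
# rebuilding).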
sub needsaggregate () {
	return values %feeds if $config{rebuild};
	return grep { time - $_->{lastupdate} >= $_->{updateinterval} } values %feeds;
}
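
# Fetch and parse each feed, adding a page per entry. If XML::Feed
# fails to parse the content, retry after stripping invalid UTF-8,
# then after escaping stray entities.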
sub aggregate (@) {
	eval q{use XML::Feed};
	error($@) if $@;
	eval q{use URI::Fetch};
	error($@) if $@;

	foreach my $feed (@_) {
		$feed->{lasttry}=time;
		$feed->{newposts}=0;
		$feed->{message}=sprintf(gettext("last checked %s"),
			displaytime($feed->{lasttry}));
		$feed->{error}=0;

		debug(sprintf(gettext("checking feed %s ..."), $feed->{name}));

		if (! length $feed->{feedurl}) {
			my @urls=XML::Feed->find_feeds($feed->{url});
			if (! @urls) {
				$feed->{message}=sprintf(gettext("could not find feed at %s"), $feed->{url});
				$feed->{error}=1;
				debug($feed->{message});
				next;
			}
			$feed->{feedurl}=pop @urls;
		}
		my $res=URI::Fetch->fetch($feed->{feedurl});
		if (! $res) {
			$feed->{message}=URI::Fetch->errstr;
			$feed->{error}=1;
			debug($feed->{message});
			next;
		}

		# lastupdate is only set if we were able to contact the server
		$feed->{lastupdate}=$feed->{lasttry};

		if ($res->status == URI::Fetch::URI_GONE()) {
			$feed->{message}=gettext("feed not found");
			$feed->{error}=1;
			debug($feed->{message});
			next;
		}
		my $content=$res->content;
		my $f=eval{XML::Feed->parse(\$content)};
		if ($@) {
			# One common cause of XML::Feed crashing is a feed
			# that contains invalid UTF-8 sequences. Convert
			# feed to ascii to try to work around.
			$feed->{message}.=" ".sprintf(gettext("(invalid UTF-8 stripped from feed)"));
			$f=eval {
				$content=Encode::decode_utf8($content, 0);
				XML::Feed->parse(\$content)
			};
		}
		if ($@) {
			# Another possibility is badly escaped entities.
			$feed->{message}.=" ".sprintf(gettext("(feed entities escaped)"));
			$content=~s/\&(?!amp)(\w+);/&amp;$1;/g;
			$f=eval {
				$content=Encode::decode_utf8($content, 0);
				XML::Feed->parse(\$content)
			};
		}
		if ($@) {
			$feed->{message}=gettext("feed crashed XML::Feed!")." ($@)";
			$feed->{error}=1;
			debug($feed->{message});
			next;
		}
		if (! $f) {
			$feed->{message}=XML::Feed->errstr;
			$feed->{error}=1;
			debug($feed->{message});
			next;
		}

		foreach my $entry ($f->entries) {
			# XML::Feed doesn't work around XML::Atom's bizarre
			# API, so we will. Real unicode strings? Yes please.
			# See [[bugs/Aggregated_Atom_feeds_are_double-encoded]]
			local $XML::Atom::ForceUnicode = 1;

			my $c=$entry->content;
			# atom feeds may have no content, only a summary
			if (! defined $c && ref $entry->summary) {
				$c=$entry->summary;
			}

			add_page(
				feed => $feed,
				copyright => $f->copyright,
				title => defined $entry->title ? decode_entities($entry->title) : "untitled",
				link => $entry->link,
				content => (defined $c && defined $c->body) ? $c->body : "",
				guid => defined $entry->id ? $entry->id : time."_".$feed->{name},
				ctime => $entry->issued ? ($entry->issued->epoch || time) : time,
				base => (defined $c && $c->can("base")) ? $c->base : undef,
			);
		}
	}
}
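
# Create or update the page for a single feed entry. New entries are
# assigned an unused page name derived from their title; a page is only
# rewritten when the digest of its content changes.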
sub add_page (@) {
	my %params=@_;

	my $feed=$params{feed};
	my $guid={};
	my $mtime;
	if (exists $guids{$params{guid}}) {
		# updating an existing post
		$guid=$guids{$params{guid}};
		return if $guid->{expired};
	}
	else {
		# new post
		$guid->{guid}=$params{guid};
		$guids{$params{guid}}=$guid;
		$mtime=$params{ctime};
		$feed->{numposts}++;
		$feed->{newposts}++;

		# assign it an unused page
		my $page=titlepage($params{title});
		# escape slashes and periods in title so it doesn't specify
		# directory name or trigger ".." disallowing code.
		$page=~s!([/.])!"__".ord($1)."__"!eg;
		$page=$feed->{dir}."/".$page;
		($page)=$page=~/$config{wiki_file_regexp}/;
		if (! defined $page || ! length $page) {
			$page=$feed->{dir}."/item";
		}
		my $c="";
		while (exists $IkiWiki::pagecase{lc $page.$c} ||
		       -e $IkiWiki::Plugin::transient::transientdir."/".htmlfn($page.$c) ||
		       -e "$config{srcdir}/".htmlfn($page.$c)) {
			$c++
		}

		# Make sure that the file name isn't too long.
		# NB: This doesn't check for path length limits.
		my $max=POSIX::pathconf($config{srcdir}, &POSIX::_PC_NAME_MAX);
		if (defined $max && length(htmlfn($page)) >= $max) {
			$c="";
			$page=$feed->{dir}."/item";
			while (exists $IkiWiki::pagecase{lc $page.$c} ||
			       -e $IkiWiki::Plugin::transient::transientdir."/".htmlfn($page.$c) ||
			       -e "$config{srcdir}/".htmlfn($page.$c)) {
				$c++
			}
		}

		$guid->{page}=$page;
		debug(sprintf(gettext("creating new page %s"), $page));
	}
	$guid->{feed}=$feed->{name};

	# To write or not to write? Need to avoid writing unchanged pages
	# to avoid unnecessary rebuilding. The mtime from rss cannot be
	# trusted; let's use a digest.
	eval q{use Digest::MD5 'md5_hex'};
	error($@) if $@;
	require Encode;
	my $digest=md5_hex(Encode::encode_utf8($params{content}));
	return unless ! exists $guid->{md5} || $guid->{md5} ne $digest || $config{rebuild};
	$guid->{md5}=$digest;

	# Create the page.
	my $template;
	eval {
		$template=template($feed->{template}, blind_cache => 1);
	};
	if ($@) {
		print STDERR gettext("failed to process template:")." $@";
		return;
	}
	$template->param(title => $params{title})
		if defined $params{title} && length($params{title});
	$template->param(content => wikiescape(htmlabs($params{content},
		defined $params{base} ? $params{base} : $feed->{feedurl})));
	$template->param(name => $feed->{name});
	$template->param(url => $feed->{url});
	$template->param(copyright => $params{copyright})
		if defined $params{copyright} && length $params{copyright};
	$template->param(permalink => IkiWiki::urlabs($params{link}, $feed->{feedurl}))
		if defined $params{link};
	if (ref $feed->{tags}) {
		$template->param(tags => [map { tag => $_ }, @{$feed->{tags}}]);
	}
	writefile(htmlfn($guid->{page}),
		$IkiWiki::Plugin::transient::transientdir, $template->output);

	if (defined $mtime && $mtime <= time) {
		# Set the mtime, this lets the build process get the right
		# creation time on record for the new page.
		utime $mtime, $mtime,
			$IkiWiki::Plugin::transient::transientdir."/".htmlfn($guid->{page});
		# Store it in pagectime for expiry code to use also.
		$IkiWiki::pagectime{$guid->{page}}=$mtime
			unless exists $IkiWiki::pagectime{$guid->{page}};
	}
	else {
		# Dummy value for expiry code.
		$IkiWiki::pagectime{$guid->{page}}=time
			unless exists $IkiWiki::pagectime{$guid->{page}};
	}
}

sub wikiescape ($) {
	# escape accidental wikilinks and preprocessor stuff
	return encode_entities(shift, '\[\]');
}

sub htmlabs ($$) {
	# Convert links in html from relative to absolute.
	# Note that this is a heuristic, which is not specified by the rss
	# spec and may not be right for all feeds. Also, see Debian
	# bug #381359.
	my $html=shift;
	my $urlbase=shift;

	my $ret="";

	my $p = HTML::Parser->new(api_version => 3);
	$p->handler(default => sub { $ret.=join("", @_) }, "text");
	$p->handler(start => sub {
		my ($tagname, $pos, $text) = @_;
		if (ref $HTML::Tagset::linkElements{$tagname}) {
			while (4 <= @$pos) {
				# use attribute sets from right to left
				# to avoid invalidating the offsets
				# when replacing the values
				my ($k_offset, $k_len, $v_offset, $v_len) =
					splice(@$pos, -4);
				my $attrname = lc(substr($text, $k_offset, $k_len));
				next unless grep { $_ eq $attrname } @{$HTML::Tagset::linkElements{$tagname}};
				next unless $v_offset; # 0 v_offset means no value
				my $v = substr($text, $v_offset, $v_len);
				$v =~ s/^([\'\"])(.*)\1$/$2/;
				my $new_v=IkiWiki::urlabs($v, $urlbase);
				$new_v =~ s/\"/&quot;/g; # since we quote with ""
				substr($text, $v_offset, $v_len) = qq("$new_v");
			}
		}
		$ret.=$text;
	}, "tagname, tokenpos, text");
	$p->parse($html);
	$p->eof;

	return $ret;
}
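
# File name for an aggregated page: the _aggregated extension when
# aggregating to internal pages, otherwise the configured html extension.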
sub htmlfn ($) {
	return shift().".".($config{aggregateinternal} ? "_aggregated" : $config{htmlext});
}

my $aggregatelock;

sub lockaggregate () {
	# Take an exclusive lock to prevent multiple concurrent aggregators.
	# Returns true if the lock was acquired.
	if (! -d $config{wikistatedir}) {
		mkdir($config{wikistatedir});
	}
	open($aggregatelock, '>', "$config{wikistatedir}/aggregatelock") ||
		error("cannot open $config{wikistatedir}/aggregatelock: $!");
	if (! flock($aggregatelock, LOCK_EX | LOCK_NB)) {
		close($aggregatelock) || error("failed closing aggregatelock: $!");
		return 0;
	}
	return 1;
}
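
# Release the aggregation lock by closing its file handle.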
sub unlockaggregate () {
	return close($aggregatelock) if $aggregatelock;
	return;
}

1