aboutsummaryrefslogtreecommitdiff
path: root/_make/rdf.mk
blob: 0586ade3a40179ffc4a8173a13f1e510e9cb1a5d (plain)
  1. # Make snippet for extracting RDF data from PDF documents
  2. #
  3. # Copyright 2024, Jonas Smedegaard <dr@jones.dk>
  4. # SPDX-License-Identifier: GPL-3+
  5. #
  6. # Setup:
  7. # In main Makefile...
  8. # * set variable PDF_DOCUMENTS or use simple and slow default
  9. # * set variable BASE_IRI or use default file URI
  10. # * include this make snippet
  11. #
  12. # Dependencies:
  13. # * pdfinfo (e.g. Debian package poppler-utils)
  14. # * rapper (e.g. Debian package raptor2-utils)
  15. # * perl v5.36.0 or newer
  16. # list of relative paths to PDF documents
  17. #PDF_DOCUMENTS = \
  18. # main_paper.pdf \
  19. # promo_article.pdf \
  20. # research/deep/superconductors/report.pdf \
  21. # research/deep/fringe/index.pdf
  22. PDF_DOCUMENTS ?= $(wildcard *.pdf)
  23. # extract XMP metadata from PDF document,
  24. # and convert to the human-friendlier RDF/Turtle serialization
  25. $(PDF_DOCUMENTS:%=turtle-from-%): turtle-from-%:
  26. @pdfinfo -meta $* \
  27. | perl -gp \
  28. -e 's,\s*<\?xpacket [^>]+>\s*,,;' \
  29. -e 's,<x:xmpmeta [^>]+>,<?xml version="1.0" encoding="utf-8"?>,;' \
  30. -e 's,\s*</x:xmpmeta>,,;' \
  31. -e 's,\s*<\?xpacket [^>]+>,,;' \
  32. | rapper -q -i rdfxml -o turtle - $(BASE_IRI)$(dir $*)
  33. .PHONY: $(PDF_DOCUMENTS:%=turtle-from-%)