# Make snippet for extracting RDF data from PDF documents
#
# Copyright 2024, Jonas Smedegaard
# SPDX-License-Identifier: GPL-3+
#
# Setup:
# In main Makefile...
#  * set variable PDF_DOCUMENTS or use simple and slow default
#  * set variable BASE_IRI or use default file URI
#  * include this make snippet
#
# Usage:
#  make turtle-from-<relative/path/to/document.pdf>
#
# Dependencies:
#  * pdfinfo (e.g. Debian package poppler-utils)
#  * rapper (e.g. Debian package raptor2-utils)
#  * perl v5.36.0 or newer (for the -g "slurp whole input" option)

# list of relative paths to PDF documents
#PDF_DOCUMENTS = \
#	main_paper.pdf \
#	promo_article.pdf \
#	research/deep/superconductors/report.pdf \
#	research/deep/fringe/index.pdf
PDF_DOCUMENTS ?= $(wildcard *.pdf)

# extract XMP metadata from PDF document,
# and convert to the human-friendlier RDF/Turtle serialization
#
# One phony target "turtle-from-<document>" is defined per entry
# in PDF_DOCUMENTS; the stem $* is the path of the PDF document.
#
# The perl filter reads the whole input as one string (-g)
# and strips the XMP packet wrapper, in this order:
#  1. the leading <?xpacket ...?> processing instruction
#  2. the opening <x:xmpmeta ...> tag
#  3. the closing </x:xmpmeta> tag
#  4. the trailing <?xpacket ...?> processing instruction
# so that only the RDF/XML payload is piped into rapper.
# NOTE(review): patterns 2 and 3 were reconstructed from a garbled copy
# of this file -- confirm against the upstream source.
#
# The base IRI handed to rapper is $(BASE_IRI)
# followed by the directory part of the document path.
$(PDF_DOCUMENTS:%=turtle-from-%): turtle-from-%:
	@pdfinfo -meta $* \
	| perl -gp \
		-e 's,\s*<\?xpacket [^>]+>\s*,,;' \
		-e 's,<x:xmpmeta [^>]+>,,;' \
		-e 's,\s*</x:xmpmeta>,,;' \
		-e 's,\s*<\?xpacket [^>]+>,,;' \
	| rapper -q -i rdfxml -o turtle - $(BASE_IRI)$(dir $*)
.PHONY: $(PDF_DOCUMENTS:%=turtle-from-%)