diff options
| author | Jonas Smedegaard <dr@jones.dk> | 2025-05-11 19:40:01 +0200 |
|---|---|---|
| committer | Jonas Smedegaard <dr@jones.dk> | 2025-05-11 19:41:10 +0200 |
| commit | 3ff58c4d9987148e20f8d00de66ea3df54ad4ea3 (patch) | |
| tree | ef6a104924822440b7166ccacfb886ed94ac7e2e /_make | |
| parent | 765ceca3bf63c1ea99c4060e793d96953582e19a (diff) | |
add Makefile with quarto and rdf snippets
Diffstat (limited to '_make')
| -rw-r--r-- | _make/quarto.mk | 56 | ||||
| -rw-r--r-- | _make/rdf.mk | 37 |
2 files changed, 93 insertions, 0 deletions
# ---------------------------------------------------------------------
# File: _make/quarto.mk
# ---------------------------------------------------------------------
# Make snippet for rendering Quarto documents
#
# Copyright 2024, Jonas Smedegaard <dr@jones.dk>
# SPDX-License-Identifier: GPL-3+
#
# Setup:
# In main Makefile...
#  * set variable DOCUMENTS
#  * set variable DOCUMENT_APPENDIX_REGEX if needed
#  * set variable DOCUMENT_BIBLIOGRAPHY_REGEX if needed
#  * include this make snippet
#
# Provided targets, one per entry <doc> in DOCUMENTS (all phony):
#  * doc-render-<doc>        render the document with quarto
#  * doc-screening-of-<doc>  render to markdown and page it through batcat
#  * doc-charcount-of-<doc>  print character count until the appendix
#  * doc-pagecount-of-<doc>  print count of 2400-character pages
#                            until the bibliography
#
# Dependencies:
#  * quarto <https://quarto.org/>
#  * bat <https://github.com/sharkdp/bat>, with executable named "batcat"
#  * perl v5.10.1 or newer

# list of relative paths to directories,
# each containing an index.qmd file.
# NOTE(review): the rules below use "%.qmd" as prerequisite,
# i.e. <entry>.qmd rather than <entry>/index.qmd
# -- confirm which layout is intended.
#DOCUMENTS = \
#  main_paper \
#  promo_article \
#  research/deep/superconductors \
#  research/deep/fringe

# regular expressions anchored at the beginning of a single line,
# to match the beginning of content in document to omit from counting.
# The doubled backslash is reduced to a single one by the shell,
# because the value is placed inside double quotes in the recipes.
DOCUMENT_APPENDIX_REGEX ?= Appendix\\b
DOCUMENT_BIBLIOGRAPHY_REGEX ?= Bibliography\\b

# render document in its default output format(s)
$(DOCUMENTS:%=doc-render-%): doc-render-%: %.qmd
	quarto render $<

# render document as markdown to standard output,
# and display it syntax-highlighted
$(DOCUMENTS:%=doc-screening-of-%): doc-screening-of-%: %.qmd
	QUARTO_LOG_LEVEL=quiet \
	  quarto render $< --to markdown --output - \
	  | batcat --file-name index.qmd --language markdown

# count all characters (including line endings)
# except horizontal rulers and blank lines,
# until appendices.
# The regular expression is handed to perl as variable $re
# through rudimentary switch parsing (option -s).
# Prints 0 when nothing was counted.
$(DOCUMENTS:%=doc-charcount-of-%): doc-charcount-of-%: %.qmd
	QUARTO_LOG_LEVEL=quiet \
	  quarto render $< --to plain --columns=9999 --output - \
	  | perl -nsE 'next if /^-*$$/; $$done += /^$$re/; $$chars += length unless $$done; END { say $$chars || 0 }' \
	    -- -re="$(DOCUMENT_APPENDIX_REGEX)"

# count all characters (including line endings)
# except horizontal rulers and blank lines,
# until bibliography,
# divide by 2400 and truncate the result to a whole number.
# Prints 0 when nothing was counted.
$(DOCUMENTS:%=doc-pagecount-of-%): doc-pagecount-of-%: %.qmd
	QUARTO_LOG_LEVEL=quiet \
	  quarto render $< --to plain --columns=9999 --output - \
	  | perl -nsE 'next if /^-*$$/; $$done += /^$$re/; $$chars += length unless $$done; END { say $$chars || 0 }' \
	    -- -re="$(DOCUMENT_BIBLIOGRAPHY_REGEX)" \
	  | perl -nE 'say int($$_ / 2400)'

.PHONY: $(DOCUMENTS:%=doc-render-%) $(DOCUMENTS:%=doc-screening-of-%) \
 $(DOCUMENTS:%=doc-charcount-of-%) $(DOCUMENTS:%=doc-pagecount-of-%)

# ---------------------------------------------------------------------
# File: _make/rdf.mk
# ---------------------------------------------------------------------
# Make snippet for extracting RDF data from PDF documents
#
# Copyright 2024, Jonas Smedegaard <dr@jones.dk>
# SPDX-License-Identifier: GPL-3+
#
# Setup:
# In main Makefile...
#  * set variable PDF_DOCUMENTS or use simple and slow default
#  * set variable BASE_IRI or use default file URI
#  * include this make snippet
#
# Provided targets, one per entry <pdf> in PDF_DOCUMENTS (all phony):
#  * turtle-from-<pdf>  print XMP metadata of the PDF as RDF/Turtle
#
# Dependencies:
#  * pdfinfo (e.g. Debian package poppler-utils)
#  * rapper (e.g. Debian package raptor2-utils)
#  * perl v5.36.0 or newer (needed for option -g)

# list of relative paths to PDF documents
#PDF_DOCUMENTS = \
#  main_paper.pdf \
#  promo_article.pdf \
#  research/deep/superconductors/report.pdf \
#  research/deep/fringe/index.pdf

# default: all PDF documents in current directory
# (recursively expanded, so the glob is re-evaluated at each use)
PDF_DOCUMENTS ?= $(wildcard *.pdf)

# extract XMP metadata from PDF document,
# and convert to the human-friendlier RDF/Turtle serialization.
# perl reads the whole metadata packet at once (option -g),
# strips the xpacket processing instructions and the x:xmpmeta wrapper,
# and puts an XML declaration in place of the opening x:xmpmeta tag,
# before rapper parses the remainder as RDF/XML.
# Base IRI passed to rapper is BASE_IRI
# followed by the directory part of the PDF path.
$(PDF_DOCUMENTS:%=turtle-from-%): turtle-from-%:
	@pdfinfo -meta $* \
	  | perl -gp \
	    -e 's,\s*<\?xpacket [^>]+>\s*,,;' \
	    -e 's,<x:xmpmeta [^>]+>,<?xml version="1.0" encoding="utf-8"?>,;' \
	    -e 's,\s*</x:xmpmeta>,,;' \
	    -e 's,\s*<\?xpacket [^>]+>,,;' \
	  | rapper -q -i rdfxml -o turtle - $(BASE_IRI)$(dir $*)

.PHONY: $(PDF_DOCUMENTS:%=turtle-from-%)
