aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonas Smedegaard <dr@jones.dk>2025-05-11 19:40:01 +0200
committerJonas Smedegaard <dr@jones.dk>2025-05-11 19:41:10 +0200
commit3ff58c4d9987148e20f8d00de66ea3df54ad4ea3 (patch)
treeef6a104924822440b7166ccacfb886ed94ac7e2e
parent765ceca3bf63c1ea99c4060e793d96953582e19a (diff)
add Makefile with quarto and rdf snippets
-rw-r--r--Makefile45
-rw-r--r--_make/quarto.mk56
-rw-r--r--_make/rdf.mk37
3 files changed, 138 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2238b76
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+# Quarto documents (named without the .qmd extension)
+# handled by _make/quarto.mk.
+DOCUMENTS = report
+
+# PDF documents handled by _make/rdf.mk.
+PDF_DOCUMENTS = _site/report.pdf
+
+# The snippets expand DOCUMENTS and PDF_DOCUMENTS in their rule headers
+# while being read, so both must be assigned before this line.
+include _make/*.mk
+
+# Pandoc Lua filter exercised by the check and debug targets.
+FILTER = _extensions/ruc-play/semantic-markdown/semantic-markdown.lua
+
+# Test cases: one per test/*.md file, named without the .md extension.
+# Simply expanded (:=) so the directory is globbed once,
+# not again on every reference to the variable.
+DIFFTESTS := $(patsubst %.md,%,$(wildcard test/*.md))
+
+# Command comparing expected output (first argument)
+# with actual output (second argument).
+#DIFF = diff -u
+DIFF = git --no-pager diff --no-index
+
+# Pandoc input format used when reading the test documents.
+#MARKDOWN_DIALECT = markdown+fenced_divs+bracketed_spans-auto_identifiers
+MARKDOWN_DIALECT = commonmark
+
+# Run the diff test of every test case, then lint the Lua filter.
+check: $(addprefix check-,$(DIFFTESTS))
+ luacheck --quiet $(FILTER)
+
+# Run the filter on one test document and compare the result
+# with the expected output stored beside it as <test>.plain.html.
+# The leading "-" makes make ignore a failing pipeline,
+# so a mismatch is displayed but does not abort the run.
+$(DIFFTESTS:%=check-%): check-%: %.md
+ -pandoc --lua-filter $(FILTER) --from $(MARKDOWN_DIALECT) \
+ --wrap preserve < $< \
+ | $(DIFF) $*.plain.html /dev/stdin
+
+# Print one test document in pandoc's native format
+# after it has passed through the filter.
+# $(strip ...) collapses the command onto a single line.
+$(DIFFTESTS:%=debug-%): debug-%: %.md
+ $(strip \
+ pandoc --from $(MARKDOWN_DIALECT) --to native \
+ --lua-filter $(FILTER) < $<)
+
+# Show how the filter changes one test document,
+# by comparing pandoc's native output without and with the filter.
+# The recipe uses process substitution <(...),
+# hence bash is selected as shell for these targets only.
+# The leading "-" makes make ignore the non-zero exit status
+# of a comparison that found differences.
+$(DIFFTESTS:%=debugdiff-%): SHELL = /bin/bash
+$(DIFFTESTS:%=debugdiff-%): debugdiff-%: %.md
+ -$(strip $(DIFF) \
+ <(pandoc --from $(MARKDOWN_DIALECT) --to native < $<) \
+ <(pandoc --from $(MARKDOWN_DIALECT) --to native \
+ --lua-filter $(FILTER) < $<))
+
+# Parse the RDFa in <test>.rdfa.html, read from standard input,
+# and print it as Turtle.
+# NOTE(review): "ex" appears to be the base URI argument of rapper - confirm.
+$(DIFFTESTS:%=turtle-from-%): turtle-from-%: %.rdfa.html
+ rapper -i rdfa -o turtle - ex < $<
+
+# Parse the RDFa in <test>.rdfa.html, read from standard input,
+# and print it using rapper's rdfxml-xmp serializer.
+# NOTE(review): "ex" appears to be the base URI argument of rapper - confirm.
+$(DIFFTESTS:%=xmp-from-%): xmp-from-%: %.rdfa.html
+ rapper -i rdfa -o rdfxml-xmp - ex < $<
+
+# All targets defined in this file are commands, not files to build.
+.PHONY: check
+.PHONY: $(DIFFTESTS:%=check-%) $(DIFFTESTS:%=debug-%)
+.PHONY: $(DIFFTESTS:%=debugdiff-%)
+.PHONY: $(DIFFTESTS:%=turtle-from-%) $(DIFFTESTS:%=xmp-from-%)
diff --git a/_make/quarto.mk b/_make/quarto.mk
new file mode 100644
index 0000000..63ad617
--- /dev/null
+++ b/_make/quarto.mk
@@ -0,0 +1,56 @@
+# Make snippet for rendering Quarto documents
+#
+# Copyright 2024, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+#
+# Setup:
+# In main Makefile...
+# * set variable DOCUMENTS
+# * set variable DOCUMENT_APPENDIX_REGEX if needed
+# * include this make snippet
+#
+# Dependencies:
+# * quarto <https://quarto.org/>
+# * bat <https://github.com/sharkdp/bat>, with executable named "batcat"
+# * perl v5.10.1 or newer
+
+# list of relative paths to Quarto documents,
+# each given without its .qmd extension.
+#DOCUMENTS = \
+# main_paper \
+# promo_article \
+# research/deep/superconductors \
+# research/deep/fringe
+
+# regular expressions anchored at the beginning of a single line,
+# to match the beginning of content in document to omit from counting.
+# The values are passed to perl inside a double-quoted shell word,
+# where the doubled backslash becomes a single one,
+# so that perl sees \b (word boundary).
+DOCUMENT_APPENDIX_REGEX ?= Appendix\\b
+DOCUMENT_BIBLIOGRAPHY_REGEX ?= Bibliography\\b
+
+# render one document with quarto
+$(DOCUMENTS:%=doc-render-%): doc-render-%: %.qmd
+ quarto render $<
+
+# render one document as markdown on standard output,
+# and display it through bat, labelled with the name of its source file
+$(DOCUMENTS:%=doc-screening-of-%): doc-screening-of-%: %.qmd
+ QUARTO_LOG_LEVEL=quiet \
+ quarto render $< --to markdown --output - | batcat --file-name $< --language markdown
+
+# count all characters (including the newline ending each line),
+# skipping blank lines and horizontal rulers (lines of only dashes),
+# until appendices:
+# the first line matching DOCUMENT_APPENDIX_REGEX
+# and everything after it is left out.
+# Prints 0 rather than an empty line when nothing was counted.
+$(DOCUMENTS:%=doc-charcount-of-%): doc-charcount-of-%: %.qmd
+ QUARTO_LOG_LEVEL=quiet \
+ quarto render $< --to plain --columns=9999 --output - \
+ | perl -nsE 'next if /^-*$$/; $$done += /^$$re/; $$chars += length unless $$done; END { say($$chars // 0) }' \
+ -- -re="$(DOCUMENT_APPENDIX_REGEX)"
+
+# count all characters (including the newline ending each line),
+# skipping blank lines and horizontal rulers (lines of only dashes),
+# until bibliography:
+# the first line matching DOCUMENT_BIBLIOGRAPHY_REGEX
+# and everything after it is left out.
+# Then divide by 2400 and floor the result.
+$(DOCUMENTS:%=doc-pagecount-of-%): doc-pagecount-of-%: %.qmd
+ QUARTO_LOG_LEVEL=quiet \
+ quarto render $< --to plain --columns=9999 --output - \
+ | perl -nsE 'next if /^-*$$/; $$done += /^$$re/; $$chars += length unless $$done; END { say $$chars }' \
+ -- -re="$(DOCUMENT_BIBLIOGRAPHY_REGEX)" \
+ | perl -nE 'say int($$_ / 2400)'
+
+# all targets of this snippet are commands, not files to build
+.PHONY: $(foreach goal,render screening-of charcount-of pagecount-of,\
+ $(DOCUMENTS:%=doc-$(goal)-%))
diff --git a/_make/rdf.mk b/_make/rdf.mk
new file mode 100644
index 0000000..0586ade
--- /dev/null
+++ b/_make/rdf.mk
@@ -0,0 +1,37 @@
+# Make snippet for extracting RDF data from PDF documents
+#
+# Copyright 2024, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+#
+# Setup:
+# In main Makefile...
+# * set variable PDF_DOCUMENTS or use simple and slow default
+# * set variable BASE_IRI or use default file URI
+# * include this make snippet
+#
+# Dependencies:
+# * pdfinfo (e.g. Debian package poppler-utils)
+# * rapper (e.g. Debian package raptor2-utils)
+# * perl v5.36.0 or newer
+
+# list of relative paths to PDF documents
+#PDF_DOCUMENTS = \
+# main_paper.pdf \
+# promo_article.pdf \
+# research/deep/superconductors/report.pdf \
+# research/deep/fringe/index.pdf
+
+# default: every PDF document in the current directory.
+# Assigned with ?= and therefore recursively expanded:
+# the directory is globbed again each time the variable is used.
+PDF_DOCUMENTS ?= $(wildcard *.pdf)
+
+# extract XMP metadata from PDF document,
+# and convert to the human-friendlier RDF/Turtle serialization.
+# The perl filter reads its whole input at once (-g)
+# and strips the xpacket processing instructions
+# and the closing x:xmpmeta tag,
+# putting an XML declaration in place of the opening x:xmpmeta tag,
+# before rapper parses the remainder as RDF/XML.
+# The directory part of the PDF path, prefixed with BASE_IRI,
+# is given to rapper as the argument following its input "-".
+$(PDF_DOCUMENTS:%=turtle-from-%): turtle-from-%:
+ @pdfinfo -meta $* \
+ | perl -gp \
+ -e 's,\s*<\?xpacket [^>]+>\s*,,;' \
+ -e 's,<x:xmpmeta [^>]+>,<?xml version="1.0" encoding="utf-8"?>,;' \
+ -e 's,\s*</x:xmpmeta>,,;' \
+ -e 's,\s*<\?xpacket [^>]+>,,;' \
+ | rapper -q -i rdfxml -o turtle - $(BASE_IRI)$(dir $*)
+
+# the extraction targets are commands, not files to build
+.PHONY: $(addprefix turtle-from-,$(PDF_DOCUMENTS))