diff options
| author | Jonas Smedegaard <dr@jones.dk> | 2025-05-11 19:40:01 +0200 |
|---|---|---|
| committer | Jonas Smedegaard <dr@jones.dk> | 2025-05-11 19:41:10 +0200 |
| commit | 3ff58c4d9987148e20f8d00de66ea3df54ad4ea3 (patch) | |
| tree | ef6a104924822440b7166ccacfb886ed94ac7e2e | |
| parent | 765ceca3bf63c1ea99c4060e793d96953582e19a (diff) | |
add Makefile with quarto and rdf snippets
| -rw-r--r-- | Makefile | 45 | ||||
| -rw-r--r-- | _make/quarto.mk | 56 | ||||
| -rw-r--r-- | _make/rdf.mk | 37 |
3 files changed, 138 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2238b76
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+DOCUMENTS = report
+
+PDF_DOCUMENTS = _site/report.pdf
+
+include _make/*.mk
+
+FILTER = _extensions/ruc-play/semantic-markdown/semantic-markdown.lua
+
+DIFFTESTS := $(patsubst %.md,%,$(wildcard test/*.md))
+
+#DIFF = diff -u
+DIFF = git --no-pager diff --no-index
+
+#MARKDOWN_DIALECT = markdown+fenced_divs+bracketed_spans-auto_identifiers
+MARKDOWN_DIALECT = commonmark
+
+check: $(DIFFTESTS:%=check-%)
+	luacheck --quiet $(FILTER)
+
+$(DIFFTESTS:%=check-%): check-%: %.md
+	-pandoc --lua-filter $(FILTER) --from $(MARKDOWN_DIALECT) \
+		--wrap preserve < $< \
+	| $(DIFF) $*.plain.html /dev/stdin
+
+$(DIFFTESTS:%=debug-%): debug-%: %.md
+	$(strip \
+	pandoc --from $(MARKDOWN_DIALECT) --to native \
+		--lua-filter $(FILTER) < $<)
+
+$(DIFFTESTS:%=debugdiff-%): SHELL = /bin/bash
+$(DIFFTESTS:%=debugdiff-%): debugdiff-%: %.md
+	-$(strip $(DIFF) \
+		<(pandoc --from $(MARKDOWN_DIALECT) --to native < $<) \
+		<(pandoc --from $(MARKDOWN_DIALECT) --to native \
+			--lua-filter $(FILTER) < $<))
+
+$(DIFFTESTS:%=turtle-from-%): turtle-from-%: %.rdfa.html
+	rapper -i rdfa -o turtle - ex < $<
+
+$(DIFFTESTS:%=xmp-from-%): xmp-from-%: %.rdfa.html
+	rapper -i rdfa -o rdfxml-xmp - ex < $<
+
+.PHONY: check \
+	$(foreach x,check debug debugdiff turtle-from xmp-from,\
+	$(DIFFTESTS:%=$x-%))
diff --git a/_make/quarto.mk b/_make/quarto.mk
new file mode 100644
index 0000000..63ad617
--- /dev/null
+++ b/_make/quarto.mk
@@ -0,0 +1,56 @@
+# Make snippet for rendering Quarto documents
+#
+# Copyright 2024, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+#
+# Setup:
+# In main Makefile...
+# * set variable DOCUMENTS
+# * set variables DOCUMENT_APPENDIX_REGEX and DOCUMENT_BIBLIOGRAPHY_REGEX if needed
+# * include this make snippet
+#
+# Dependencies:
+# * quarto <https://quarto.org/>
+# * bat <https://github.com/sharkdp/bat>, with executable named "batcat"
+# * perl v5.10.1 or newer
+
+# list of relative paths to Quarto documents,
+# each given without its .qmd suffix.
+#DOCUMENTS = \
+# main_paper \
+# promo_article \
+# research/deep/superconductors \
+# research/deep/fringe
+
+# regular expressions anchored at the beginning of a single line,
+# to match the beginning of content in document to omit from counting
+DOCUMENT_APPENDIX_REGEX ?= Appendix\\b
+DOCUMENT_BIBLIOGRAPHY_REGEX ?= Bibliography\\b
+
+$(DOCUMENTS:%=doc-render-%): doc-render-%: %.qmd
+	quarto render $<
+
+$(DOCUMENTS:%=doc-screening-of-%): doc-screening-of-%: %.qmd
+	QUARTO_LOG_LEVEL=quiet \
+	quarto render $< --to markdown --output - | batcat --file-name $< --language markdown
+
+# count all characters except blank lines and horizontal rules,
+# until appendices
+$(DOCUMENTS:%=doc-charcount-of-%): doc-charcount-of-%: %.qmd
+	QUARTO_LOG_LEVEL=quiet \
+	quarto render $< --to plain --columns=9999 --output - \
+	| perl -nsE 'next if /^-*$$/; $$done += /^$$re/; $$chars += length unless $$done; END { say $$chars }' \
+	-- -re="$(DOCUMENT_APPENDIX_REGEX)"
+
+# count all characters except blank lines and horizontal rules,
+# until bibliography,
+# divide by 2400 and floor the result
+$(DOCUMENTS:%=doc-pagecount-of-%): doc-pagecount-of-%: %.qmd
+	QUARTO_LOG_LEVEL=quiet \
+	quarto render $< --to plain --columns=9999 --output - \
+	| perl -nsE 'next if /^-*$$/; $$done += /^$$re/; $$chars += length unless $$done; END { say $$chars }' \
+	-- -re="$(DOCUMENT_BIBLIOGRAPHY_REGEX)" \
+	| perl -nE 'say int($$_ / 2400)'
+
+.PHONY: $(DOCUMENTS:%=doc-render-%) $(DOCUMENTS:%=doc-screening-of-%) \
+	$(DOCUMENTS:%=doc-charcount-of-%) $(DOCUMENTS:%=doc-pagecount-of-%)
diff --git a/_make/rdf.mk b/_make/rdf.mk
new file mode 100644
index 0000000..0586ade
--- /dev/null
+++ b/_make/rdf.mk
@@ -0,0 +1,37 @@
+# Make snippet for extracting RDF data from PDF documents
+#
+# Copyright 2024, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+#
+# Setup:
+# In main Makefile...
+# * set variable PDF_DOCUMENTS or use simple and slow default
+# * set variable BASE_IRI or use default file URI
+# * include this make snippet
+#
+# Dependencies:
+# * pdfinfo (e.g. Debian package poppler-utils)
+# * rapper (e.g. Debian package raptor2-utils)
+# * perl v5.36.0 or newer
+
+# list of relative paths to PDF documents
+#PDF_DOCUMENTS = \
+# main_paper.pdf \
+# promo_article.pdf \
+# research/deep/superconductors/report.pdf \
+# research/deep/fringe/index.pdf
+
+PDF_DOCUMENTS ?= $(wildcard *.pdf)
+
+# extract XMP metadata from PDF document,
+# and convert to the human-friendlier RDF/Turtle serialization
+$(PDF_DOCUMENTS:%=turtle-from-%): turtle-from-%:
+	@pdfinfo -meta $* \
+	| perl -gp \
+	-e 's,\s*<\?xpacket [^>]+>\s*,,;' \
+	-e 's,<x:xmpmeta [^>]+>,<?xml version="1.0" encoding="utf-8"?>,;' \
+	-e 's,\s*</x:xmpmeta>,,;' \
+	-e 's,\s*<\?xpacket [^>]+>,,;' \
+	| rapper -q -i rdfxml -o turtle - $(BASE_IRI)$(dir $*)
+
+.PHONY: $(PDF_DOCUMENTS:%=turtle-from-%)
