diff options
| author | Jonas Smedegaard <dr@jones.dk> | 2025-03-17 22:53:16 +0100 |
|---|---|---|
| committer | Jonas Smedegaard <dr@jones.dk> | 2025-03-17 22:53:16 +0100 |
| commit | f205b64b8df25f3b586b599c9a217f1a24070c99 (patch) | |
| tree | 38fe1e212f980507029d8c658d34e414dcd64285 | |
| parent | b6968cb02d9a78a81dfd8e99b2e9f3e7a007d038 (diff) | |
update title page and intro
| -rw-r--r-- | _intro.qmd | 90 | ||||
| -rw-r--r-- | ref.bib | 49 | ||||
| -rw-r--r-- | report.qmd | 10 |
3 files changed, 113 insertions, 36 deletions
@@ -1,11 +1,32 @@ ## Problem space +A text document often has multiple viewing modes -- +one mode for authoring and another for the final product. +The author gets to see the wet ink when writing with a pen, +or editing widgets in word processing systems, +or markup codes in plaintext editors. +The target audience typically gets a view of only the authored content, +unless the document is (intentionally or not) opened in an authoring tool. + + then source and target media might differ, +e.g. a text authored in a word processor +but delivered as a PDF file. + Two major modes for authoring texts: WYSIWYG and plaintext. A fundamental benefit of plaintext approach is freedom of choice regarding authoring tools [@White2022, p. 3]. +Among plaintext authoring modes, +some are oriented towards technical accuracy +while others prioritise simplicity and ease of use. +Some authors requiring technical accuracy in their writing, +notably in academic settings, +use LaTeX as authoring format, +while others use LaTeX only as an intermediate format +with Markdown as their authoring format. + Where @White2022 examines benefits for authors with accessibility needs, the interest here is benefits for authors with unconventional needs for expression; @@ -14,29 +35,60 @@ separately from or prior to targeting another audience. ## Problem -How to extend Unix-style tools to support personal authoring -for collecting multi-facetted thoughts - -e.g. educatational exploration for making sense of a new academic field? +This project is framed as the following problem statement: + +**How can Unix-style tools for authoring linear texts be extended +to help structure non-linear thinking?** ## Idea -Extend the Quarto document publishing system to handle semantic annotations. 
+Implement plugins for the Pandoc document converter +to enable authoring of ontological annotations in the text content, +inspired by the conceptual idea in @Francart2020, +and publish the plugins +for easy use with the Quarto document publishing system. -Quarto takes as input plaintext files written in Markdown, -processes it through pandoc and a set of extensions and templates, -to produce a website, a PDF document, a presentation slide show and more. +Pandoc reads a text document, +parses its structural components into an internal data structure +called an Abstract Syntax Tree (AST), +and serialises and writes back into a text document. +The AST deliberately prioritises structural information +and is relaxed about visual information, +to preserve literal content +while reducing format-specific stylistic details, +relevant especially when processing between different formats. +Most common is to read plaintext Markdown files +and write LaTeX code further compiled into a PDF file. +Pandoc allows supplying custom reader and writer functions +as well as plugging into and manipulating the AST, +which this project will exploit: This project will write an extension -to detect semantic annotations in the Markdown input, -[@FrancartSemanticMarkdown2020] -transforming them as appropriate to the various output formats - -e.g. XMP metadata for PDF output and RDFa for html output. -[@WikipediaXMPPage] -[@rdfaInfoPage] +to adjust the AST +when abusing the default Markdown reader +to read Markdown with added markup for ontological annotations, +as proposed in @Francart2020 +and further sketched as a draft markup format in @Smedegaard2022. +The implemented Pandoc extensions will be designed +both for use standalone and as part of the document authoring framework Quarto, +which uses Pandoc as its central tool with a large set of extensions and templates. + +The first milestone is reached +when the filter can simply suppress the added markup. 
+A further milestone is to embed the expressed annotations +in supported output formats, +e.g. as XMP metadata in PDF output +[@PDFAssociation2020 chapter 14.3] +and as RDFa in HTML output +[@Herman2015]. +Yet another milestone is to make use of the added markup, +e.g. to annotate the purpose of scholarly citations +as presented in @Daquino2023. -Already a specification has been drafted -[@SementicMarkdownSpec] -for how to annotate semantics embedded in Markdown. -The challenge is to implement that specification - -and possibly spot and refine the spec, -and explore use cases for Markdown+semantics. +As mentioned above, +a draft specification has already been written +in @Smedegaard2022 +for the syntax of embedding ontological annotations in Markdown. +The main challenge of this project is to implement that specification +as extensions for the existing Pandoc tool and Quarto framework, +and as part of that potentially also refine the draft specification. @@ -182,22 +182,17 @@ journaltitle = {Lecture Notes in Computer Science}, } -@Misc{WikipediaXMPPage, - editor = {wikipedia}, - title = {Extensible Metadata Platform}, +@Misc{Herman2015, + date = {2015-03-17}, + editor = {Ivan Herman and Ben Adida and Manu Sporny and Mark Birbeck}, + title = {RDFa 1.1 Primer - Third Edition}, language = {English}, - url = {https://en.wikipedia.org/wiki/Extensible_Metadata_Platform}, + subtitle = {Rich Structured Data Markup for Web Documents}, + url = {https://www.w3.org/TR/rdfa-primer/}, urldate = {2025}, } -@Misc{rdfaInfoPage, - title = {Linked Data in HTML}, - language = {English}, - url = {https://rdfa.info/}, - urldate = {2025}, -} - -@Article{FrancartSemanticMarkdown2020, +@Article{Francart2020, author = {Thomas Francart}, date = {2020-02-20}, title = {Semantic Markdown Specifications}, @@ -205,13 +200,39 @@ url = {https://blog.sparna.fr/2020/02/20/semantic-markdown/}, } -@Misc{SementicMarkdownSpec, +@Misc{Smedegaard2022, + author = {Jonas Smedegaard and Thomas Francart}, date 
= {2022-04-09}, - editor = {jones.dk}, + editor = {Jonas Smedegaard}, title = {Semantic Markdown Spec (Alpha Draft)}, url = {https://source.jones.dk/semantic-markdown/about/}, } +@Misc{Daquino2023, + author = {Daquino, Marilena and Massari, Arcangelo and Peroni, Silvio and Shotton, David}, + date = {2023}, + title = {The OpenCitations Data Model}, + doi = {10.6084/M9.FIGSHARE.3443876}, + copyright = {Creative Commons Attribution 4.0 International}, + keywords = {Library and information studies not elsewhere classified}, + publisher = {figshare}, +} + +@TechReport{PDFAssociation2020, + author = {{PDF Association}}, + date = {2020-12}, + title = {Document management - Portable document format -}, + number = {ISO 32000-2:2020}, + subtitle = {Part 2: PDF 2.0}, + url = {https://www.iso.org/standard/75839.html}, + urldate = {2024-12-22}, + version = {2}, + comment = {https://pdfa.org/resource/iso-32000-2/}, + file = {:PDFAssociation2020 - Document Management Portable Document Format.pdf:PDF}, + organisation = {PDF Association}, + publisher = {International Organization for Standardization}, +} + @Comment{jabref-meta: databaseType:biblatex;} @Comment{jabref-meta: fileDirectory-jonas-bastian:/home/jonas/Projects/RUC/LIB/md;} @@ -1,8 +1,12 @@ --- -title: Making Markdown authoring better -subtitle: Extensions to Quarto for authoring semantic markdown +title: Semantic Markdown +subtitle: | + Pandoc plugins to support + ontological annotations + [preliminary draft] -date: 2025-05-27 +#date: 2025-05-27 +date: 2025-03-19 toc-depth: 2 |
