#!/bin/sh
#
# mkdiff.sh - build a scratch git history from a series of PDF files so
# that changes in their text content can be inspected with "git log -p".
#
# Provenance (recovered from a git patch):
#   commit  779d396eec65a25b2f0ae59845a23707b602746b
#   From:   Jonas Smedegaard
#   Date:   Thu, 25 Apr 2013 17:53:46 +0200
#   Subject: Save mkdiff.sh
#
# Usage: run without arguments in a directory that contains one or more
# NAME.pdf files and is not yet a git repository.  Requires git and
# pdftotext.
#
# For every NAME.pdf (in glob order) the script:
#   1. creates branch NAME and commits NAME.pdf,
#   2. extracts the text with "pdftotext -nopgbrk" into NAME.txt and
#      commits it,
#   3. if NAME.pl exists, commits it, runs "./NAME.pl NAME.txt" and commits
#      the resulting NAME.txt,
#   4. copies NAME.txt to content.txt and commits that,
#   5. merges branch NAME back into the repository's initial branch.
# content.txt therefore carries one commit per state on the initial branch.

set -eu

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Commit the staged changes with message $1.  When nothing is staged the
# commit is skipped with a note: "git commit" exits non-zero on an empty
# commit, which under "set -e" would abort the run half-way through a
# state branch.
commit_staged() {
  if git diff --cached --quiet; then
    printf 'nothing to commit, skipping: %s\n' "$1" >&2
  else
    git commit -m "$1"
  fi
}

# Simple safety check: never touch an existing repository.
[ ! -d .git ] || die "refusing to run: .git already exists in $(pwd)"

# Collect the input files up front so that nothing is created when there
# is no work to do.  An unmatched glob stays literal, hence the -e test.
set -- *.pdf
[ -e "$1" ] || die "no *.pdf files found in $(pwd)"

git init

# The initial branch is only called "master" when git is configured that
# way (init.defaultBranch), so ask git instead of assuming.
base_branch=$(git symbolic-ref --short HEAD)

# Keep a copy of this script inside the repository it creates.
[ -f mkdiff.sh ] || cp "$0" mkdiff.sh
chmod +x mkdiff.sh
git add mkdiff.sh
git commit -m "Save mkdiff.sh"

for pdf in "$@"; do
  state=${pdf%.pdf}
  txt=$state.txt
  script=$state.pl

  git checkout -b "$state"

  git add -- "$pdf"
  git commit -m "Pristine file $pdf"

  pdftotext -nopgbrk "$pdf" "$txt"
  git add -- "$txt"
  git commit -m "Extract content from $pdf"

  # Optional per-state normalizing script, invoked with the text file as
  # its only argument.
  if [ -f "$script" ]; then
    chmod +x "$script"
    git add -- "$script"
    git commit -m "Save normalizing script $script"
    "./$script" "$txt"
    git add -- "$txt"
    # The script may leave the text untouched.
    commit_staged "Normalize $pdf"
  fi

  cp "$txt" content.txt
  git add content.txt
  # Two consecutive states may extract to identical text.
  commit_staged "Copy $txt to content.txt"

  git checkout "$base_branch"
  git merge "$state"
done

echo "sources prepared and ready for inspection, e.g. with this command:"
echo " git log -p -w --color-words --patience content.txt"