summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonas Smedegaard <dr@jones.dk> 2013-04-25 17:53:46 +0200
committer: Jonas Smedegaard <dr@jones.dk> 2013-04-25 17:53:46 +0200
commit: 779d396eec65a25b2f0ae59845a23707b602746b (patch)
tree: 492ad0813cccb1807707dea50efd86c2fbde56ab
Save mkdiff.sh
-rwxr-xr-x mkdiff.sh 32
1 files changed, 32 insertions, 0 deletions
diff --git a/mkdiff.sh b/mkdiff.sh
new file mode 100755
index 0000000..889e136
--- /dev/null
+++ b/mkdiff.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+set -e
+
+test ! -d .git # simple safety check
+git init
+
+[ -f mkdiff.sh ] || cp "$0" mkdiff.sh
+chmod +x mkdiff.sh
+git add mkdiff.sh; git commit -m "Save mkdiff.sh"
+
+states=$(ls -1 *.pdf | xargs basename -s .pdf)
+for state in $states; do
+ outfile=$state.txt
+ git checkout -b $state
+ git add $state.pdf; git commit -m "Pristine file $state.pdf"
+ pdftotext -nopgbrk $state.pdf $state.txt
+ git add $state.txt; git commit -m "Extract content from $state.pdf"
+ if [ -f $state.pl ]; then
+ chmod +x $state.pl
+ git add $state.pl; git commit -m "Save normalizing script $state.pl"
+ ./$state.pl $state.txt
+ git commit -m "Normalize $state.pdf" $state.txt
+ fi
+ cp $state.txt content.txt
+ git add content.txt; git commit -m "Copy $state.txt to content.txt"
+ git checkout master
+ git merge $state
+done
+
+echo "sources prepared and ready for inspection, e.g. with this command:"
+echo " git log -p -w --color-words --patience content.txt"