#!/bin/sh
#
# mkdiff.sh - turn the PDF files in the current directory into a git history
# whose diffs of content.txt show how the extracted text changes from one PDF
# to the next.
#
# Usage: run from a directory that contains *.pdf files and is NOT already a
# git repository. For each NAME.pdf (in glob order) the script:
#   1. creates a branch called NAME,
#   2. commits the pristine PDF,
#   3. extracts its text with `pdftotext -nopgbrk` into NAME.txt and commits it,
#   4. if NAME.pl exists, commits it, runs it as `./NAME.pl NAME.txt` and
#      commits the resulting NAME.txt,
#   5. copies NAME.txt to content.txt and commits that,
#   6. merges the branch back into the initial branch.
#
# Any failing command aborts the run (set -e); this includes a `git commit`
# that finds nothing to commit.
set -eu

# Simple safety check: never operate on a directory that is already a repo.
if [ -d .git ]; then
  echo "mkdiff.sh: .git already exists here; refusing to run" >&2
  exit 1
fi

# Collect the PDFs with a glob instead of parsing `ls`, so names are kept
# intact. An unmatched glob stays literal, hence the existence test.
set -- *.pdf
if [ ! -e "$1" ]; then
  echo "mkdiff.sh: no *.pdf files found in $(pwd)" >&2
  exit 1
fi

git init

# Keep a copy of this script inside the repository it creates.
[ -f mkdiff.sh ] || cp -- "$0" mkdiff.sh
chmod +x mkdiff.sh
git add -- mkdiff.sh
git commit -m "Save mkdiff.sh"

# The initial branch is not necessarily "master" (init.defaultBranch), so ask
# git which branch we are on rather than hard-coding the name.
main_branch=$(git symbolic-ref --short HEAD)

for pdf in "$@"; do
  state=${pdf%.pdf}

  git checkout -b "$state"

  git add -- "$state.pdf"
  git commit -m "Pristine file $state.pdf"

  pdftotext -nopgbrk "$state.pdf" "$state.txt"
  git add -- "$state.txt"
  git commit -m "Extract content from $state.pdf"

  # Optional per-PDF normalizing script, applied in place to the text file.
  if [ -f "$state.pl" ]; then
    chmod +x "$state.pl"
    git add -- "$state.pl"
    git commit -m "Save normalizing script $state.pl"
    "./$state.pl" "$state.txt"
    git commit -m "Normalize $state.pdf" -- "$state.txt"
  fi

  # content.txt is the single file whose history carries the comparison.
  cp -- "$state.txt" content.txt
  git add -- content.txt
  git commit -m "Copy $state.txt to content.txt"

  git checkout "$main_branch"
  git merge "$state"
done

echo "sources prepared and ready for inspection, e.g. with this command:"
echo " git log -p -w --color-words --patience content.txt"