blob: 889e136c123e7f3f86207a955f64885bb61a8762 (
plain)
#!/bin/sh
#
# mkdiff.sh - build a scratch git repository that makes the textual
# differences between a set of PDF files easy to inspect.
#
# Usage: run in a directory that holds one or more *.pdf files and is not
# yet a git repository. For every NAME.pdf the script:
#   1. creates a branch NAME and commits the pristine PDF,
#   2. extracts its text with pdftotext into NAME.txt and commits it,
#   3. if an optional normalizing script NAME.pl exists, commits it, runs
#      it as `./NAME.pl NAME.txt`, and commits the resulting NAME.txt,
#   4. copies NAME.txt to content.txt and commits that,
#   5. merges the branch back into the repository's initial branch.
# The history of content.txt then shows document-to-document differences.
#
# Requires: git, pdftotext.
set -eu

# Simple safety check: never touch an already existing repository.
if [ -d .git ]; then
  echo "mkdiff.sh: .git already exists here, refusing to run" >&2
  exit 1
fi

# Gather the input PDFs via globbing (no parsing of ls output). When the
# pattern matches nothing the shell leaves it as the literal '*.pdf', so
# an existence test on the first word detects the "no input" case before
# any repository is created.
set -- *.pdf
if [ ! -e "$1" ]; then
  echo "mkdiff.sh: no *.pdf files found in $(pwd)" >&2
  exit 1
fi

git init

# Remember the name of the initial branch instead of assuming "master";
# it depends on the git version and the init.defaultBranch setting.
base_branch=$(git symbolic-ref --short HEAD)

# Keep a copy of this script in the repository for reproducibility.
[ -f mkdiff.sh ] || cp -- "$0" mkdiff.sh
chmod +x mkdiff.sh
git add mkdiff.sh
git commit -m "Save mkdiff.sh"

for pdf in "$@"; do
  state=${pdf%.pdf}        # branch name / file stem
  txt=$state.txt           # extracted text
  normalizer=$state.pl     # optional per-document normalizing script

  git checkout -b "$state"
  git add -- "$pdf"
  git commit -m "Pristine file $pdf"

  pdftotext -nopgbrk "$pdf" "$txt"
  git add -- "$txt"
  git commit -m "Extract content from $pdf"

  if [ -f "$normalizer" ]; then
    chmod +x "$normalizer"
    git add -- "$normalizer"
    git commit -m "Save normalizing script $normalizer"
    # The normalizer is invoked with the text file as its only argument;
    # the commit below records whatever it changed in that file.
    "./$normalizer" "$txt"
    git commit -m "Normalize $pdf" -- "$txt"
  fi

  # content.txt is the single path whose history carries every document.
  cp -- "$txt" content.txt
  git add content.txt
  git commit -m "Copy $txt to content.txt"

  git checkout "$base_branch"
  git merge "$state"
done

printf '%s\n' "sources prepared and ready for inspection, e.g. with this command:"
printf '%s\n' " git log -p -w --color-words --patience content.txt"
|