summaryrefslogtreecommitdiff
path: root/mkdiff.sh
blob: 889e136c123e7f3f86207a955f64885bb61a8762 (plain)
  1. #!/bin/sh
  2. set -e
  3. test ! -d .git # simple safety check
  4. git init
  5. [ -f mkdiff.sh ] || cp "$0" mkdiff.sh
  6. chmod +x mkdiff.sh
  7. git add mkdiff.sh; git commit -m "Save mkdiff.sh"
  8. states=$(ls -1 *.pdf | xargs basename -s .pdf)
  9. for state in $states; do
  10. outfile=$state.txt
  11. git checkout -b $state
  12. git add $state.pdf; git commit -m "Pristine file $state.pdf"
  13. pdftotext -nopgbrk $state.pdf $state.txt
  14. git add $state.txt; git commit -m "Extract content from $state.pdf"
  15. if [ -f $state.pl ]; then
  16. chmod +x $state.pl
  17. git add $state.pl; git commit -m "Save normalizing script $state.pl"
  18. ./$state.pl $state.txt
  19. git commit -m "Normalize $state.pdf" $state.txt
  20. fi
  21. cp $state.txt content.txt
  22. git add content.txt; git commit -m "Copy $state.txt to content.txt"
  23. git checkout master
  24. git merge $state
  25. done
  26. echo "sources prepared and ready for inspection, e.g. with this command:"
  27. echo " git log -p -w --color-words --patience content.txt"