summary | refs | log | tree | commit | diff
path: root/foaf/mkfoaf.sh
blob: 3c2a36ce006fc08f89acdce64e61a55221c4a06f (plain)
  1. #!/bin/sh
  2. # Origin: http://code.google.com/p/lindenb/source/browse/trunk/src/xsl/linkedin2foaf.xsl
  3. # info: http://plindenbaum.blogspot.com/2010/02/linkedinxslt-foaf-people-from.html
  4. set -e
  5. exit1() {
  6. echo "ERROR: $1"
  7. exit 1
  8. }
  9. bindir=$(dirname "$0")
  10. #xsltdir="$bindir"
  11. turtle2foaf() {
  12. set -e
  13. inpath="$1"
  14. outpath="$2"
  15. [ -n "$outpath" ] || outpath=$(echo "$inpath" | perl -pe 's/\.ttl$/.rdf/ or exit 1') || exit1 "Failed resolving output RDF file from input Turtle file \"$inpath\"."
  16. [ -e "$inpath" ] || exit1 "Turtle file \"$inpath\" does not exist."
  17. # [ ! -e "$outpath" ] || exit1 "RDF file \"$outpath\" already exists."
  18. base="$(perl -ne '/^\@base\s+<(http.+)>/ and print $1 and exit;' "$inpath")" || true
  19. rapper ${base:+-I "$base"} -i turtle -o rdfxml-abbrev "$inpath" > "$outpath"
  20. foafsign "$outpath"
  21. }
  22. linkedin2foaf() {
  23. set -e
  24. inpath="$1"
  25. outpath="$2"
  26. [ -n "$outpath" ] || outfile=index.rdf && outpath="$(dirname "$inpath")/linkedin/$outfile"
  27. outdir=$(dirname "$outpath")
  28. tmppath="$outdir/index.html"
  29. [ -e "$inpath" ] || exit1 "Turtle file \"$inpath\" does not exist."
  30. [ ! -e "$tmppath" ] || exit1 "Tempfile \"$tmppath\" already exists."
  31. # TODO: support homepage as fallback for accountName
  32. # id=$(perl -0 -ne '/foaf:accountServiceHomepage\s+<http:\/\/www.linkedin.com\/>\s+;\s+foaf:(?:homepage\s+<(?=http)|accountName\s+")([^<"\s]+)/ and print $1 and exit;' "$inpath") #'
  33. # id=$(perl -0 -ne '/^:me.*?foaf:accountServiceHomepage\s+<http:\/\/www.linkedin.com\/>\s+;\s+foaf:accountName\s+"([^<"\s]+)/ms and print $1 and exit;' "$inpath") #'
  34. # id=62345396
  35. id=jonassm
  36. [ -n "$id" ] || exit1 "Failed to resolve LinkedIn account name."
  37. uri="http://www.linkedin.com/in/$id"
  38. mkdir -p "$outdir"
  39. # work around unescaped &'s in linkedin pages
  40. # xsltproc --html "$bindir/linkedin2foaf.xsl" "$uri" > "$outpath"
  41. wget -q -O "$tmppath" "$uri"
  42. # perl -i -pe 's/&([a-zA-Z0-9]+=|\s)/&amp;$1/g' "$tmppath"
  43. # xsltproc --html "$xsltdir/linkedin2foaf.xsl" "$tmppath" > "$outpath"
  44. perl "$bindir/linkedin2rdf.pl" "$tmppath" "$uri" > "$outpath"
  45. rm -f "$tmppath"
  46. foafsign "$outpath"
  47. }
  48. tidyfacebookfoaf() {
  49. set -e
  50. inpath="$1"
  51. outpath="$2"
  52. [ -e "$inpath" ] || exit1 "Facebook Exporter RDF file \"$inpath\" does not exist."
  53. perl "$bindir/fbfixup.pl" "$inpath" > "$outpath"
  54. foafsign "$outpath"
  55. }
  56. foafsign() {
  57. set -e
  58. inpath="$1"
  59. outpath="${inpath}_sig.pgp"
  60. gpg -a -o- --detach-sign "$inpath" > "$outpath"
  61. }
  62. paths="$*"
  63. [ -n "$paths" ] || paths=index.ttl
  64. for path in $paths; do
  65. basedir=$(dirname "$path")
  66. turtle2foaf "$path"
  67. linkedin2foaf "$path" || true
  68. # tidyfacebookfoaf "$basedir/facebook/foaf.xml" "$basedir/facebook/index.rdf" || true
  69. foafsign "$basedir/facebook/index.rdf" || true
  70. done