diff options
author | Jonas Smedegaard <dr@jones.dk> | 2010-10-15 16:34:12 +0200 |
---|---|---|
committer | Jonas Smedegaard <dr@jones.dk> | 2010-10-15 16:34:12 +0200 |
commit | 3cdba8ac30ad6b8c822e7259bfb74ad56a983471 (patch) | |
tree | baeda8302118266f06503079ad05cbd260d7d053 /projects/foaf/mkfoaf.sh | |
parent | 9f4ccc261f0653ff52d467524720ba733f3dc988 (diff) |
Add foaf files and scripts.
Diffstat (limited to 'projects/foaf/mkfoaf.sh')
-rwxr-xr-x | projects/foaf/mkfoaf.sh | 78 |
1 files changed, 78 insertions, 0 deletions
#!/bin/sh
#
# mkfoaf.sh (projects/foaf/mkfoaf.sh) - generate signed FOAF RDF files.
#
# Usage: mkfoaf.sh [TURTLE_FILE...]        (default: index.ttl)
#
# For every Turtle file given:
#   1. convert it to RDF/XML with rapper (required step),
#   2. best-effort: download the LinkedIn profile named in the Turtle file
#      and convert it to FOAF with linkedin2foaf.xsl,
#   3. best-effort: tidy <dir>/facebook/foaf.xml into <dir>/facebook/index.rdf
#      with fbfixup.pl.
# Each RDF file produced gets an ASCII-armored detached GPG signature written
# next to it as <file>.asc.
#
# Origin: http://code.google.com/p/lindenb/source/browse/trunk/src/xsl/linkedin2foaf.xsl
# info: http://plindenbaum.blogspot.com/2010/02/linkedinxslt-foaf-people-from.html

set -e

# Print an error message ($1) on stderr and terminate the current (sub)shell
# with status 1.
exit1() {
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}

# Helper scripts and stylesheets are looked up next to this script.
bindir=$(dirname "$0")
xsltdir="$bindir"

# Convert a Turtle file to abbreviated RDF/XML and sign the result.
#   $1 - input Turtle file (must exist)
#   $2 - output RDF file (optional; default: $1 with ".ttl" replaced by ".rdf")
turtle2foaf() {
  inpath="$1"
  outpath="$2"
  if [ -z "$outpath" ]; then
    case "$inpath" in
      *.ttl) outpath="${inpath%.ttl}.rdf" ;;
      *) exit1 "Failed resolving output RDF file from input Turtle file \"$inpath\"." ;;
    esac
  fi

  [ -e "$inpath" ] || exit1 "Turtle file \"$inpath\" does not exist."
#  [ ! -e "$outpath" ] || exit1 "RDF file \"$outpath\" already exists."

  # Pick up an "@base <http...>" declaration, if any, and hand it to rapper
  # as the input base URI. A missing declaration is not an error.
  base="$(perl -ne '/^\@base\s+<(http.+)>/ and print $1 and exit;' "$inpath")" || true
  if [ -n "$base" ]; then
    rapper -I "$base" -i turtle -o rdfxml-abbrev "$inpath" > "$outpath" || return 1
  else
    rapper -i turtle -o rdfxml-abbrev "$inpath" > "$outpath" || return 1
  fi
  foafsign "$outpath"
}

# Fetch the LinkedIn profile referenced by a Turtle file, convert it to FOAF
# and sign the result.
#   $1 - input Turtle file (must exist and name a LinkedIn account for <#me>)
#   $2 - output RDF file (optional; default: <dir of $1>/linkedin/index.rdf)
# The downloaded page is staged as index.html in the output directory and is
# removed again whether or not the conversion succeeds.
linkedin2foaf() {
  inpath="$1"
  outpath="$2"
  # Only fall back to the default location when no output path was given.
  # (An "A || B && C" chain here would run C unconditionally after a true A.)
  if [ -z "$outpath" ]; then
    outpath="$(dirname "$inpath")/linkedin/index.rdf"
  fi
  outdir=$(dirname "$outpath")
  tmppath="$outdir/index.html"

  [ -e "$inpath" ] || exit1 "Turtle file \"$inpath\" does not exist."
  [ ! -e "$tmppath" ] || exit1 "Tempfile \"$tmppath\" already exists."

# TODO: support homepage as fallback for accountName
#  id=$(perl -0 -ne '/foaf:accountServiceHomepage\s+<http:\/\/www.linkedin.com\/>\s+;\s+foaf:(?:homepage\s+<(?=http)|accountName\s+")([^<"\s]+)/ and print $1 and exit;' "$inpath") #'
  id=$(perl -0 -ne '/^<#me>.*?foaf:accountServiceHomepage\s+<http:\/\/www.linkedin.com\/>\s+;\s+foaf:accountName\s+"([^<"\s]+)/ms and print $1 and exit;' "$inpath") #'
  [ -n "$id" ] || exit1 "Failed to resolve LinkedIn account name."

  mkdir -p "$outdir" || return 1
# work around unescaped &'s in linkedin pages
#  xsltproc --html "$bindir/linkedin2foaf.xsl" "http://www.linkedin.com/in/$id" > "$outpath"
  # Statuses are checked explicitly: "set -e" is not in effect while this
  # function runs on the left-hand side of "||".
  status=0
  {
    wget -q -O "$tmppath" "http://www.linkedin.com/in/$id" &&
      perl -i -pe 's/&([a-zA-Z0-9]+=)/&amp;$1/g' "$tmppath" &&
      xsltproc --html "$xsltdir/linkedin2foaf.xsl" "$tmppath" > "$outpath"
  } || status=$?
  rm -f "$tmppath"
  [ "$status" -eq 0 ] || return "$status"
  foafsign "$outpath"
}

# Clean up a Facebook Exporter RDF dump with fbfixup.pl and sign the result.
#   $1 - input RDF file (must exist)
#   $2 - output RDF file
tidyfacebookfoaf() {
  inpath="$1"
  outpath="$2"

  [ -e "$inpath" ] || exit1 "Facebook Exporter RDF file \"$inpath\" does not exist."

  perl "$bindir/fbfixup.pl" "$inpath" > "$outpath" || return 1
  foafsign "$outpath"
}

# Write an ASCII-armored detached GPG signature for file $1 to "$1.asc".
# Uses only its positional parameter so the callers' variables stay intact.
foafsign() {
  gpg -a -o- --detach-sign "$1" > "$1.asc"
}

# Default to index.ttl when no file is given; iterate over "$@" so paths
# containing whitespace or glob characters are passed through intact.
[ "$#" -gt 0 ] || set -- index.ttl

for path in "$@"; do
  basedir=$(dirname "$path")
  turtle2foaf "$path"
  # The LinkedIn and Facebook steps are optional. They run in subshells so
  # that an exit1 inside them ends only that step, not the whole script.
  ( linkedin2foaf "$path" ) || true
  ( tidyfacebookfoaf "$basedir/facebook/foaf.xml" "$basedir/facebook/index.rdf" ) || true
done