diff options
Diffstat (limited to 'projects')
-rwxr-xr-x | projects/foaf/fbfixup.pl | 41 | ||||
-rw-r--r-- | projects/foaf/linkedin2foaf.xsl | 306 | ||||
-rwxr-xr-x | projects/foaf/mkfoaf.sh | 78 | ||||
-rwxr-xr-x | projects/foaf/pingfoaf.sh | 5 |
4 files changed, 430 insertions, 0 deletions
diff --git a/projects/foaf/fbfixup.pl b/projects/foaf/fbfixup.pl
new file mode 100755
index 0000000..5cc5f60
--- /dev/null
+++ b/projects/foaf/fbfixup.pl
@@ -0,0 +1,41 @@
+#!/usr/bin/perl
+
+# Sanitize RDF from Facebook Exporter <http://apps.facebook.com/foaf_exporter/>
+use strict;
+use warnings;
+local $/;    # slurp mode: each input file is read as one string so patterns can span lines
+while (<>) {
+    # Drop the hard-coded locality/region/country block and the empty os:* fields after it.
+    s{
+        <foaf:locality>Mørkøv</foaf:locality>\n\s*
+        <foaf:region>Vestsjalland</foaf:region>\n\s*
+        <foaf:country>Denmark</foaf:country>\n\s*
+
+        <os:aboutMe></os:aboutMe>\n\s*
+        <os:relationshipStatus>.*?</os:relationshipStatus>\n\s*
+        <os:books></os:books>\n\s*
+        <os:music></os:music>\n\s*
+        <os:quotes></os:quotes>
+    }{}gsx;
+    # Close and reopen foaf:knows between two adjacent foaf:Person elements.
+    s{
+        </foaf:Person>\n\s*
+        <foaf:Person>
+    }{</foaf:Person>
+    </foaf:knows>
+    <foaf:knows>
+    <foaf:Person>}gsx;
+    # Collapse the Facebook OnlineAccount block into a plain foaf:homepage link.
+    s{
+        <foaf:holdsAccount>\n\s*
+        <foaf:OnlineAccount>\n\s*
+        <foaf:accountServiceHomepage\srdf:resource="http://www.facebook.com/"/>\n\s*
+        <foaf:accountProfilePage\srdf:resource="(http://www.facebook.com/.*?)"/>\n\s*
+        <foaf:accountName>.*?</foaf:accountName>\n\s*
+        </foaf:OnlineAccount>\n\s*
+        </foaf:holdsAccount>
+    }{<foaf:homepage rdf:resource="$1"/>}gsx;
+    # Add a chat jabberID after each homepage whose URL ends in a plain username (letters, digits, dots).
+    s,(( *)<foaf:homepage rdf:resource="http://www.facebook.com/([a-z0-9.]+)"/>),$1\n$2<foaf:jabberID>$3\@chat.facebook.com</foaf:jabberID>,gs;
+    print;
+}
diff --git a/projects/foaf/linkedin2foaf.xsl b/projects/foaf/linkedin2foaf.xsl
new file mode 100644
index 0000000..2d24452
--- /dev/null
+++ b/projects/foaf/linkedin2foaf.xsl
@@ -0,0 +1,306 @@
+<?xml version='1.0' ?>
+<xsl:stylesheet
+  xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
+  xmlns:foaf="http://xmlns.com/foaf/0.1/"
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
+  xmlns:dc="http://purl.org/dc/elements/1.1/"
+  xmlns:vcard="http://www.w3.org/2001/vcard-rdf/3.0#"
+  xmlns:doac="http://ramonantonio.net/doac/0.1/"
+  version='1.0'
+  >
+<xsl:output method="xml" indent="yes" encoding="UTF-8" />
+
+<!--
+
+Author:
+  Pierre Lindenbaum PhD
+  plindenbaum@yahoo.fr
+  http://plindenbaum.blogspot.com
+
+Motivation:
+  transform a linkedin profile to a FOAF profile
+  Warning it just works with the current linkedin html (Last updated: 2010-02-08)
+
+Param:
+  'geoloc'=false: don't use geonames.org to find position
+  'ppd'=false: don't print foaf:personalProfileDocument
+
+Usage:
+  warning USE a 'www' URL
+    http://www.linkedin.com...
+
+  rather than a local one
+    http://it.linkedin.com....
+    http://fr.linkedin.com....
+    http://en.linkedin.com....
+
+  xsltproc \-\-html linkedin2foaf.xsl http://www.linkedin.com/in/lindenbaum
+  xsltproc \-\-html linkedin2foaf.xsl http://www.linkedin.com/in/dsingh
+-->
+<!-- use geonames -->
+<xsl:param name="geoloc">yes</xsl:param>
+<!-- print foaf:personalProfileDocument -->
+<xsl:param name="ppd">yes</xsl:param>
+
+<xsl:template match="/">
+<rdf:RDF>
+<xsl:apply-templates select="html"/>
+</rdf:RDF>
+</xsl:template>
+
+<xsl:template match="html">
+
+<xsl:apply-templates select="body"/>
+
+</xsl:template>
+
+<xsl:template match="body">
+<xsl:variable name="action">
+<xsl:value-of select="//a[@class='action' and @rel='nofollow'][1]/@href"/>
+</xsl:variable>
+<xsl:variable name="lkid" select="concat('http://www.linkedin.com/ppl/webprofile?id=',substring-before(substring-after($action,'id='),'&amp;'))"/>
+
+<xsl:if test="$ppd='yes'">
+<xsl:element name="foaf:PersonalProfileDocument">
+  <xsl:attribute name="rdf:about"><xsl:text></xsl:text></xsl:attribute>
+  <xsl:element name="foaf:maker">
+    <xsl:attribute name="rdf:resource">http://code.google.com/p/lindenb/source/browse/trunk/src/xsl/linkedin2foaf.xsl</xsl:attribute>
+  </xsl:element>
+  <xsl:element name="foaf:primaryTopic">
+    <xsl:attribute name="rdf:resource"><xsl:value-of select="$lkid"/></xsl:attribute>
+  </xsl:element>
+</xsl:element>
+</xsl:if>
+
+<xsl:element name="foaf:Person">
+<xsl:attribute name="rdf:about"><xsl:value-of select="$lkid"/></xsl:attribute>
+  <xsl:apply-templates/>
+
+  <foaf:holdsAccount>
+    <xsl:element name="foaf:OnlineAccount">
+      <xsl:attribute name="rdf:about">
+        <xsl:value-of select="$lkid"/>
+      </xsl:attribute>
+      <foaf:accountServiceHomepage rdf:resource="http://www.linkedin.com"/>
+    </xsl:element>
+  </foaf:holdsAccount>
+
+</xsl:element>
+</xsl:template>
+
+<xsl:template match="h1[@id='name']">
+<foaf:name><xsl:value-of select="."/></foaf:name>
+<xsl:apply-templates/>
+</xsl:template>
+
+<xsl:template match="span[@class='given-name']">
+<foaf:givenname><xsl:value-of select="."/></foaf:givenname>
+</xsl:template>
+
+<xsl:template match="span[@class='family-name']">
+<foaf:family_name><xsl:value-of select="."/></foaf:family_name>
+</xsl:template>
+
+
+<xsl:template match="p[@class='headline title summary']|p[@class='headline title']">
+<doac:summary>
+<xsl:value-of select="normalize-space(.)"/>
+</doac:summary>
+</xsl:template>
+
+<xsl:template match="ul[@class='websites']">
+<xsl:apply-templates select="li/a[@href]"/>
+</xsl:template>
+
+<xsl:template match="div[@id='experience']">
+<xsl:for-each select="ul/li">
+<doac:experience>
+<doac:Experience>
+<doac:title><xsl:value-of select="normalize-space(h3)"/></doac:title>
+<doac:location><xsl:value-of select="normalize-space(h4[@class='org summary'])"/></doac:location>
+
+<xsl:if test="p[@class='period']/abbr[@class='dtstart']/@title">
+  <doac:date-starts>
+    <xsl:value-of select="normalize-space(p[@class='period']/abbr[@class='dtstart']/@title)"/>
+  </doac:date-starts>
+</xsl:if>
+
+<xsl:if test="p[@class='period']/abbr[@class='dtend']/@title">
+  <doac:date-ends>
+    <xsl:value-of select="normalize-space(p[@class='period']/abbr[@class='dtend']/@title)"/>
+  </doac:date-ends>
+</xsl:if>
+
+<xsl:if test="p[@class='description']">
+  <doac:activity>
+    <xsl:value-of select="normalize-space(p[@class='description'])"/>
+  </doac:activity>
+</xsl:if>
+</doac:Experience>
+</doac:experience>
+</xsl:for-each>
+</xsl:template>
+
+
+<xsl:template match="a[@href][@class='url']">
+<xsl:choose>
+<xsl:when test="starts-with(@href,'http://twitter.com/')">
+  <foaf:holdsAccount>
+    <xsl:element name="foaf:OnlineAccount">
+      <xsl:attribute name="rdf:about">
+        <xsl:value-of select="@href"/>
+      </xsl:attribute>
+      <foaf:accountName>
+        <xsl:value-of select="substring-after(@href,'.com/')"/>
+      </foaf:accountName>
+      <foaf:accountServiceHomepage rdf:resource="http://twitter.com"/>
+    </xsl:element>
+  </foaf:holdsAccount>
+</xsl:when>
+<xsl:when test="starts-with(@href,'http://friendfeed.com/')">
+  <foaf:holdsAccount>
+    <xsl:element name="foaf:OnlineAccount">
+      <xsl:attribute name="rdf:about">
+        <xsl:value-of select="@href"/>
+      </xsl:attribute>
+      <foaf:accountName>
+        <xsl:value-of select="substring-after(@href,'.com/')"/>
+      </foaf:accountName>
+      <foaf:accountServiceHomepage rdf:resource="http://friendfeed.com"/>
+    </xsl:element>
+  </foaf:holdsAccount>
+</xsl:when>
+<xsl:otherwise>
+  <xsl:element name="foaf:homepage">
+    <xsl:attribute name="rdf:resource">
+      <xsl:value-of select="@href"/>
+    </xsl:attribute>
+  </xsl:element>
+</xsl:otherwise>
+</xsl:choose>
+</xsl:template>
+
+<xsl:template match="div[@class='image']">
+  <xsl:apply-templates select="img[@class='photo']"/>
+</xsl:template>
+
+<xsl:template match="img[@class='photo']">
+<foaf:depiction>
+  <xsl:element name="foaf:Image">
+    <xsl:attribute name="rdf:about">
+      <xsl:value-of select="@src"/>
+    </xsl:attribute>
+    <dc:title><xsl:value-of select="@alt"/></dc:title>
+  </xsl:element>
+</foaf:depiction>
+</xsl:template>
+
+<xsl:template match="p[@class='skills']">
+<xsl:call-template name="skills">
+<xsl:with-param name="s" select="normalize-space(translate(.,',',' '))"/>
+</xsl:call-template>
+</xsl:template>
+
+<xsl:template name="skills">
+<xsl:param name="s"/>
+<xsl:choose>
+<xsl:when test="contains($s,' ')">
+  <xsl:call-template name="skills">
+    <xsl:with-param name="s" select="normalize-space(substring-after($s,' '))"/>
+  </xsl:call-template>
+  <doac:skill><xsl:value-of select="substring-before($s,' ')"/></doac:skill>
+</xsl:when>
+<xsl:otherwise>
+  <doac:skill><xsl:value-of select="$s"/></doac:skill>
+</xsl:otherwise>
+</xsl:choose>
+</xsl:template>
+
+
+<xsl:template match="div[@id='education']">
+<xsl:for-each select="ul[@class='vcalendar']/li">
+<doac:education>
+  <doac:Education>
+  <foaf:organization><xsl:value-of select="normalize-space(h3)"/></foaf:organization>
+  <doac:title><xsl:value-of select="normalize-space(div/p/span[@class='degree'])"/></doac:title>
+
+  <xsl:if test="div/p/abbr[@class='dtstart']/@title">
+    <doac:date-starts>
+      <xsl:value-of select="normalize-space(div/p/abbr[@class='dtstart']/@title)"/>
+    </doac:date-starts>
+  </xsl:if>
+
+  <xsl:if test="div/p/abbr[@class='dtend']/@title">
+    <doac:date-ends>
+      <xsl:value-of select="normalize-space(div/p/abbr[@class='dtend']/@title)"/>
+    </doac:date-ends>
+  </xsl:if>
+
+  <xsl:if test="div/p[@class='notes']">
+    <doac:subject>
+      <xsl:value-of select="normalize-space(div/p[@class='notes'])"/>
+    </doac:subject>
+  </xsl:if>
+  </doac:Education>
+</doac:education>
+</xsl:for-each>
+</xsl:template>
+
+
+<xsl:template match="p[@class='locality']">
+<xsl:if test="$geoloc='yes'">
+<xsl:variable name="s1" select="normalize-space(.)"/>
+<xsl:variable name="s2">
+  <xsl:choose>
+    <xsl:when test="contains($s1,' Area,')">
+      <xsl:value-of select="concat(substring-before($s1,' Area,'),substring-after($s1,' Area,'))"/>
+    </xsl:when>
+    <xsl:otherwise>
+      <xsl:value-of select="$s1"/>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:variable>
+
+<xsl:variable name="url" select="concat('http://ws.geonames.org/search?q=',translate($s2,' ','+'),'&amp;maxRows=1')"/>
+<xsl:message terminate="no">Downloading <xsl:value-of select="$url"/> ...</xsl:message>
+  <xsl:apply-templates select="document($url,/geonames)" mode="geo"/>
+<xsl:message terminate="no">Done (<xsl:value-of select="$url"/>).</xsl:message>
+</xsl:if>
+</xsl:template>
+
+<xsl:template match="script|head|meta|link">
+</xsl:template>
+
+<xsl:template match="div|span">
+<xsl:apply-templates/>
+</xsl:template>
+
+<xsl:template match="*|text()">
+<xsl:apply-templates/>
+</xsl:template>
+
+<xsl:template match="/" mode="geo">
+<xsl:apply-templates select="geonames" mode="geo"/>
+</xsl:template>
+
+<xsl:template match="geonames" mode="geo">
+<xsl:apply-templates select="geoname[1]" mode="geo"/>
+</xsl:template>
+
+<xsl:template match="geoname" mode="geo">
+<foaf:based_near>
+  <geo:Point>
+    <dc:title>
+      <xsl:value-of select="name"/>
+      <xsl:text>, </xsl:text>
+      <xsl:value-of select="countryCode"/>
+    </dc:title>
+    <geo:long><xsl:value-of select="lng"/></geo:long>
+    <geo:lat><xsl:value-of select="lat"/></geo:lat>
+  </geo:Point>
+</foaf:based_near>
+</xsl:template>
+
+
+</xsl:stylesheet>
diff --git a/projects/foaf/mkfoaf.sh b/projects/foaf/mkfoaf.sh
new file mode 100755
index 0000000..dbb842f
--- /dev/null
+++ b/projects/foaf/mkfoaf.sh
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+# Origin: http://code.google.com/p/lindenb/source/browse/trunk/src/xsl/linkedin2foaf.xsl
+# info: http://plindenbaum.blogspot.com/2010/02/linkedinxslt-foaf-people-from.html
+
+set -e
+# exit1 MESSAGE: report MESSAGE on stderr and abort the script with status 1.
+exit1() {
+    echo "ERROR: $1" >&2
+    exit 1
+}
+
+bindir=$(dirname "$0")
+xsltdir="$bindir"
+# turtle2foaf INPATH [OUTPATH]: convert Turtle to RDF/XML with rapper (default OUTPATH: .ttl -> .rdf) and sign it.
+turtle2foaf() {
+    inpath="$1"
+    outpath="$2"
+    [ -n "$outpath" ] || outpath=$(echo "$inpath" | perl -pe 's/\.ttl$/.rdf/ or exit 1') || exit1 "Failed resolving output RDF file from input Turtle file \"$inpath\"."
+
+    [ -e "$inpath" ] || exit1 "Turtle file \"$inpath\" does not exist."
+#    [ ! -e "$outpath" ] || exit1 "RDF file \"$outpath\" already exists."
+
+    base="$(perl -ne '/^\@base\s+<(http.+)>/ and print $1 and exit;' "$inpath")" || true
+    rapper ${base:+-I "$base"} -i turtle -o rdfxml-abbrev "$inpath" > "$outpath"
+    foafsign "$outpath"
+}
+# linkedin2foaf INPATH [OUTPATH]: download the LinkedIn profile named in INPATH, convert it to FOAF and sign it.
+linkedin2foaf() {
+    inpath="$1"
+    outpath="$2"
+    [ -n "$outpath" ] || outpath="$(dirname "$inpath")/linkedin/index.rdf"
+    outdir=$(dirname "$outpath")
+    tmppath="$outdir/index.html"
+
+    [ -e "$inpath" ] || exit1 "Turtle file \"$inpath\" does not exist."
+    [ ! -e "$tmppath" ] || exit1 "Tempfile \"$tmppath\" already exists."
+
+# TODO: support homepage as fallback for accountName
+#    id=$(perl -0 -ne '/foaf:accountServiceHomepage\s+<http:\/\/www.linkedin.com\/>\s+;\s+foaf:(?:homepage\s+<(?=http)|accountName\s+")([^<"\s]+)/ and print $1 and exit;' "$inpath") #'
+    id=$(perl -0 -ne '/^<#me>.*?foaf:accountServiceHomepage\s+<http:\/\/www.linkedin.com\/>\s+;\s+foaf:accountName\s+"([^<"\s]+)/ms and print $1 and exit;' "$inpath") #'
+    [ -n "$id" ] || exit1 "Failed to resolve LinkedIn account name."
+
+    mkdir -p "$outdir"
+# work around unescaped &'s in linkedin pages
+#    xsltproc --html "$bindir/linkedin2foaf.xsl" "http://www.linkedin.com/in/$id" > "$outpath"
+    wget -q -O "$tmppath" "http://www.linkedin.com/in/$id"
+    perl -i -pe 's/&([a-zA-Z0-9]+=)/&amp;$1/g' "$tmppath"
+    xsltproc --html "$xsltdir/linkedin2foaf.xsl" "$tmppath" > "$outpath"
+    rm -f "$tmppath"
+    foafsign "$outpath"
+}
+# tidyfacebookfoaf INPATH OUTPATH: clean Facebook Exporter RDF with fbfixup.pl and sign the result.
+tidyfacebookfoaf() {
+    inpath="$1"
+    outpath="$2"
+
+    [ -e "$inpath" ] || exit1 "Facebook Exporter RDF file \"$inpath\" does not exist."
+
+    perl "$bindir/fbfixup.pl" "$inpath" > "$outpath"
+    foafsign "$outpath"
+}
+# foafsign PATH: write an ASCII-armored detached GPG signature of PATH to PATH.asc.
+foafsign() {
+    inpath="$1"
+    outpath="$inpath.asc"
+    gpg -a -o- --detach-sign "$inpath" > "$outpath"
+}
+
+# Default to index.ttl when no Turtle files are given; "$@" keeps paths with spaces intact.
+[ $# -gt 0 ] || set -- index.ttl
+# Optional steps run in subshells so that exit1 inside them cannot abort the whole loop.
+for path in "$@"; do
+    basedir=$(dirname "$path")
+    turtle2foaf "$path"
+    ( linkedin2foaf "$path" ) || true
+    ( tidyfacebookfoaf "$basedir/facebook/foaf.xml" "$basedir/facebook/index.rdf" ) || true
+done
diff --git a/projects/foaf/pingfoaf.sh b/projects/foaf/pingfoaf.sh
new file mode 100755
index 0000000..7f3e795
--- /dev/null
+++ b/projects/foaf/pingfoaf.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Submit the three FOAF document URLs to the Sindice ping API.
+set -e
+# --fail makes curl exit non-zero on HTTP errors so that set -e can notice them.
+curl --fail -H "Accept: text/plain" --data-binary 'http://dr.jones.dk/me http://dr.jones.dk/me/facebook http://dr.jones.dk/me/linkedin' http://sindice.com/api/v2/ping