summary refs log tree commit diff
path: root/foaf/linkedin2rdf.pl
diff options
context:
space:
mode:
author Jonas Smedegaard <dr@jones.dk> 2011-07-14 20:12:18 +0200
committer Jonas Smedegaard <dr@jones.dk> 2011-07-14 20:12:18 +0200
commit a437311bce168de6fad29f55543ef6574aa5fc6d (patch)
tree f9108a3770f3e35af551ae603fc0adbbac6aeccf /foaf/linkedin2rdf.pl
parent 4de7c4cd72951b7c623debf8d363358768861432 (diff)
Add linkedin2rdf.pl and (slower but slightly more greedy) any2rdf.pl, and use the latter in mkfoaf.sh.
Diffstat (limited to 'foaf/linkedin2rdf.pl')
-rwxr-xr-x foaf/linkedin2rdf.pl 43
1 files changed, 43 insertions, 0 deletions
diff --git a/foaf/linkedin2rdf.pl b/foaf/linkedin2rdf.pl
new file mode 100755
index 0000000..3e4eb24
--- /dev/null
+++ b/foaf/linkedin2rdf.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl -w
+
+# linkedin2rdf.pl - convert microformats found in an HTML file to RDF/XML.
+#
+# Usage: linkedin2rdf.pl HTML_FILE BASE_URI
+#
+# Reads HTML_FILE, parses it with HTML::Microformats (all profiles
+# assumed) using BASE_URI as the document URI, and prints the resulting
+# model to STDOUT serialised as RDF/XML.
+
+use strict;
+use warnings;
+
+use HTML::Microformats;
+use RDF::TrineShortcuts;
+use File::Slurp;
+
+# Fail early with a usage hint instead of an obscure read_file() error.
+die "Usage: $0 HTML_FILE BASE_URI\n" unless @ARGV;
+
+my $markup = read_file( $ARGV[0] );
+my $base_uri = $ARGV[1];
+
+# Prefix => namespace map handed to the serialiser below.
+# hint: locate common abbreviations at http://prefix.cc/
+#
+my $NAMESPACES= {
+    rdfs => 'http://www.w3.org/2000/01/rdf-schema#',
+    dc => 'http://purl.org/dc/terms/',
+    foaf => 'http://xmlns.com/foaf/0.1/',
+    xhtml => 'http://www.w3.org/1999/xhtml/vocab#',
+    vcard => 'http://www.w3.org/2006/vcard/ns#',
+    vcardx => 'http://buzzword.org.uk/rdf/vcardx#',
+    hcterms => 'http://purl.org/uF/hCard/terms/',
+    ical => 'http://www.w3.org/2002/12/cal/icaltzd#',
+    cv => 'http://purl.org/captsolo/resume-rdf/0.2/cv#',
+    hresume => 'http://ontologi.es/hresume#',
+# TODO: somehow make RDF::Trine abbreviate labels
+# 'xsd:dateTime' => 'http://www.w3.org/2001/XMLSchema#dateTime',
+# 'geo:SpatialThing' => 'http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing',
+};
+
+my $doc = HTML::Microformats
+    ->new_document($markup, $base_uri)
+# ->assume_profile(qw(hResume hCard hCalendar))
+    ->assume_all_profiles
+;
+$doc->parse_microformats;
+#print $doc->serialise_model(as => 'RDFXML');
+my $model = $doc->model;
+my $output = rdf_string($model,'RDFXML',
+    namespaces => { %$NAMESPACES },
+);
+
+# TODO: somehow make RDF::Trine generate stable IDs
+# Until then, drop the 32 hex digits of each blank node ID, which
+# presumably differ between runs.  The leading "B" is kept: rdf:nodeID
+# values must be XML NCNames, and those may not start with a digit.
+$output =~ s/(?<=rdf:nodeID="B)[a-f0-9]{32}(?=0[0-9]{3}")//g;
+
+print $output;