#!/usr/bin/perl -w
#
# linkedin2rdf.pl - extract microformats from an HTML file and print them
# as RDF/XML.
#
# Usage: linkedin2rdf.pl FILE [BASE_URI]
#
#   FILE      path of the file holding the HTML markup to parse
#   BASE_URI  base URI handed to HTML::Microformats together with the
#             markup (passed through unchanged; may be omitted)
#
# The serialised model is written to STDOUT.
#
# Provenance: added as foaf/linkedin2rdf.pl by Jonas Smedegaard on
# 2011-07-14 (commit a437311bce168de6fad29f55543ef6574aa5fc6d), alongside
# the slower but slightly more greedy any2rdf.pl.

use strict;
use warnings;

use HTML::Microformats;
use RDF::TrineShortcuts;
use File::Slurp;

my ( $html_file, $base_uri ) = @ARGV;

# Fail early with a readable message instead of an error from deep inside
# File::Slurp when no input file was given.
die "Usage: $0 FILE [BASE_URI]\n" unless defined $html_file;

my $markup = read_file($html_file);

# Prefix => namespace URI map handed to the serialiser.
# hint: locate common abbreviations at http://prefix.cc/
my %NAMESPACES = (
    rdfs    => 'http://www.w3.org/2000/01/rdf-schema#',
    dc      => 'http://purl.org/dc/terms/',
    foaf    => 'http://xmlns.com/foaf/0.1/',
    xhtml   => 'http://www.w3.org/1999/xhtml/vocab#',
    vcard   => 'http://www.w3.org/2006/vcard/ns#',
    vcardx  => 'http://buzzword.org.uk/rdf/vcardx#',
    hcterms => 'http://purl.org/uF/hCard/terms/',
    ical    => 'http://www.w3.org/2002/12/cal/icaltzd#',
    cv      => 'http://purl.org/captsolo/resume-rdf/0.2/cv#',
    hresume => 'http://ontologi.es/hresume#',

    # TODO: somehow make RDF::Trine abbreviate labels
    #   'xsd:dateTime'     => 'http://www.w3.org/2001/XMLSchema#dateTime',
    #   'geo:SpatialThing' => 'http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing',
);

# Every profile is assumed; a narrower alternative would be
#   ->assume_profile(qw(hResume hCard hCalendar))
my $doc = HTML::Microformats->new_document( $markup, $base_uri )
    ->assume_all_profiles;
$doc->parse_microformats;

# Alternative serialisation path, kept for reference:
#   print $doc->serialise_model(as => 'RDFXML');
my $model  = $doc->model;
my $output = rdf_string(
    $model, 'RDFXML',
    namespaces => {%NAMESPACES},    # pass a copy, as the original did
);

# TODO: somehow make RDF::Trine generate stable IDs
#
# Until then, strip the 32 hex digits from blank node IDs of the form
# "B" + 32 hex digits + "0" + three digits, so the IDs no longer differ
# between runs.  The leading "B" is deliberately kept: an rdf:nodeID value
# must be an XML NCName, which may not begin with a digit, so reducing the
# ID to e.g. "0123" would make the RDF/XML invalid.
$output =~ s{ (?<= rdf:nodeID="B ) [a-f0-9]{32} (?= 0 [0-9]{3} " ) }{}gx;

print $output;