diff options
author | Jonas Smedegaard <dr@jones.dk> | 2011-07-14 20:12:18 +0200 |
---|---|---|
committer | Jonas Smedegaard <dr@jones.dk> | 2011-07-14 20:12:18 +0200 |
commit | a437311bce168de6fad29f55543ef6574aa5fc6d (patch) | |
tree | f9108a3770f3e35af551ae603fc0adbbac6aeccf /foaf/any2rdf.pl | |
parent | 4de7c4cd72951b7c623debf8d363358768861432 (diff) |
Add linkedin2rdf.pl and (slower but slightly more greedy) any2rdf.pl, and use the latter in mkfoaf.sh.
Diffstat (limited to 'foaf/any2rdf.pl')
-rwxr-xr-x | foaf/any2rdf.pl | 41 |
1 files changed, 41 insertions, 0 deletions
#!/usr/bin/perl -w
#
# foaf/any2rdf.pl (added as a new file, mode 100755)
#
# Extract RDF from a markup file and print it as RDF/XML on STDOUT.
#
# Usage: any2rdf.pl MARKUP_FILE BASE_URI
#
#   MARKUP_FILE  path of the document to read
#   BASE_URI     handed to the parser as the document's base URI
#
# The markup is parsed by HTML::Data::Parser into a temporary RDF::Trine
# model, which is then serialised with rdf_string() from
# RDF::TrineShortcuts using the namespace abbreviations declared below.

use strict;
use warnings;

use RDF::Trine;
use HTML::Data::Parser;
use RDF::TrineShortcuts;
use File::Slurp;

# Fail early with a readable message instead of letting read_file()
# complain about an undefined file name.
die "Usage: $0 MARKUP_FILE BASE_URI\n"
    unless @ARGV && defined $ARGV[0] && length $ARGV[0];

my $markup   = read_file( $ARGV[0] );
# Passed through unchanged; NOTE(review): whether the parser tolerates an
# undefined base URI is not visible here, so it is not validated.
my $base_uri = $ARGV[1];

# Prefix => namespace URI map used to abbreviate the serialised output.
# hint: locate common abbreviations at http://prefix.cc/
#
my $NAMESPACES = {
    rdfs    => 'http://www.w3.org/2000/01/rdf-schema#',
    dc      => 'http://purl.org/dc/terms/',
    foaf    => 'http://xmlns.com/foaf/0.1/',
    xhtml   => 'http://www.w3.org/1999/xhtml/vocab#',
    vcard   => 'http://www.w3.org/2006/vcard/ns#',
    vcardx  => 'http://buzzword.org.uk/rdf/vcardx#',
    hcterms => 'http://purl.org/uF/hCard/terms/',
    ical    => 'http://www.w3.org/2002/12/cal/icaltzd#',
    cv      => 'http://purl.org/captsolo/resume-rdf/0.2/cv#',
    hresume => 'http://ontologi.es/hresume#',
# TODO: somehow make RDF::Trine abbreviate labels
#    'xsd:dateTime'     => 'http://www.w3.org/2001/XMLSchema#dateTime',
#    'geo:SpatialThing' => 'http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing',
};

my $parser = HTML::Data::Parser->new;
my $model  = RDF::Trine::Model->temporary_model;

# Collect everything the parser finds in the markup into $model.
$parser->parse_into_model( $base_uri, $markup, $model );

# rdf_string() does the serialising itself, so no separate
# RDF::Trine::Serializer object is needed.  A shallow copy of the
# namespace map is handed over, leaving $NAMESPACES untouched.
my $output = rdf_string(
    $model, 'RDFXML',
    namespaces => { %$NAMESPACES },
);

# TODO: somehow make RDF::Trine generate stable IDs
# Until then, strip the leading "B" + 32 hex characters from every
# rdf:nodeID value that is followed by "0" and three digits, keeping only
# that trailing four-digit part (presumably the hex part differs between
# runs -- confirm against RDF::Trine's blank-node naming).
$output =~ s/(?<=rdf:nodeID=")B[a-f0-9]{32}(?=0[0-9]{3}")//g;

print $output;