blob: 3e4eb24475a7b0cb5191e9f7e97c9c66323f3f26 (
plain)
#!/usr/bin/perl -w
#
# Extract the microformats embedded in an HTML file and print them as RDF/XML.
#
# Usage: SCRIPT HTML_FILE BASE_URI
#
#   HTML_FILE  path of the HTML document to read
#   BASE_URI   handed to HTML::Microformats->new_document together with the
#              markup
#
# The serialised RDF/XML is written to STDOUT. Before printing, the
# 32-hex-digit part of every matching rdf:nodeID value is removed (see the
# substitution at the bottom), in line with the TODO about stable IDs.
use strict;
use warnings;

use HTML::Microformats;
use RDF::TrineShortcuts;
use File::Slurp;

my ($html_file, $base_uri) = @ARGV;

# Fail early with a usage line instead of letting read_file() choke on undef.
# Only the input file is checked here; $base_uri is passed through unchanged,
# exactly as before.
defined $html_file
    or die "Usage: $0 HTML_FILE BASE_URI\n";

my $markup = read_file($html_file);

# Prefix => namespace URI map handed to the RDF/XML serialiser.
# hint: locate common abbreviations at http://prefix.cc/
my %namespace_for = (
    rdfs    => 'http://www.w3.org/2000/01/rdf-schema#',
    dc      => 'http://purl.org/dc/terms/',
    foaf    => 'http://xmlns.com/foaf/0.1/',
    xhtml   => 'http://www.w3.org/1999/xhtml/vocab#',
    vcard   => 'http://www.w3.org/2006/vcard/ns#',
    vcardx  => 'http://buzzword.org.uk/rdf/vcardx#',
    hcterms => 'http://purl.org/uF/hCard/terms/',
    ical    => 'http://www.w3.org/2002/12/cal/icaltzd#',
    cv      => 'http://purl.org/captsolo/resume-rdf/0.2/cv#',
    hresume => 'http://ontologi.es/hresume#',
    # TODO: somehow make RDF::Trine abbreviate labels
    # 'xsd:dateTime'     => 'http://www.w3.org/2001/XMLSchema#dateTime',
    # 'geo:SpatialThing' => 'http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing',
);

# Build the document and assume every profile, rather than a hand-picked
# list such as:  ->assume_profile(qw(hResume hCard hCalendar))
my $doc = HTML::Microformats
    ->new_document($markup, $base_uri)
    ->assume_all_profiles;
$doc->parse_microformats;

# Alternative kept for reference:
#print $doc->serialise_model(as => 'RDFXML');
my $model  = $doc->model;
my $output = rdf_string(
    $model, 'RDFXML',
    namespaces => { %namespace_for },    # pass a copy of the prefix map
);

# TODO: somehow make RDF::Trine generate stable IDs
# Until then, strip the "B" + 32 lowercase hex characters from node IDs of the
# form rdf:nodeID="B<32 hex>0NNN", leaving only the four-digit suffix "0NNN".
# NOTE(review): the remaining value starts with a digit, which is not a valid
# XML NCName; confirm nothing downstream parses this output as strict RDF/XML.
$output =~ s/(?<=rdf:nodeID=")B[a-f0-9]{32}(?=0[0-9]{3}")//g;

print $output;
|