#!/usr/bin/perl
#
# Extract RDF from an HTML document and print it to STDOUT as RDF/XML.
#
# Usage: <script> MARKUP_FILE [BASE_URI]
#
#   MARKUP_FILE  path of the HTML file to parse (required)
#   BASE_URI     base URI handed to the parser together with the markup
#                (NOTE(review): presumably used to resolve relative
#                references -- confirm against HTML::Data::Parser)
#
# The serialised output has the 32-hex-digit portion of blank-node IDs
# removed so that repeated runs produce comparable output.

use strict;
use warnings;

use RDF::Trine;
use HTML::Data::Parser;
use RDF::TrineShortcuts;
use File::Slurp;

# Fail early with a readable message instead of letting read_file()
# choke on an undefined file name.
@ARGV >= 1
    or die "Usage: $0 MARKUP_FILE [BASE_URI]\n";

my ($markup_file, $base_uri) = @ARGV;
my $markup = read_file($markup_file);

# Prefix => namespace URI table used when serialising the model.
# Hint: locate common abbreviations at http://prefix.cc/
my %namespace_for = (
    rdfs    => 'http://www.w3.org/2000/01/rdf-schema#',
    dc      => 'http://purl.org/dc/terms/',
    foaf    => 'http://xmlns.com/foaf/0.1/',
    xhtml   => 'http://www.w3.org/1999/xhtml/vocab#',
    vcard   => 'http://www.w3.org/2006/vcard/ns#',
    vcardx  => 'http://buzzword.org.uk/rdf/vcardx#',
    hcterms => 'http://purl.org/uF/hCard/terms/',
    ical    => 'http://www.w3.org/2002/12/cal/icaltzd#',
    cv      => 'http://purl.org/captsolo/resume-rdf/0.2/cv#',
    hresume => 'http://ontologi.es/hresume#',
);

# Parse the markup into a throw-away model.
my $parser = HTML::Data::Parser->new;
my $model  = RDF::Trine::Model->temporary_model;
$parser->parse_into_model($base_uri, $markup, $model);

# Serialise; rdf_string() receives its own copy of the prefix table.
my $output = rdf_string(
    $model, 'RDFXML',
    namespaces => { %namespace_for },
);

# TODO: somehow make RDF::Trine generate stable IDs
# Until then, delete the 32 hex digits that follow 'rdf:nodeID="B' whenever
# they are directly followed by "0", three digits and the closing quote,
# leaving IDs of the form B0nnn.
$output =~ s/(?<=rdf:nodeID="B)[a-f0-9]{32}(?=0[0-9]{3}")//g;

print $output;