summaryrefslogtreecommitdiff
path: root/foaf/any2rdf.pl
blob: 7fa156ac6a4093cb5dd70f5127e469ca511046d2 (plain)
  1. #!/usr/bin/perl -w
  2. use RDF::Trine;
  3. use HTML::Data::Parser;
  4. use RDF::TrineShortcuts;
  5. use File::Slurp;
  use strict;
  use warnings;

  # Read a markup file, let HTML::Data::Parser extract its embedded data into
  # a temporary RDF::Trine model, and print that model as RDF/XML on STDOUT.
  #
  # Usage: any2rdf.pl MARKUP_FILE BASE_URI
  #   MARKUP_FILE  path of the document to read
  #   BASE_URI     base URI handed to the parser together with the markup
  my ($markup_file, $base_uri) = @ARGV;

  # Fail early with a usage line instead of letting read_file() choke on undef.
  die "usage: $0 MARKUP_FILE BASE_URI\n"
      unless defined $markup_file && length $markup_file;

  # A missing base URI was previously passed through silently; keep doing so,
  # but tell the user about it.
  warn "$0: no BASE_URI given\n" unless defined $base_uri;

  # Scalar context on purpose: read_file() returns the whole file as one string.
  my $markup = read_file($markup_file);

  # Prefix => namespace URI pairs offered to the serializer.
  # hint: locate common abbreviations at http://prefix.cc/
  my %namespace_for = (
      rdfs    => 'http://www.w3.org/2000/01/rdf-schema#',
      dc      => 'http://purl.org/dc/terms/',
      foaf    => 'http://xmlns.com/foaf/0.1/',
      xhtml   => 'http://www.w3.org/1999/xhtml/vocab#',
      vcard   => 'http://www.w3.org/2006/vcard/ns#',
      vcardx  => 'http://buzzword.org.uk/rdf/vcardx#',
      hcterms => 'http://purl.org/uF/hCard/terms/',
      ical    => 'http://www.w3.org/2002/12/cal/icaltzd#',
      cv      => 'http://purl.org/captsolo/resume-rdf/0.2/cv#',
      hresume => 'http://ontologi.es/hresume#',
      # TODO: somehow make RDF::Trine abbreviate labels
      # 'xsd:dateTime' => 'http://www.w3.org/2001/XMLSchema#dateTime',
      # 'geo:SpatialThing' => 'http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing',
  );

  my $parser = HTML::Data::Parser->new;
  my $model  = RDF::Trine::Model->temporary_model;
  $parser->parse_into_model($base_uri, $markup, $model);

  # Hand the serializer its own copy of the prefix map.
  my $output = rdf_string(
      $model, 'RDFXML',
      namespaces => { %namespace_for },
  );

  # TODO: somehow make RDF::Trine generate stable IDs
  # Until then: strip the leading 'B' plus 32 hex digits from every rdf:nodeID
  # value that ends in a four-digit counter starting with 0, so that only the
  # counter remains (presumably the stripped part differs between runs).
  # NOTE(review): the remaining ID starts with a digit, which is not a valid
  # XML NCName as RDF/XML requires for rdf:nodeID -- confirm that consumers of
  # this output tolerate it before changing the replacement.
  $output =~ s/(?<=rdf:nodeID=")B[a-f0-9]{32}(?=0[0-9]{3}")//g;

  print $output;