# summary | refs | log | tree | commit | diff
# path: root/foaf/linkedin2rdf.pl
# blob: 3e4eb24475a7b0cb5191e9f7e97c9c66323f3f26 (plain)
  #!/usr/bin/perl -w
  #
  # linkedin2rdf.pl -- parse the microformats found in an HTML file and print
  # the resulting RDF model as RDF/XML on STDOUT.
  #
  # Usage: linkedin2rdf.pl HTML_FILE [BASE_URI]
  #
  #   HTML_FILE  path of the HTML document to read
  #   BASE_URI   handed unchanged to HTML::Microformats->new_document as its
  #              second argument (undef when omitted, as before)
  #
  use strict;
  use warnings;

  use HTML::Microformats;
  use RDF::TrineShortcuts;
  use File::Slurp;

  # Fail early with a usage hint instead of passing undef to read_file().
  @ARGV >= 1
      or die "Usage: $0 HTML_FILE [BASE_URI]\n";

  my ($html_file, $base_uri) = @ARGV;
  my $markup = read_file($html_file);

  # Prefix => namespace URI map given to the serialiser.
  # hint: locate common abbreviations at http://prefix.cc/
  my %namespace_for = (
      rdfs    => 'http://www.w3.org/2000/01/rdf-schema#',
      dc      => 'http://purl.org/dc/terms/',
      foaf    => 'http://xmlns.com/foaf/0.1/',
      xhtml   => 'http://www.w3.org/1999/xhtml/vocab#',
      vcard   => 'http://www.w3.org/2006/vcard/ns#',
      vcardx  => 'http://buzzword.org.uk/rdf/vcardx#',
      hcterms => 'http://purl.org/uF/hCard/terms/',
      ical    => 'http://www.w3.org/2002/12/cal/icaltzd#',
      cv      => 'http://purl.org/captsolo/resume-rdf/0.2/cv#',
      hresume => 'http://ontologi.es/hresume#',
      # TODO: somehow make RDF::Trine abbreviate labels
      # 'xsd:dateTime'     => 'http://www.w3.org/2001/XMLSchema#dateTime',
      # 'geo:SpatialThing' => 'http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing',
  );

  # Build the document with every profile assumed.  A narrower alternative
  # that was tried before: ->assume_profile(qw(hResume hCard hCalendar))
  my $doc = HTML::Microformats
      ->new_document($markup, $base_uri)
      ->assume_all_profiles;
  $doc->parse_microformats;

  # Equivalent one-step form, kept for reference:
  #print $doc->serialise_model(as => 'RDFXML');
  my $model  = $doc->model;
  my $output = rdf_string(
      $model, 'RDFXML',
      namespaces => {%namespace_for},    # pass a copy, not the map itself
  );

  # TODO: somehow make RDF::Trine generate stable IDs
  # Until then, drop the 32-hex-digit part of every blank node ID so that
  # repeated runs produce comparable output.  The leading "B" is kept on
  # purpose: rdf:nodeID values have to be XML NCNames, which must not start
  # with a digit.  The numeric suffix may be any counter of four or more
  # digits, not only those beginning with 0.
  $output =~ s{(?<=rdf:nodeID="B)[a-f0-9]{32}(?=[0-9]{4,}")}{}g;

  print $output;