---+Virtuoso Programmer's Guide - RDF Middleware ("Sponger") (Part 3) * [[VirtSpongerCartridgeProgrammersGuide][Part 1]] * [[VirtuosoSpongerCartridgeProgrammersGuide_Part2][Part 2]] ---+++Contents (Part 3) * Sponger Queue API * Functions * REST Web service * Useful Virtuoso Functions * String Functions * sprintf_inverse * split_and_decode * Retrieving URLs * http_get * DB.DBA.RDF_HTTP_URL_GET * http_request_header * Handling Non-XML Response Content * json_parse * Writing Arbitrarily Long Text * http * string_output * string_output_string * XML & XSLT * xtree_doc * xpath_eval * DB.DBA.RDF_MAPPER_XSLT * Character Set Conversion * serialize_to_UTF8_xml * Loading Data Into the Quad Store * DB.DBA.RDF_LOAD_RDFXML * DB.DBA.TTLP * Debug Output * dbg_obj_print * References * Appendix A: PingTheSemanticWeb RDF Notification Service * Appendix B: Main Namespaces used by OpenLink Cartridges * Appendix C: Freebase Cartridge & Stylesheet ---++Sponger Queue API ---+++Functions * DB.DBA.RDF_SPONGER_QUEUE_ADD: This function is available when the cartridges VAD package is installed. DB.DBA.RDF_SPONGER_QUEUE_ADD (url, options); * url: the Network Resource URI to be fetched; * options: an array, usually of typical Sponger pragmas, for example: vector ('get:soft', 'soft', 'refresh_free_text', 1); ---+++REST Web service The Sponger REST Web service has the following characteristics: * endpoint: http://cname/about/service * parameters: 1 op=add: the type of operation; for now, only addition to the queue is supported 1 uris=[json array]: the URIs to be added to the Sponger queue, formatted as a JSON array, for example: { "uris":["http://www.amazon.co.uk/Hama-Stylus-Input-Apple-iPad/dp/B003O0OM0C", "http://www.amazon.co.uk/Krusell-GAIA-Case-Apple-iPad/dp/B003QHXWWC" ] } The service will return a JSON-encoded result giving the number of items added, for example: { "result":2 } In case of error, a JSON object containing the error text will be returned with HTTP status 500. 
---++++cURL example 1 Assume a file file.txt which contains the URL-encoded JSON string: uris=%7B%20%22uris%22%3A%5B%22http%3A%2F%2Fwww.amazon.co.uk%2FHama-Stylus-Input-Apple-iPad%2Fdp%2FB003O0OM0C%22%2C%20%22http%3A%2F%2Fwww.amazon.co.uk%2FKrusell-GAIA-Case-Apple-iPad%2Fdp%2FB003QHXWWC%22%20%5D%20%7D 1 Execute the following command: curl -i -d@file.txt http://cname/about/service?op=add HTTP/1.1 200 OK Server: Virtuoso/06.02.3129 (Darwin) i686-apple-darwin10.0.0 VDB Connection: Keep-Alive Date: Thu, 05 May 2011 12:06:24 GMT Accept-Ranges: bytes Content-Type: applcation/json; charset="UTF-8" Content-Length: 14 { "result":2 } ---++Useful Virtuoso Functions ---+++String Functions ---++++sprintf_inverse sprintf_inverse takes a string to parse and a format string. If the first argument matches the format string, it returns a vector of the values matching the placeholders in the format string. sprintf_inverse is useful for extracting fields from URLs. Full description: [[http://docs.openlinksw.com/virtuoso/fn_sprintf_inverse.html][Virtuoso Functions Guide entry]] ---+++++Example tmp := sprintf_inverse (new_origin_uri, 'http://farm%s.static.flickr.com/%s/%s_%s.%s', 0); img_id := tmp[2]; ---++++split_and_decode Converts escaped key=value pairs in an input string into a vector of strings. Full description: [[http://docs.openlinksw.com/virtuoso/fn_split_and_decode.html][Virtuoso Functions Guide entry]] ---+++++Example To split the HTTP request and response headers passed to the cartridge hook function. request_hdr := headers[0]; response_hdr := headers[1]; host := http_request_header (request_hdr, 'Host'); tmp := split_and_decode (request_hdr[0], 0, '\0\0 '); http_method := tmp[0]; url := tmp[1]; protocol_version := substring (tmp[2], 6, 8); tmp := rtrim (response_hdr[0], '\r\n'); tmp := split_and_decode (tmp, 0, '\0\0 '); ---+++Retrieving URLs ---++++http_get Returns a string containing the body of the requested URL and optionally the response headers. 
Full description: [[http://docs.openlinksw.com/virtuoso/fn_http_get.html][Virtuoso Functions Guide entry]] ---+++++Example url := sprintf('http://api.flickr.com/services/rest/??method=flickr.photos.getInfo&photo_id=%s&api_key=%s', img_id, api_key); tmp := http_get (url, hdr); if (hdr[0] not like 'HTTP/1._ 200 %') signal ('22023', trim(hdr[0], '\r\n'), 'RDFXX'); xd := xtree_doc (tmp); ---++++DB.DBA.RDF_HTTP_URL_GET A wrapper around http_get. Retrieves a URL using the specified HTTP method (defaults to GET). The function can handle proxies, redirects (up to fifteen) and HTTPS. ---+++++Example uri := sprintf ('http://musicbrainz.org/ws/1/%s/%s?type=xml&inc=%U', kind, id, inc); cnt := RDF_HTTP_URL_GET (uri, '', hdr, 'GET', 'Accept: */*'); xt := xtree_doc (cnt); xd := DB.DBA.RDF_MAPPER_XSLT (registry_get ('_cartridges_path_') || 'xslt/main/mbz2rdf.xsl', xt, vector ('baseUri', new_origin_uri)); ---++++http_request_header Returns an array containing the HTTP request header lines or, when a header array and a field name are supplied (as in the example below), the value of that header field. Full description: [[http://docs.openlinksw.com/virtuoso/fn_http_request_header.html][Virtuoso Functions Guide entry]] ---+++++Example content := RDF_HTTP_URL_GET (rdf_url, new_origin_uri, hdr, 'GET', 'Accept: application/rdf+xml, text/rdf+n3, */*'); ret_content_type := http_request_header (hdr, 'Content-Type', null, null); ---+++Handling Non-XML Response Content ---++++json_parse Parses JSON content into a tree. ---+++++Example url := sprintf ('http://www.freebase.com/api/service/mqlread?queries=%U', qr); content := http_get (url, hdr); tree := json_parse (content); tree := get_keyword ('ROOT', tree); tree := get_keyword ('result', tree); ---+++Writing Arbitrarily Long Text ---++++http Writes to an HTTP client or a string output stream. Full description: [[http://docs.openlinksw.com/virtuoso/fn_http.html][Virtuoso Functions Guide entry]] ---+++++Example Writing N3 to a string output stream using function http(), parsing the N3 into a graph, then loading the graph into the quad store. 
ses := string_output (); http ('@prefix opl: <http://www.openlinksw.com/schema/attribution#> .\n', ses); http ('@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n', ses); ... DB.DBA.TTLP (ses, base, graph); DB.DBA.RDF_LOAD_RDFXML (strg, base, graph); ---++++string_output Makes a string output stream, a special object that may be used to buffer arbitrarily long streams of data. The HTTP output functions optionally take a string output stream handle as a parameter and then output to the string stream instead of the HTTP client. Full description: [[http://docs.openlinksw.com/virtuoso/fn_string_output.html][Virtuoso Functions Guide entry]] ---+++++Example ses := string_output (); cnt := http_get (sprintf ('http://download.finance.yahoo.com/d/quotes.csv?s=%U&f=nsbavophg&e=.csv', symbol)); arr := rdfm_yq_parse_csv (cnt); http ('', ses); foreach (any q in arr) do { http_value (q[0], 'company', ses); http_value (q[1], 'symbol', ses); ... } http ('', ses); content := string_output_string (ses); xt := xtree_doc (content); ---++++string_output_string Produces a string out of a string output stream. Full description: [[http://docs.openlinksw.com/virtuoso/fn_string_output_string.html][Virtuoso Functions Guide entry]] ---+++++Example See string_output above. ---+++XML & XSLT ---++++xtree_doc Parses the input string, which is expected to be a well-formed XML fragment, and returns a parse tree as a special memory-resident object. Full description: [[http://docs.openlinksw.com/virtuoso/fn_xtree_doc.html][Virtuoso Functions Guide entry]] ---+++++Example content := RDF_HTTP_URL_GET (uri, '', hdr, 'GET', 'Accept: */*'); xt := xtree_doc (content); ---++++xpath_eval Applies an XPath expression to a context node and returns the result(s). 
Full description: [[http://docs.openlinksw.com/virtuoso/fn_xpath_eval.html][Virtuoso Functions Guide entry]] ---+++++Example profile := cast (xpath_eval ('/html/head/@profile', xt) as varchar); ---++++DB.DBA.RDF_MAPPER_XSLT A simple wrapper around the Virtuoso function xslt which sets the current user to dba before returning an XML document transformed by an XSLT stylesheet. Full description: [[http://docs.openlinksw.com/virtuoso/fn_xslt.html][Virtuoso Functions Guide entry]] ---+++++Example tmp := http_get (url); xd := xtree_doc (tmp); xt := DB.DBA.RDF_MAPPER_XSLT ( registry_get ('_cartridges_path_') || 'xslt/main/atom2rdf.xsl', xd, vector ('baseUri', coalesce (dest, graph_iri))); ---+++Character Set Conversion ---++++serialize_to_UTF8_xml Converts a value of arbitrary type to a UTF-8 string representation. Full description: [[http://docs.openlinksw.com/virtuoso/fn_serialize_to_UTF8_xml.html][Virtuoso Functions Guide entry]] ---+++++Example xt := DB.DBA.RDF_MAPPER_XSLT ( registry_get ('_cartridges_path_') || 'xslt/main/crunchbase2rdf.xsl', xt, vector ('baseUri', coalesce (dest, graph_iri), 'base', base, 'suffix', suffix)); xd := serialize_to_UTF8_xml (xt); DB.DBA.RM_RDF_LOAD_RDFXML (xd, new_origin_uri, coalesce (dest, graph_iri)); ---+++Loading Data Into the Quad Store ---++++DB.DBA.RDF_LOAD_RDFXML Parses the content of RDF/XML text into a sequence of separate triples and loads the triples into the specified graph in the Virtuoso Quad Store. 
Full description: [[http://docs.openlinksw.com/virtuoso/fn_rdf_load_rdfxml.html][Virtuoso Functions Guide entry]] ---+++++Example content := RDF_HTTP_URL_GET (uri, '', hdr, 'GET', 'Accept: */*'); xt := xtree_doc (content); xd := DB.DBA.RDF_MAPPER_XSLT ( registry_get ('_cartridges_path_') || 'xslt/main/mbz2rdf.xsl', xt, vector ('baseUri', new_origin_uri)); xd := serialize_to_UTF8_xml (xd); DB.DBA.RM_RDF_LOAD_RDFXML (xd, new_origin_uri, coalesce (dest, graph_iri)); ---++++DB.DBA.TTLP Parses the content of TTL (TURTLE or N3) text and loads the triples into the specified graph in the Virtuoso Quad Store. Full description: [[http://docs.openlinksw.com/virtuoso/fn_ttlp.html][Virtuoso Functions Guide entry]] ---+++++Example sess := string_output (); ... http (sprintf (' .\n', symbol, x), sess); http (sprintf (' .\n', x, x), sess); content := string_output_string (sess); DB.DBA.TTLP (content, new_origin_uri, coalesce (dest, graph_iri)); ---+++Debug Output ---++++dbg_obj_print Prints to the Virtuoso system console. Full description: [[http://demo.openlinksw.com/doc/html/fn_dbg_obj_print.html][Virtuoso Functions Guide entry]] ---+++++Example dbg_obj_print ('try all grddl mappings here'); ---+References * RDF Primer: http://www.w3.org/TR/2004/REC-rdf-primer-20040210/ * RDF/XML Syntax Specification: http://www.w3.org/TR/rdf-syntax-grammar/ * GRDDL Primer: http://www.w3.org/TR/grddl-primer/ ---+Appendix A: PingTheSemanticWeb RDF Notification Service [[http://www.pingthesemanticweb.com/][PingtheSemanticWeb]] (PTSW) is a repository for RDF documents. The PTSW web service archives the location of recently created or updated RDF documents on the Web. It is intended for use by crawlers or other types of software agents which need to know when and where the latest updated RDF documents can be found. They can request a list of recently updated documents as a starting location to crawl the Semantic Web. You may find this service useful for publicizing your own RDF content. 
Content authors can notify PTSW that an RDF document has been created or updated by pinging the service with the URL of the document. The Sponger supports this facility through the async_queue and ping_service parameters of the cartridge hook function, where the ping_service parameter contains the ping service URL as configured in the SPARQL section of the virtuoso.ini file: [SPARQL] ... PingService = http://rpc.pingthesemanticweb.com/ ... The configured ping service can be called using an asynchronous request and the RDF_SW_PING procedure as illustrated below. create procedure DB.DBA.RDF_LOAD_HTML_RESPONSE ( in graph_iri varchar, in new_origin_uri varchar, in dest varchar, inout ret_body any, inout async_queue any, inout ping_service any, inout _key any, inout opts any ) { ... if ( ... and async_queue is not null) aq_request (async_queue, 'DB.DBA.RDF_SW_PING', vector (ping_service, new_origin_uri)); For more details, please refer to the section [[http://docs.openlinksw.com/virtuoso/ASYNCEXECMULTITHREAD.html][Asynchronous Execution and Multithreading in Virtuoso/PL]] in the Virtuoso reference documentation. ---+Appendix B: Main Namespaces used by OpenLink Cartridges A list of the main namespaces / ontologies used by OpenLink-provided Sponger cartridges is given below. Some of these ontologies may prove useful when creating your own cartridges. 
* - http://www.openlinksw.com/virtuoso/xslt/ * - http://www.openlinksw.com/schemas/XHTML# * rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# * rdfs: http://www.w3.org/2000/01/rdf-schema# * dc: http://purl.org/dc/elements/1.1/ * dcterms: http://purl.org/dc/terms/ * foaf: http://xmlns.com/foaf/0.1/ * sioc: http://rdfs.org/sioc/ns# * sioct: http://rdfs.org/sioc/types# * skos: http://www.w3.org/2004/02/skos/core# * bibo: http://purl.org/ontology/bibo/ ---+Appendix C: Freebase Cartridge & Stylesheet Snapshots of the Freebase cartridge and stylesheet compatible with the meta-cartridge example presented earlier in this document can be found below. ---+++DB.DBA.RDF_LOAD_MQL: --no_c_escapes- create procedure DB.DBA.RDF_LOAD_MQL (in graph_iri varchar, in new_origin_uri varchar, in dest varchar, inout _ret_body any, inout aq any, inout ps any, inout _key any, inout opts any) { declare qr, path, hdr any; declare tree, xt, xd, types any; declare k, cnt, url, sa varchar; hdr := null; sa := ''; declare exit handler for sqlstate '*' { --dbg_printf ('%s', __SQL_MESSAGE); return 0; }; path := split_and_decode (new_origin_uri, 0, '%\0/'); if (length (path) < 1) return 0; k := path [length(path) - 1]; if (path [length(path) - 2] = 'guid') k := sprintf ('"id":"/guid/%s"', k); else { if (k like '#%') k := sprintf ('"id":"%s"', k); else { sa := DB.DBA.RDF_MQL_GET_WIKI_URI (k); k := sprintf ('"key":"%s"', k); } } qr := sprintf ('{"ROOT":{"query":[{%s, "type":[]}]}}', k); url := sprintf ('http://www.freebase.com/api/service/mqlread?queries=%U', qr); cnt := http_get (url, hdr); tree := json_parse (cnt); xt := get_keyword ('ROOT', tree); if (not isarray (xt)) return 0; xt := get_keyword ('result', xt); types := vector (); foreach (any tp in xt) do { declare tmp any; tmp := get_keyword ('type', tp); types := vector_concat (types, tmp); } --types := get_keyword ('type', xt); delete from DB.DBA.RDF_QUAD where g = iri_to_id(new_origin_uri); foreach (any tp in types) do { qr := sprintf 
('{"ROOT":{"query":{%s, "type":"%s", "*":[]}}}', k, tp); url := sprintf ('http://www.freebase.com/api/service/mqlread?queries=%U', qr); cnt := http_get (url, hdr); --dbg_printf ('%s', cnt); tree := json_parse (cnt); xt := get_keyword ('ROOT', tree); xt := DB.DBA.MQL_TREE_TO_XML (tree); --dbg_obj_print (xt); xt := DB.DBA.RDF_MAPPER_XSLT (registry_get ('_cartridges_path_') || 'xslt/main/mql2rdf.xsl', xt, vector ('baseUri', coalesce (dest, graph_iri), 'wpUri', sa)); sa := ''; xd := serialize_to_UTF8_xml (xt); -- dbg_printf ('%s', xd); DB.DBA.RM_RDF_LOAD_RDFXML (xd, new_origin_uri, coalesce (dest, graph_iri)); } return 1; } ---+++mql2rdf.xsl: ]> http://www.freebase.com/ Resource &xsd;dateTime