%META:TOPICPARENT{name="VirtTipsAndTricksGuide"}% ---+ RDF dumps from Virtuoso Quad store hosted data Every DBMS needs to offer a mechanism for bulk export and import of data. Virtuoso supports dumping and reloading graph model data (e.g., RDF), as well as relational data (e.g., SQL) (discussed elsewhere). %TOC% ---++ How is it done? We have created stored procedures for the task. The dump procedures leverage SPARQL to facilitate selective data dump(s) from one or more Named Graphs, each denoted by an IRI. ---+++ Dump One Graph CREATE PROCEDURE dump_one_graph ( IN srcgraph VARCHAR , IN out_file VARCHAR , IN file_length_limit INTEGER := 1000000000 ) { DECLARE file_name VARCHAR; DECLARE env, ses ANY; DECLARE ses_len, max_ses_len, file_len, file_idx INTEGER; SET ISOLATION = 'uncommitted'; max_ses_len := 10000000; file_len := 0; file_idx := 1; file_name := sprintf ('%s%06d.ttl', out_file, file_idx); string_to_file ( file_name || '.graph', srcgraph, -2 ); string_to_file ( file_name, sprintf ( '# Dump of graph <%s>, as of %s\n', srcgraph, CAST (NOW() AS VARCHAR) ), -2 ); env := vector (dict_new (16000), 0, '', '', '', 0, 0, 0, 0); ses := string_output (); FOR (SELECT * FROM ( SPARQL DEFINE input:storage "" SELECT ?s ?p ?o { GRAPH `iri(?:srcgraph)` { ?s ?p ?o } } ) AS sub OPTION (LOOP)) DO { http_ttl_triple (env, "s", "p", "o", ses); ses_len := length (ses); IF (ses_len > max_ses_len) { file_len := file_len + ses_len; IF (file_len > file_length_limit) { http (' .\n', ses); string_to_file (file_name, ses, -1); file_len := 0; file_idx := file_idx + 1; file_name := sprintf ('%s%06d.ttl', out_file, file_idx); string_to_file ( file_name, sprintf ( '# Dump of graph <%s>, as of %s (part %d)\n', srcgraph, CAST (NOW() AS VARCHAR), file_idx), -2 ); env := VECTOR (dict_new (16000), 0, '', '', '', 0, 0, 0, 0); } ELSE string_to_file (file_name, ses, -1); ses := string_output (); } } IF (LENGTH (ses)) { http (' .\n', ses); string_to_file (file_name, ses, -1); } } ; ---+++ Dump Multiple Graphs CREATE PROCEDURE dump_graphs ( IN dir VARCHAR := 'dumps' , IN file_length_limit INTEGER := 1000000000 ) { DECLARE inx INT; inx := 1; SET ISOLATION = 'uncommitted'; FOR ( SELECT * FROM ( SPARQL DEFINE input:storage "" SELECT DISTINCT ?g { GRAPH ?g { ?s ?p ?o } . FILTER ( ?g != virtrdf: ) } ) AS sub OPTION ( LOOP )) DO { dump_one_graph ( "g", sprintf ('%s/graph%06d_', dir, inx), file_length_limit ); inx := inx + 1; } } ; ---+++ Load Graphs This stored procedure performs a bulk load from a file. CREATE PROCEDURE load_graphs ( IN dir VARCHAR := 'dumps/' ) { DECLARE arr ANY; DECLARE g VARCHAR; arr := sys_dirlist (dir, 1); log_enable (2, 1); FOREACH (VARCHAR f IN arr) DO { IF (f LIKE '*.ttl') { DECLARE CONTINUE HANDLER FOR SQLSTATE '*' { log_message (sprintf ('Error in %s', f)); }; g := file_to_string (dir || '/' || f || '.graph'); DB.DBA.TTLP_MT (file_open (dir || '/' || f), g, g, 255); } } EXEC ('checkpoint'); }; ---++ Example 1. Call the dump_one_graph procedure, for ex. with the following values: SQL> dump_one_graph ('http://daas.openlinksw.com/data#', './data_', 1000000000); Done. -- 1438 msec. 1. As a result, a dump of the graph will be found in the files data_XX (located in your Virtuoso db folder): data_000001.ttl data_000002.ttl .... data_000001.ttl.graph ---++ Related * [[VirtTipsAndTricksGuide][Virtuoso Tips and Tricks Collection]] * [[VirtDumpLoadRdfGraphs][Virtuoso Scripts to Dump and Reload RDF Graphs - more examples]] * [[http://docs.openlinksw.com/virtuoso/rdfperformancetuning.html#rdfperfdumpandreloadgraphs][Virtuoso Documentation]]