Every DBMS needs to offer a mechanism for bulk export and import of data.
Virtuoso supports dumping and reloading graph model data (e.g., RDF), as well as relational data (e.g., SQL) (discussed elsewhere).
We have created stored procedures for the task. The dump procedures leverage SPARQL to facilitate selective data dump(s) from one or more Named Graphs, each denoted by an IRI.
CREATE PROCEDURE dump_one_graph ( IN srcgraph VARCHAR , IN out_file VARCHAR , IN file_length_limit INTEGER := 1000000000 ) { DECLARE file_name VARCHAR; DECLARE env, ses ANY; DECLARE ses_len, max_ses_len, file_len, file_idx INTEGER; SET ISOLATION = 'uncommitted'; max_ses_len := 10000000; file_len := 0; file_idx := 1; file_name := sprintf ('%s%06d.ttl', out_file, file_idx); string_to_file ( file_name || '.graph', srcgraph, -2 ); string_to_file ( file_name, sprintf ( '# Dump of graph <%s>, as of %s\n', srcgraph, CAST (NOW() AS VARCHAR) ), -2 ); env := vector (dict_new (16000), 0, '', '', '', 0, 0, 0, 0); ses := string_output (); FOR (SELECT * FROM ( SPARQL DEFINE input:storage "" SELECT ?s ?p ?o { GRAPH `iri(?:srcgraph)` { ?s ?p ?o } } ) AS sub OPTION (LOOP)) DO { http_ttl_triple (env, "s", "p", "o", ses); ses_len := length (ses); IF (ses_len > max_ses_len) { file_len := file_len + ses_len; IF (file_len > file_length_limit) { http (' .\n', ses); string_to_file (file_name, ses, -1); file_len := 0; file_idx := file_idx + 1; file_name := sprintf ('%s%06d.ttl', out_file, file_idx); string_to_file ( file_name, sprintf ( '# Dump of graph <%s>, as of %s (part %d)\n', srcgraph, CAST (NOW() AS VARCHAR), file_idx), -2 ); env := VECTOR (dict_new (16000), 0, '', '', '', 0, 0, 0, 0); } ELSE string_to_file (file_name, ses, -1); ses := string_output (); } } IF (LENGTH (ses)) { http (' .\n', ses); string_to_file (file_name, ses, -1); } } ;
CREATE PROCEDURE dump_graphs ( IN dir VARCHAR := 'dumps' , IN file_length_limit INTEGER := 1000000000 ) { DECLARE inx INT; inx := 1; SET ISOLATION = 'uncommitted'; FOR ( SELECT * FROM ( SPARQL DEFINE input:storage "" SELECT DISTINCT ?g { GRAPH ?g { ?s ?p ?o } . FILTER ( ?g != virtrdf: ) } ) AS sub OPTION ( LOOP )) DO { dump_one_graph ( "g", sprintf ('%s/graph%06d_', dir, inx), file_length_limit ); inx := inx + 1; } } ;
This stored procedure performs a bulk load from a file.
CREATE PROCEDURE load_graphs ( IN dir VARCHAR := 'dumps/' ) { DECLARE arr ANY; DECLARE g VARCHAR; arr := sys_dirlist (dir, 1); log_enable (2, 1); FOREACH (VARCHAR f IN arr) DO { IF (f LIKE '*.ttl') { DECLARE CONTINUE HANDLER FOR SQLSTATE '*' { log_message (sprintf ('Error in %s', f)); }; g := file_to_string (dir || '/' || f || '.graph'); DB.DBA.TTLP_MT (file_open (dir || '/' || f), g, g, 255); } } EXEC ('checkpoint'); };
dump_one_graph
procedure, for ex.
with the following values:
SQL> dump_one_graph ('http://daas.openlinksw.com/data#', './data_', 1000000000); Done. -- 1438 msec.
data_XX
(located in your Virtuoso db folder):
data_000001.ttl data_000002.ttl .... data_000001.ttl.graph