The RDF_QM_TREE_DUMP
procedure and its associated procedures below are used for dumping one or more RDFView Graphs in a Virtuoso server to a set of turtle ttl
dataset files in the specified dump directory.
The dump generation is made as fast as possible by grouping mappings by underlying tables so many properties from neighbor database columns can be extracted in one table scan.
The size of the generated files is limited to 5MB.
The dump process creates internal stored procedures; their texts are saved in file .dump_procedures.sql
in the directory of dump files for debugging purposes.
Note that the dump directory must be included in the
parameter of the Virtuoso configuration file (e.g., virtuoso.ini
), or the server will not be allowed to create nor access the dataset file(s).
The Virtuoso RDF bulk loader scripts can then be used to load the dumped datasets for the RDFView graphs directly into a Virtuoso RDF QUAD store.
in dest_dir VARCHAR
- dump directory in graph_iri VARCHAR
- IRI of the graph to be dumped; triples from other graphs will be excluded.
If NULL
, then there's no restriction by graph.
in storage VARCHAR
- IRI of the quad map storage to use.
NULL
means use default storage.
in root VARCHAR
- IRI of the quad map to use, e.g., an IRI of an RDF View (or its part).
NULL
means use all RDF Views of the storage (and the default mapping as well).CREATE PROCEDURE DB.DBA.RDF_QM_TREE_DUMP ( in dest_dir VARCHAR, in graph_iri VARCHAR := NULL, in storage VARCHAR := NULL, in root VARCHAR := NULL ) { DECLARE all_qms, grouped_qmvs, launcher_text ANY; DECLARE grp_ctr, qm_ctr, qm_count INTEGER; DECLARE sql_file, launcher_name VARCHAR; IF (NOT (dest_dir LIKE '%/')) dest_dir := dest_dir || '/'; sql_file := dest_dir || '.dump_procedures.sql'; IF (storage IS NULL) storage := 'http://www.openlinksw.com/schemas/virtrdf#DefaultQuadStorage'; string_to_file ( sql_file, '-- This file contains procedure created by DB.DBA.RDF_QM_TREE_DUMP() for storage ' || COALESCE (storage, 'NULL') || ' and root quad map ' || COALESCE (root, 'NULL') || '\n\n', -2); all_qms := dict_list_keys (DB.DBA.RDF_QM_CONTENT_OF_QM_TREE (graph_iri, storage, root), 2); grouped_qmvs := DB.DBA.RDF_QM_GROUP_BY_SOURCE_TABLES (all_qms); launcher_name := 'RDF_QM_TREE_DUMP_BATCH_' || md5 (serialize (graph_iri) || storage || serialize (root)); launcher_text := string_output (); http ('CREATE PROCEDURE DB.DBA."' || launcher_name || '" (in dest_dir VARCHAR)\n{\n', launcher_text); FOR (grp_ctr := length (grouped_qmvs); grp_ctr > 0; grp_ctr := grp_ctr-2) { DECLARE tables, qms, proc_text ANY; DECLARE group_key, proc_name, dump_prefix, cmt VARCHAR; tables := grouped_qmvs [grp_ctr-2]; qms := grouped_qmvs [grp_ctr-1]; qm_count := length (qms); group_key := md5 (serialize (graph_iri) || storage || serialize (root) || serialize (tables)); proc_name := 'RDF_QM_TREE_DUMP_GRP_' || group_key; proc_text := string_output (); cmt := sprintf ('%d quad maps on join of', qm_count); FOREACH (VARCHAR t IN tables) DO cmt := cmt || ' ' || t; http (' -- ' || cmt || '\n', launcher_text); http (' DB.DBA."' || proc_name || '" (dest_dir);\n', launcher_text); http ('CREATE PROCEDURE DB.DBA."' || proc_name || '" (in dest_dir VARCHAR)\n', proc_text); http ('{\n', proc_text); http (' -- ' || cmt || '\n', proc_text); http (' DECLARE ses, env ANY;\n', proc_text); http (' DECLARE file_ctr, cmt_len INTEGER;\n', proc_text); http (' file_ctr := 0;\n', proc_text); http (' dbg_obj_princ (' || WS.WS.STR_SQL_APOS (cmt) || ', '', file '', file_ctr);\n', proc_text); http (' ses := string_output ();\n', proc_text); http (' http (' || WS.WS.STR_SQL_APOS ('#' || cmt || '\n') || ', ses);\n', proc_text); http (' env := VECTOR (dict_new (16000), 0, '''', '''', '''', 0, 0, 0, 0);\n', proc_text); http (' cmt_len := LENGTH (ses);\n', proc_text); http (' FOR (SPARQL DEFINE input:storage <' || storage || '>\n', proc_text); http (' SELECT ?s1, ?p1, ?o1\n', proc_text); IF (graph_iri IS NOT NULL) { http (' WHERE { GRAPH <', proc_text); http_escape (graph_iri, 12, proc_text, 1, 1); http ('> {\n', proc_text); } ELSE http (' WHERE { GRAPH ?g1 {\n', proc_text); FOR (qm_ctr := 0; qm_ctr < qm_count; qm_ctr := qm_ctr + 1) { IF (qm_ctr > 0) http (' UNION\n', proc_text); http (' { quad map <' || qms[qm_ctr] || '> { ?s1 ?p1 ?o1 } }\n', proc_text); } http (' } } ) DO {\n', proc_text); http (' http_ttl_triple (env, "s1", "p1", "o1", ses);\n', proc_text); http (' IF (LENGTH (ses) > 5000000)\n', proc_text); http (' {\n', proc_text); http (' http ('' .\\n'', ses);\n', proc_text); http (' string_to_file (sprintf (''%s' || group_key || '_%05d.ttl'', dest_dir, file_ctr), ses, -2);\n', proc_text); http (' file_ctr := file_ctr + 1;\n', proc_text); http (' dbg_obj_princ (' || WS.WS.STR_SQL_APOS (cmt) || ', '', file '', file_ctr);\n', proc_text); http (' ses := string_output ();\n', proc_text); http (' http (' || WS.WS.STR_SQL_APOS ('#' || cmt || '\n') || ', ses);\n', proc_text); http (' env := VECTOR (dict_new (16000), 0, '''', '''', '''', 0, 0, 0, 0);\n', proc_text); http (' }\n', proc_text); http (' }\n', proc_text); http (' IF (LENGTH (ses) > cmt_len)\n', proc_text); http (' {\n', proc_text); http (' http ('' .\\n'', ses);\n', proc_text); http (' string_to_file (sprintf (''%s' || group_key || '_%05d.ttl'', dest_dir, file_ctr), ses, -2);\n', proc_text); http (' }\n', proc_text); http ('}\n', proc_text); proc_text := string_output_string (proc_text); string_to_file (sql_file, proc_text || ';\n\n' , -1); EXEC (proc_text); } http ('}\n', launcher_text); launcher_text := string_output_string (launcher_text); string_to_file (sql_file, launcher_text || ';\n\n' , -1); EXEC (launcher_text); CALL ('DB.DBA.' || launcher_name)(dest_dir); } ; CREATE FUNCTION DB.DBA.RDF_QM_CONTENT_OF_QM_TREE ( in graph_iri VARCHAR := NULL, in storage VARCHAR := NULL, in root VARCHAR := NULL, in dict ANY := NULL ) returns ANY { DECLARE res, subqms any; DECLARE graphiri varchar; graphiri := DB.DBA.JSO_SYS_GRAPH(); IF (storage IS NULL) storage := 'http://www.openlinksw.com/schemas/virtrdf#DefaultQuadStorage'; DB.DBA.RDF_QM_ASSERT_STORAGE_FLAG (storage, 0); IF (dict IS NULL) dict := dict_new (); IF (root IS NULL) { subqms := ((SELECT DB.DBA.VECTOR_AGG (sub."qmiri") FROM ( SPARQL DEFINE input:storage "" SELECT DISTINCT (str(?qm)) AS ?qmiri WHERE { GRAPH `iri(?:graphiri)` { { `iri(?:storage)` virtrdf:qsUserMaps ?lst . ?lst ?p ?qm . FILTER (0 = bif:strstr (str(?p), str(rdf:_))) } UNION { `iri(?:storage)` virtrdf:qsDefaultMap ?qm . } } } ) AS sub ) ); FOREACH (varchar qmid IN subqms) DO DB.DBA.RDF_QM_CONTENT_OF_QM_TREE (graph_iri, storage, qmid, dict); RETURN dict; } DB.DBA.RDF_QM_ASSERT_JSO_TYPE (root, 'http://www.openlinksw.com/schemas/virtrdf#QuadMap'); IF (graph_iri IS NOT NULL AND EXISTS ((SPARQL DEFINE input:storage "" SELECT (1) WHERE { GRAPH `iri(?:graphiri)` { `iri(?:root)` virtrdf:qmGraphRange-rvrFixedValue ?g . FILTER (str (?g) != str(?:graph_iri)) } } ) ) ) RETURN dict; IF (NOT EXISTS ((SPARQL DEFINE input:storage "" SELECT (1) WHERE { GRAPH `iri(?:graphiri)` { `iri(?:root)` virtrdf:qmMatchingFlags virtrdf:SPART_QM_EMPTY . } } ) ) ) dict_put (dict, root, 1); subqms := ((SELECT DB.DBA.VECTOR_AGG (sub."qmiri") FROM ( SPARQL DEFINE input:storage "" SELECT DISTINCT (str(?qm)) as ?qmiri WHERE { GRAPH `iri(?:graphiri)` { `iri(?:root)` virtrdf:qmUserSubMaps ?lst . ?lst ?p ?qm . FILTER (0 = bif:strstr (str(?p), str(rdf:_))) } } ) AS sub ) ); FOREACH (VARCHAR qmid IN subqms) DO DB.DBA.RDF_QM_CONTENT_OF_QM_TREE (graph_iri, storage, qmid, dict); RETURN dict; } ; CREATE FUNCTION DB.DBA.RDF_QM_GROUP_BY_SOURCE_TABLES (in qms ANY) returns ANY { DECLARE res ANY; DECLARE ctr INTEGER; DECLARE graphiri VARCHAR; graphiri := DB.DBA.JSO_SYS_GRAPH(); res := dict_new (LENGTH (qms) / 20); FOREACH (VARCHAR qmiri IN qms) DO { DECLARE tbls, acc ANY; tbls := ((SELECT DB.DBA.VECTOR_AGG (sub."tbl") FROM (SELECT subsub."tbl" FROM ( SPARQL DEFINE input:storage "" SELECT DISTINCT ?tbl WHERE { GRAPH `iri(?:graphiri)` { { `iri(?:qmiri)` virtrdf:qmTableName ?tbl . } UNION { `iri(?:qmiri)` virtrdf:qmATables ?atbls . ?atbls ?p ?atbl . ?atbl virtrdf:qmvaTableName ?tbl } UNION { `iri(?:qmiri)` ?fldmap ?qmv . ?qmv virtrdf:qmvATables ?atbls . ?atbls ?p ?atbl . ?atbl virtrdf:qmvaTableName ?tbl . } } } ) subsub ORDER BY 1 ) AS sub ) ); acc := dict_get (res, tbls); IF (acc IS NULL) vectorbld_init (acc); vectorbld_acc (acc, qmiri); dict_put (res, tbls, acc); } res := dict_to_vector (res, 2); FOR (ctr := LENGTH (res); ctr > 0; ctr := ctr-2) { DECLARE acc ANY; acc := aref_set_0 (res, ctr-1); vectorbld_final (acc); aset_zap_arg (res, ctr-1, acc); } RETURN res; } ; --test dbg_obj_princ (DB.DBA.RDF_QM_GROUP_BY_SOURCE_TABLES (dict_list_keys (DB.DBA.RDF_QM_CONTENT_OF_QM_TREE (null), 2))); --test dbg_obj_princ (dict_list_keys (DB.DBA.RDF_QM_CONTENT_OF_QM_TREE (null), 2)); --test DB.DBA.RDF_QM_TREE_DUMP ('dump/demo', null, null, null); --test DB.DBA.RDF_QM_TREE_DUMP ('dump/tpch', 'http://localhost:8600/tpch', null, null);
$ pwd /Applications/OpenLink Virtuoso/Virtuoso 6.1/database $ grep DirsAllowed virtuoso.iniDirsAllowed = ., ../vad, ./graphs $ /opt/virtuoso/bin/isql 1111 Connected to OpenLink Virtuoso Driver: 06.01.3127 OpenLink Virtuoso ODBC Driver OpenLink Interactive SQL (Virtuoso), version 0.9849b. Type HELP; for help and EXIT; to exit. SQL> DB.DBA.RDF_QM_TREE_DUMP('./graphs', 'http://opllinux5.usnet.private:8890/HR#', NULL, NULL); Done. -- 3297 msec. SQL> quit; $ ls -a graphs . .. 0501399cf19a391edea424b49f8fddec_00000.ttl 0e92baa118032b03d45b671f8102d49b_00000.ttl 1517d16a92affbb0918234caa9ee4506_00000.ttl 1803645533d6b04b4ee1ba1d2b2ba517_00000.ttl 21794e4c25ee4d3958cc7b92ea1ab2af_00000.ttl 25e61545a84174e4f0e95b9dbd8400f1_00000.ttl 2b3c3263a3ac32408124e3cf3119edb2_00000.ttl 3d9bf6c213d70375e601125446a2e4b1_00000.ttl 451ab264eb7b8ba653fab05ddb2fbb8f_00000.ttl 47ac12f3099a1093359b91ffc141a4de_00000.ttl 491406fea677f4663b68dd1c27d404bd_00000.ttl 4f8d21f21e4ad7380f7da51b8b4750ba_00000.ttl 5c6f12b92bb3b2f3601826ce7c66b4fa_00000.ttl 7ad9ed11ef3557b3d11197037b672fce_00000.ttl 7ae906be3dd4dd7198bf7bb9ebeb75c7_00000.ttl 847132e12c95df9326b0f0c183c789cc_00000.ttl 85da7768db9d91356ad56a4b2296ffeb_00000.ttl 86ec197853e6e243b657593daadba07b_00000.ttl 99227cc049c9b2cdf5f3c20278e85eb4_00000.ttl a9d1085b8966bcbdbb3c1fc167e0bbe4_00000.ttl aaef0bbd9f1172e795d41b9ee63575d4_00000.ttl afbb87dc2897a63491fe95d834e9bd3e_00000.ttl b5d8d72cab725914bff265c5766cc8e9_00000.ttl b7f574e8394e0408245a02c603618afe_00000.ttl c74baad55e1277b26fa002d070960b85_00000.ttl ccd6ab4e49b6b3ac5b0e676e8b402639_00000.ttl d0e3534ac8ffa67e7e221ef6eec422ab_00000.ttl d1c409acf4379aa4e1d3e40b8bd1b059_00000.ttl de55244ca7b18e6a9ea7693b6275fb6c_00000.ttl .dump_procedures.sql e00e827d43678857097f706f66e050e4_00000.ttl ebe38a29534ec94fbd5098014064b4d1_00000.ttl ee7ff61ceb22dae7ae22b32cd99ace03_00000.ttl f87218215ec64026d7313e91f4c1ccbf_00000.ttl