Last active
October 2, 2019 08:29
-
-
Save evren/7601614 to your computer and use it in GitHub Desktop.
Showing examples of efficiently loading multiple files into different named graphs using Stardog
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2013 -- Clark & Parsia, LLC. <http://www.clarkparsia.com> | |
// For more information about licensing and copyright of this software, please contact [email protected]. | |
package com.complexible.stardog; | |
import java.io.File; | |
import java.io.PrintStream; | |
import java.nio.file.Path; | |
import java.util.Map; | |
import java.util.concurrent.TimeUnit; | |
import com.complexible.common.rdf.rio.RDFCompressedFormat; | |
import org.openrdf.model.Literal; | |
import org.openrdf.model.Resource; | |
import com.complexible.common.rdf.model.Values; | |
import com.complexible.stardog.api.Connection; | |
import com.complexible.stardog.api.admin.AdminConnection; | |
import com.complexible.stardog.api.admin.AdminConnectionConfiguration; | |
import com.google.common.base.Stopwatch; | |
import com.google.common.collect.Maps; | |
import com.google.common.io.ByteStreams; | |
/** | |
* Creates a Stardog DB with multiple files where each file is loaded into a different named graph. Named graph URI | |
* is compute by concatenating a given namespace URI with the name of the file. Different named graph templates can | |
* be used by tweaking the named graph function below. | |
* | |
* @author Evren Sirin | |
*/ | |
public class CreateDBWithNamedGraphs { | |
public static void main(String[] args) throws Exception { | |
if (args.length != 3) { | |
System.err.println("usage: " + CreateDBWithNamedGraphs.class.getName() + " database inputDir namespace"); | |
System.exit(0); | |
} | |
final String database = args[0]; | |
final String inputDir = args[1]; | |
final String namespace = args[2]; | |
// create admin connection | |
AdminConnection admin = AdminConnectionConfiguration | |
.toServer("http://localhost:5820") | |
.credentials("admin", "admin") | |
.connect(); | |
System.out.print("Creating..."); | |
// lets' time this | |
Stopwatch w = Stopwatch.createStarted(); | |
// create a mapping from the input files to named graphs | |
Map<Path, Resource> paths = Maps.newHashMap(); | |
for (File file : new File(inputDir).listFiles()) { | |
// ignore non-RDF files | |
if (RDFCompressedFormat.forFileName(file.getName()) == null) { | |
continue; | |
} | |
// named graph is concatenation of namespace and file name | |
Resource namedGraph = Values.iri(namespace, file.getName()); | |
paths.put(file.toPath(), namedGraph); | |
} | |
// create db with all the files in the directory and connect to the db at the end | |
Connection conn = admin | |
.disk(database) | |
.reporter(new PrintStream(ByteStreams.nullOutputStream())) | |
.create(path -> paths.get(path), paths.keySet().toArray(new Path[0])) | |
.connect(); | |
w.stop(); | |
// get the number of triples and named graphs | |
long size = conn.size(); | |
long contexts = ((Literal) conn | |
.select("select (count(distinct ?g) as ?count) where {graph ?g {?s ?p ?o}}") | |
.execute() | |
.next() | |
.getValue("count")) | |
.longValue(); | |
System.out.format("finished. Loaded %d triples into %d graphs in %s (%1.1fK triples/sec)%n", size, contexts, w, | |
(float) size / w.elapsed(TimeUnit.MILLISECONDS)); | |
admin.close(); | |
conn.close(); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2013 -- Clark & Parsia, LLC. <http://www.clarkparsia.com> | |
// For more information about licensing and copyright of this software, please contact [email protected]. | |
package com.complexible.stardog; | |
import java.io.File; | |
import java.util.concurrent.TimeUnit; | |
import com.complexible.common.rdf.rio.RDFCompressedFormat; | |
import org.openrdf.model.Literal; | |
import org.openrdf.model.Resource; | |
import org.openrdf.rio.RDFFormat; | |
import com.complexible.common.rdf.model.Values; | |
import com.complexible.stardog.api.Connection; | |
import com.complexible.stardog.api.ConnectionConfiguration; | |
import com.complexible.stardog.api.IO; | |
import com.google.common.base.Stopwatch; | |
/** | |
* Loads multiple files to a Stardog DB where each file is added into a different named graph. Named graph URI | |
* is computes by concatenating a given namespace URI with the name of the file. Different named graph templates can | |
* be used by tweaking the named graph function below. | |
* | |
* @author Evren Sirin | |
*/ | |
public class LoadFilesToNamedGraphs { | |
public static void main(String[] args) throws Exception { | |
if (args.length != 3) { | |
System.err.println("usage: " + LoadFilesToNamedGraphs.class.getName() + " database inputDir namespace"); | |
System.exit(0); | |
} | |
final String database = args[0]; | |
final String inputDir = args[1]; | |
final String namespace = args[2]; | |
// create db with all the files in the directory and connect to the db | |
Connection conn = ConnectionConfiguration | |
.to(database) | |
.server("http://localhost:5820") | |
.credentials("admin", "admin") | |
.connect(); | |
System.out.print("Loading..."); | |
Stopwatch w = Stopwatch.createStarted(); | |
// start tx | |
conn.begin(); | |
// we'll add multiple files and files are on the server side so no need to send them through the | |
// connection and server can read them from the file system directly | |
IO io = conn.add().io().serverSide(); | |
// add all the files | |
for (File file : new File(inputDir).listFiles()) { | |
RDFFormat format = RDFCompressedFormat.forFileName(file.getName()); | |
// ignore non-RDF files | |
if (format == null) { | |
continue; | |
} | |
// named graph is concatenation of namespace and file name | |
Resource namedGraph = Values.iri(namespace, file.getName()); | |
// it is important to send absolute paths since the working dir for client may not be same | |
// as the working dir for the server | |
io.format(format).context(namedGraph).file(file.getAbsoluteFile().toPath()); | |
} | |
// commit tx | |
conn.commit(); | |
w.stop(); | |
long size = conn.size(); | |
long contexts = ((Literal) conn | |
.select("select (count(distinct ?g) as ?count) where {graph ?g {?s ?p ?o}}") | |
.execute() | |
.next() | |
.getValue("count")) | |
.longValue(); | |
System.out.format("finished. Loaded %d triples into %d contexts in %s (%1.1fK triples/sec)%n", size, contexts, w, | |
(float) size / w.elapsed(TimeUnit.MILLISECONDS)); | |
conn.close(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment