// blob: 2473a8fc59a259784ae4877c6d68a7a20f60ff28 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package tdb2;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Locale;
import java.util.Objects;

import jena.cmd.ArgDecl;
import jena.cmd.CmdException;
import org.apache.jena.atlas.lib.InternalErrorException;
import org.apache.jena.atlas.lib.ListUtils;
import org.apache.jena.atlas.lib.Timer;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.query.ARQ;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFLanguages;
import org.apache.jena.sparql.core.DatasetGraph;
import org.apache.jena.system.Txn;
import org.apache.jena.tdb2.loader.DataLoader;
import org.apache.jena.tdb2.loader.LoaderFactory;
import org.apache.jena.tdb2.loader.base.LoaderOps;
import org.apache.jena.tdb2.loader.base.MonitorOutput;
import org.apache.jena.tdb2.loader.main.LoaderPlans;
import tdb2.cmdline.CmdTDB;
import tdb2.cmdline.CmdTDBGraph;
/**
 * Command line entry point that bulk loads RDF files into a TDB2 dataset.
 * <p>
 * Files named on the command line are loaded either as quads into the dataset
 * (no {@code --graph}) or as triples into the graph named by {@code --graph}.
 * The bulk loader implementation can be chosen with {@code --loader}; when it
 * is not given, {@link #chooseLoader} picks one based on whether the dataset
 * is empty.
 */
public class tdbloader extends CmdTDBGraph {
    // NOTE(review): argStats is declared but never registered (the add() call in the
    // constructor is commented out), so the --stats handling below looks dormant.
    private static final ArgDecl argStats = new ArgDecl(ArgDecl.HasValue, "stats");
    private static final ArgDecl argLoader = new ArgDecl(ArgDecl.HasValue, "loader");

    /** The loader implementations selectable with {@code --loader}. */
    private enum LoaderEnum { Basic, Parallel, Sequential, Light, Phased }

    private boolean showProgress = true;
    private boolean generateStats = false;
    /** Loader requested on the command line; {@code null} means "choose a default". */
    private LoaderEnum loader = null;

    public static void main(String... args) {
        CmdTDB.init();
        new tdbloader(args).mainRun();
    }

    protected tdbloader(String[] argv) {
        super(argv);
        // super.add(argStats, "Generate statistics");
        super.add(argLoader, "--loader=", "Loader to use: 'basic', 'phased' (default), 'sequential', 'parallel' or 'light'");
    }

    /**
     * Reads {@code --loader} and {@code --stats} after the superclass has processed its own arguments.
     *
     * @throws CmdException if the loader name is not recognized or the stats value is not a boolean
     */
    @Override
    protected void processModulesAndArgs() {
        super.processModulesAndArgs();
        if ( contains(argLoader) ) {
            // Locale.ROOT: the names are ASCII keywords; a locale-sensitive lower-casing
            // (e.g. Turkish dotless i) would stop "LIGHT" from matching "light".
            String loadername = getValue(argLoader).toLowerCase(Locale.ROOT);
            // Every name except "light" is accepted by prefix.
            if ( loadername.matches("basic.*") )
                loader = LoaderEnum.Basic;
            else if ( loadername.matches("phas.*") )
                loader = LoaderEnum.Phased;
            else if ( loadername.matches("seq.*") )
                loader = LoaderEnum.Sequential;
            else if ( loadername.matches("para.*") )
                loader = LoaderEnum.Parallel;
            else if ( loadername.matches("light") )
                loader = LoaderEnum.Light;
            else
                throw new CmdException("Unrecognized value for --loader: "+loadername);
        }

        if ( super.contains(argStats) ) {
            if ( ! hasValueOfTrue(argStats) && ! hasValueOfFalse(argStats) )
                throw new CmdException("Not a boolean value: "+getValue(argStats));
            generateStats = super.hasValueOfTrue(argStats);
        }
    }

    /** Returns the one-line usage summary: the command name followed by its options. */
    @Override
    protected String getSummary() {
        // The leading space separates the command name from the first option.
        return getCommandName() + " [--loader=NAME] [--desc DATASET | --loc DIR] FILE ...";
    }

    /**
     * Runs the load: works out the progress setting, validates the inputs and
     * dispatches to a quads load or a named-graph triples load.
     *
     * @throws CmdException if an input file is unreadable, or if {@code --graph}
     *         is combined with a file whose name indicates a quads format
     */
    @Override
    protected void exec() {
        if ( isVerbose() ) {
            System.out.println("Java maximum memory: " + Runtime.getRuntime().maxMemory());
            System.out.println(ARQ.getContext());
            showProgress = true;
        }
        // Quiet is applied last, so it overrides verbose.
        if ( isQuiet() )
            showProgress = false;

        List<String> urls = getPositional();
        if ( urls.size() == 0 )
            // No files given: "-" is passed to the loader instead and is not file-checked.
            urls.add("-");
        else
            checkFiles(urls);

        if ( graphName == null ) {
            loadQuads(urls);
            return;
        }

        // There's a --graph.
        // Reject quads formats: they cannot be loaded into a single named graph.
        // NOTE(review): the message says "Warning" but the load is aborted.
        for ( String url : urls ) {
            Lang lang = RDFLanguages.filenameToLang(url);
            if ( lang != null && RDFLanguages.isQuads(lang) ) {
                throw new CmdException("Warning: Quads format given - only the default graph is loaded into the graph for --graph");
            }
        }
        loadTriples(graphName, urls);
    }

    /**
     * Checks that every input exists, is a regular file and is readable, before starting.
     *
     * @throws CmdException listing all the inputs that failed the check
     */
    private void checkFiles(List<String> urls) {
        List<String> problemFiles =
            ListUtils.toList(
                urls.stream()
                    .map(Paths::get)
                    .filter(p-> !Files.exists(p) || !Files.isRegularFile(p /*follow links*/) || !Files.isReadable(p) )
                    .map(Path::toString)
            );
        if ( ! problemFiles.isEmpty() ) {
            // List.toString() already supplies the surrounding brackets.
            throw new CmdException("Can't read files : "+problemFiles);
        }
    }

    /** Loads the files as triples into the graph named {@code graphName}. */
    private void loadTriples(String graphName, List<String> urls) {
        execBulkLoad(super.getDatasetGraph(), graphName, urls, showProgress);
    }

    /** Loads the files into the dataset with no target graph name. */
    private void loadQuads(List<String> urls) {
        // generateStats
        execBulkLoad(super.getDatasetGraph(), null, urls, showProgress);
    }

    /**
     * Performs the bulk load with the chosen loader and times it.
     *
     * @param dsg          dataset to load into
     * @param graphName    target graph name, or {@code null} for none
     * @param urls         inputs to load
     * @param showProgress currently unused; output is governed by {@code isQuiet()} in {@link #chooseLoader}
     * @return the elapsed time as reported by {@code Timer.time}
     */
    private long execBulkLoad(DatasetGraph dsg, String graphName, List<String> urls, boolean showProgress) {
        DataLoader dataLoader = chooseLoader(dsg, graphName);
        long elapsed = Timer.time(()->{
            dataLoader.startBulk();
            dataLoader.load(urls);
            dataLoader.finishBulk();
        });
        return elapsed;
    }

    /**
     * Decide on the bulk loader.
     * <p>
     * Uses the loader requested with {@code --loader} if there was one; otherwise
     * the phased loader for an empty dataset and the basic loader for a non-empty one.
     */
    private DataLoader chooseLoader(DatasetGraph dsg, String graphName) {
        Objects.requireNonNull(dsg);
        Node gn = null;
        if ( graphName != null )
            gn = NodeFactory.createURI(graphName);

        LoaderEnum useLoader = loader;
        if ( useLoader == null ) {
            // Default choice - phased if empty. basic if not.
            boolean isEmpty = Txn.calculateRead(dsg, ()->dsg.isEmpty());
            useLoader = isEmpty ? LoaderEnum.Phased : LoaderEnum.Basic;
        }

        MonitorOutput output = isQuiet() ? LoaderOps.nullOutput() : LoaderOps.outputToLog();
        DataLoader dataLoader = createLoader(useLoader, dsg, gn, output);
        if ( output != null )
            output.print("Loader = %s", dataLoader.getClass().getSimpleName());
        return dataLoader;
    }

    /**
     * Creates the {@link DataLoader} for the given loader choice.
     *
     * @param gn target graph, or {@code null} for none; passed to every loader
     * @throws InternalErrorException if the enum value has no corresponding loader
     */
    private DataLoader createLoader(LoaderEnum useLoader, DatasetGraph dsg, Node gn, MonitorOutput output) {
        switch(useLoader) {
            case Phased :
                return LoaderFactory.phasedLoader(dsg, gn, output);
            case Parallel :
                return LoaderFactory.parallelLoader(dsg, gn, output);
            case Sequential :
                return LoaderFactory.sequentialLoader(dsg, gn, output);
            case Light :
                // Pass the graph name like the other loaders so --graph is honoured.
                return LoaderFactory.createLoader(LoaderPlans.loaderPlanLight, dsg, gn, output);
            case Basic :
                return LoaderFactory.basicLoader(dsg, gn, output);
            default :
                throw new InternalErrorException("Unrecognized loader: "+useLoader);
        }
    }
}