blob: d0057878cee30a12144f6aa5a80ec1f5c1ccac30 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package tdb2.xloader;
import org.apache.jena.atlas.lib.FileOps;
import org.apache.jena.tdb.store.bulkloader.BulkLoader;
import org.apache.jena.tdb2.xloader.BulkLoaderX;
/**
* A version of xloader/TDB2 that runs in a single JVM.
* <p>
* This still requires an external sort programme.
* Normally, xloader is run by a script which uses one JVM per operation,
* exiting the JVM and starting a new one for each step.
* <p>
* This program does not need much RAM. Do not set the heap size large.
* 4Gbytes is enough, usually 2Gbytes is sufficient.
* More heap is not faster (in fact, it is often slower).
*/
public class CmdxLoader extends AbstractCmdxLoad {

    /** Index names handed to {@code CmdxBuildIndex}, in the order the builds are run. */
    private static final String[] INDEX_NAMES = {
        "SPO", "POS", "OSP",
        "GSPO", "GPOS", "GOSP",
        "SPOG", "POSG", "OSPG"
    };

    /**
     * Command-line entry point: creates the command with stage name "AIO"
     * and calls {@code mainRun()} on it.
     *
     * @param args command-line arguments
     */
    public static void main(String... args) {
        CmdxLoader command = new CmdxLoader("AIO", args);
        command.mainRun();
    }

    /**
     * Passes the stage name and arguments straight to the superclass constructor.
     *
     * @param stageName name of this stage
     * @param argv command-line arguments
     */
    protected CmdxLoader(String stageName, String[] argv) {
        super(stageName, argv);
    }

    /** Adds the {@code --loc=} and {@code --tmpdir=} arguments together with their descriptions. */
    @Override
    protected void setCmdArgs() {
        super.add(argLocation, "--loc=", "Database location");
        super.add(argTmpdir, "--tmpdir=", "Temporary directory (defaults to --loc)");
    }

    /** Returns the command name followed by a space and the arguments summary. */
    @Override
    protected String getSummary() {
        String commandName = getCommandName();
        return commandName + " " + getArgsSummary();
    }

    /** No checks are made here beyond whatever the superclass does. */
    @Override
    protected void subCheckArgs() {
    }

    /** Returns the fixed command name {@code "cmd-xloader"}. */
    @Override
    protected String getCommandName() {
        return "cmd-xloader";
    }

    /**
     * Runs every load step in this JVM by calling each stage's {@code main} in turn.
     * <p>
     * Reads the temporary directory, the database location and the first file name
     * from the superclass fields, prepares the directories, sets the progress tick
     * values, prints the JVM's maximum memory, then runs: node table build, data
     * ingest, and one index build per entry of {@link #INDEX_NAMES}.
     * Only the first file name is used.
     */
    @Override
    protected void exec() {
        // All inputs are read before any directory is touched.
        String tmpDirectory = super.tmpdir;
        String dbLocation = super.location;
        String inputFile = super.filenames.get(0);

        prepareDirectory(tmpDirectory);
        // The database directory is prepared separately only when it is a different path.
        boolean sameDirectory = tmpDirectory.equals(dbLocation);
        if ( !sameDirectory )
            prepareDirectory(dbLocation);

        // Use the same tick value for both bulk loader classes.
        BulkLoaderX.DataTick = 100_000;
        BulkLoader.DataTickPoint = BulkLoaderX.DataTick;

        System.out.printf("RAM = %,d\n", Runtime.getRuntime().maxMemory());

        // Every stage receives the same location argument.
        String locArg = "--loc=" + dbLocation;

        System.out.println("STEP 1 - load node table");
        CmdxBuildNodeTable.main(locArg, inputFile);

        System.out.println("STEP 2 - ingest triples and quads");
        CmdxIngestData.main(locArg, inputFile);

        System.out.println("STEP 3 - build indexes");
        for ( String indexName : INDEX_NAMES ) {
            CmdxBuildIndex.main(locArg, "--index=" + indexName);
        }
    }

    /** Calls {@code FileOps.ensureDir} and then {@code FileOps.clearAll} on the given directory. */
    private static void prepareDirectory(String directory) {
        FileOps.ensureDir(directory);
        FileOps.clearAll(directory);
    }
}