/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.rya.kafka.connect.client.command;

import static java.util.Objects.requireNonNull;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.rya.kafka.connect.api.StatementsSerializer;
import org.apache.rya.kafka.connect.client.RyaKafkaClientCommand;
import org.apache.rya.rdftriplestore.utils.RdfFormatUtils;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.rio.UnsupportedRDFormatException;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;

import edu.umd.cs.findbugs.annotations.DefaultAnnotation;
import edu.umd.cs.findbugs.annotations.NonNull;

/**
* Writes {@link Statement}s to a Kafka topic using the Rya Kafka Connect Sink format.
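 * <p>
 * The RDF file supplied on the command line is parsed using a format derived from its
 * file extension, and the parsed statements are published to the configured topic in
 * small batches.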
*/
@DefaultAnnotation(NonNull.class)
public class WriteStatementsCommand implements RyaKafkaClientCommand {

    private static final Logger log = LoggerFactory.getLogger(WriteStatementsCommand.class);

    /**
     * The number of {@link Statement}s that are written to the Kafka topic per batch.
     */
    private static final int BATCH_SIZE = 5;

/**
* Command line parameters that are used by this command to configure itself.
*/
public static class WriteParameters extends KafkaParameters {
        @Parameter(names = {"--statementsFile", "-f"}, required = true, description = "The file of RDF statements to write to the Kafka topic.")
public String statementsFile;
    }

@Override
public String getCommand() {
return "write";
    }

@Override
public String getDescription() {
return "Writes Statements to the specified Kafka topic.";
    }

@Override
public boolean validArguments(final String[] args) {
boolean valid = true;
try {
new JCommander(new WriteParameters(), args);
} catch(final ParameterException e) {
valid = false;
}
return valid;
    }

/**
* @return Describes what arguments may be provided to the command.
*/
@Override
public String getUsage() {
final JCommander parser = new JCommander(new WriteParameters());
final StringBuilder usage = new StringBuilder();
parser.usage(usage);
return usage.toString();
    }

@Override
public void execute(final String[] args) throws ArgumentsException, ExecutionException {
requireNonNull(args);
// Parse the command line arguments.
final WriteParameters params = new WriteParameters();
try {
new JCommander(params, args);
} catch(final ParameterException e) {
throw new ArgumentsException("Could not stream the query's results because of invalid command line parameters.", e);
}
// Verify the configured statements file path.
final Path statementsPath = Paths.get(params.statementsFile);
if(!statementsPath.toFile().exists()) {
throw new ArgumentsException("Could not load statements at path '" + statementsPath + "' because that " +
"file does not exist. Make sure you've entered the correct path.");
}
        // Create an RDF parser whose format is derived from the statements file's extension.
final String filename = statementsPath.getFileName().toString();
final RDFFormat format = RdfFormatUtils.forFileName(filename);
if (format == null) {
throw new UnsupportedRDFormatException("Unknown RDF format for the file: " + filename);
}
final RDFParser parser = Rio.createParser(format);
// Set up the producer.
try(Producer<String, Set<Statement>> producer = makeProducer(params)) {
            // Set a handler that writes the statements to the specified Kafka topic in batches of BATCH_SIZE Statements.
parser.setRDFHandler(new AbstractRDFHandler() {
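                // Statements are buffered in a Set, so duplicates within a single batch are only written once.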
                private Set<Statement> batch = new HashSet<>(BATCH_SIZE);
@Override
public void startRDF() throws RDFHandlerException {
log.trace("Starting loading statements.");
}
@Override
public void handleStatement(final Statement stmnt) throws RDFHandlerException {
log.trace("Adding statement.");
batch.add(stmnt);
                    if(batch.size() == BATCH_SIZE) {
flushBatch();
}
}
@Override
public void endRDF() throws RDFHandlerException {
if(!batch.isEmpty()) {
flushBatch();
}
log.trace("Done.");
}
private void flushBatch() {
log.trace("Flushing batch of size " + batch.size());
producer.send(new ProducerRecord<>(params.topic, null, batch));
                    batch = new HashSet<>(BATCH_SIZE);
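                    // flush() blocks until the sent records have completed, so the batch is in Kafka once this returns.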
producer.flush();
}
});
// Do the parse and load.
            try (final InputStream statementsStream = Files.newInputStream(statementsPath)) {
                parser.parse(statementsStream, "");
            } catch (final RDFParseException | RDFHandlerException | IOException e) {
throw new ExecutionException("Could not load the RDF file's Statements into the Kafka topic.", e);
}
}
}
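
    /**
     * Creates a {@link Producer} that writes {@link Set}s of {@link Statement}s to Kafka.
     * Keys are serialized as Strings and values are serialized using the Rya Kafka Connect
     * Sink format.
     *
     * @param params - Provides the Kafka brokers the producer will connect to. (not null)
     * @return The {@link Producer}. The caller is responsible for closing it.
     */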
private static Producer<String, Set<Statement>> makeProducer(final KafkaParameters params) {
requireNonNull(params);
final Properties props = new Properties();
props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, params.bootstrapServers);
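        // Records are written with null keys, but Kafka still requires a key serializer to be configured.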
props.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
props.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StatementsSerializer.class.getName());
return new KafkaProducer<>(props);
}
}