blob: fd126d07bde562469f097a824da2834979525316 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.connectors.kudu;
import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.InputSplitAssigner;
import org.apache.flink.core.io.LocatableInputSplit;
import org.apache.flink.streaming.connectors.kudu.connector.*;
import org.apache.flink.util.Preconditions;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduScanToken;
import org.apache.kudu.client.LocatedTablet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Flink input format that reads {@link KuduRow} records from a Kudu table.
 *
 * <p>{@link #createInputSplits(int)} asks the connector for scan tokens and wraps each serialized
 * token, together with the host:port of every replica of its tablet, in a {@link KuduInputSplit}.
 * {@link #open(KuduInputSplit)} then obtains a row iterator for the token carried by the split.
 *
 * <p>Optional filters, column projections and a row limit can be set with the fluent
 * {@code with...} methods; they are read when the input splits are created.
 *
 * <p>NOTE(review): the {@code KuduConnector} created lazily by this class is never released here.
 * Confirm whether the connector exposes a close/release method that {@link #close()} should call.
 */
public class KuduInputFormat extends RichInputFormat<KuduRow, KuduInputFormat.KuduInputSplit> {

    private static final Logger LOG = LoggerFactory.getLogger(KuduInputFormat.class);

    /** Kudu master address string handed to the connector. */
    private final String kuduMasters;
    /** Description of the table to read, handed to the connector. */
    private final KuduTableInfo tableInfo;

    /** Optional filters passed to the connector when scan tokens are requested; may be null. */
    private List<KuduFilterInfo> tableFilters;
    /** Optional column projections passed to the connector; may be null. */
    private List<String> tableProjections;
    /** Optional row limit passed to the connector; may be null. */
    private Long rowsLimit;

    /** Set to true once {@link #nextRecord(KuduRow)} finds no further rows in the current split. */
    private boolean endReached;

    /** Connector to Kudu, created lazily; transient so it is not shipped with the format. */
    private transient KuduConnector tableContext;
    /** Iterator over the rows of the split that is currently open. */
    private transient KuduRowIterator resultIterator;

    /**
     * Creates an input format for the given table.
     *
     * @param kuduMasters Kudu master address string, must not be null
     * @param tableInfo   description of the table to read, must not be null
     */
    public KuduInputFormat(String kuduMasters, KuduTableInfo tableInfo) {
        Preconditions.checkNotNull(kuduMasters, "kuduMasters could not be null");
        this.kuduMasters = kuduMasters;

        Preconditions.checkNotNull(tableInfo, "tableInfo could not be null");
        this.tableInfo = tableInfo;

        this.endReached = false;
    }

    /**
     * Sets the filters used when scan tokens are requested.
     *
     * @param tableFilters filters to apply
     * @return this instance, for chaining
     */
    public KuduInputFormat withTableFilters(KuduFilterInfo... tableFilters) {
        return withTableFilters(Arrays.asList(tableFilters));
    }

    /**
     * Sets the filters used when scan tokens are requested.
     *
     * @param tableFilters filters to apply
     * @return this instance, for chaining
     */
    public KuduInputFormat withTableFilters(List<KuduFilterInfo> tableFilters) {
        this.tableFilters = tableFilters;
        return this;
    }

    /**
     * Sets the column projections used when scan tokens are requested.
     *
     * @param tableProjections projections to apply
     * @return this instance, for chaining
     */
    public KuduInputFormat withTableProjections(String... tableProjections) {
        return withTableProjections(Arrays.asList(tableProjections));
    }

    /**
     * Sets the column projections used when scan tokens are requested.
     *
     * @param tableProjections projections to apply
     * @return this instance, for chaining
     */
    public KuduInputFormat withTableProjections(List<String> tableProjections) {
        this.tableProjections = tableProjections;
        return this;
    }

    /**
     * Sets the row limit used when scan tokens are requested.
     *
     * @param rowsLimit row limit to pass to the connector
     * @return this instance, for chaining
     */
    public KuduInputFormat withRowsLimit(Long rowsLimit) {
        this.rowsLimit = rowsLimit;
        return this;
    }

    /** No configuration is read from the given parameters. */
    @Override
    public void configure(Configuration parameters) {
    }

    /**
     * Opens the given split: resets the end-of-input flag, makes sure the connector exists and
     * obtains a row iterator for the split's scan token.
     *
     * @param split split to read
     * @throws IOException if the connector or the scanner cannot be created
     */
    @Override
    public void open(KuduInputSplit split) throws IOException {
        endReached = false;
        startTableContext();
        resultIterator = tableContext.scanner(split.getScanToken());
    }

    /**
     * Closes the row iterator of the current split, if one was opened.
     *
     * <p>A failure to close is logged and not propagated.
     */
    @Override
    public void close() {
        if (resultIterator == null) {
            return;
        }
        try {
            resultIterator.close();
        } catch (KuduException e) {
            // Closing is best effort; report through the logger instead of dumping to stderr.
            LOG.error("Error while closing the Kudu row iterator.", e);
        }
    }

    /**
     * Returns the statistics passed in unchanged; nothing is computed by this format.
     *
     * @param cachedStatistics previously cached statistics, may be null
     * @return {@code cachedStatistics}
     */
    @Override
    public BaseStatistics getStatistics(BaseStatistics cachedStatistics) throws IOException {
        return cachedStatistics;
    }

    /**
     * Returns a {@link LocatableInputSplitAssigner} over the given splits.
     *
     * @param inputSplits splits produced by {@link #createInputSplits(int)}
     * @return assigner for the splits
     */
    @Override
    public InputSplitAssigner getInputSplitAssigner(KuduInputSplit[] inputSplits) {
        return new LocatableInputSplitAssigner(inputSplits);
    }

    /** Creates the connector on first use; later calls reuse the existing one. */
    private void startTableContext() throws IOException {
        if (tableContext == null) {
            tableContext = new KuduConnector(kuduMasters, tableInfo);
        }
    }

    /**
     * Creates one input split per scan token returned by the connector.
     *
     * <p>Each split carries the serialized token and the host:port of every replica of the
     * token's tablet. A warning is logged when fewer splits than {@code minNumSplits} exist.
     *
     * @param minNumSplits desired minimum number of splits; only used for the warning
     * @return the input splits, one per scan token
     * @throws IOException if the connector cannot be created or the tokens cannot be obtained
     */
    @Override
    public KuduInputSplit[] createInputSplits(int minNumSplits) throws IOException {
        startTableContext();
        Preconditions.checkNotNull(tableContext, "tableContext should not be null");

        List<KuduScanToken> tokens = tableContext.scanTokens(tableFilters, tableProjections, rowsLimit);

        KuduInputSplit[] splits = new KuduInputSplit[tokens.size()];
        for (int i = 0; i < tokens.size(); i++) {
            KuduScanToken token = tokens.get(i);

            List<String> locations = new ArrayList<>(token.getTablet().getReplicas().size());
            for (LocatedTablet.Replica replica : token.getTablet().getReplicas()) {
                locations.add(getLocation(replica.getRpcHost(), replica.getRpcPort()));
            }

            splits[i] = new KuduInputSplit(token.serialize(), i, locations.toArray(new String[0]));
        }

        if (splits.length < minNumSplits) {
            LOG.warn(" The minimum desired number of splits with your configured parallelism level " +
                            "is {}. Current kudu splits = {}. {} instances will remain idle.",
                    minNumSplits,
                    splits.length,
                    (minNumSplits - splits.length)
            );
        }

        return splits;
    }

    /**
     * Reports whether the current split is exhausted.
     *
     * @return true once {@link #nextRecord(KuduRow)} has found no further rows
     */
    @Override
    public boolean reachedEnd() throws IOException {
        return endReached;
    }

    /**
     * Returns the next row of the current split.
     *
     * @param reuse ignored; the row produced by the iterator is returned instead
     * @return the next row, or null when the split is exhausted (the end flag is then set)
     * @throws IllegalStateException if no split has been opened
     * @throws IOException           if reading from Kudu fails
     */
    @Override
    public KuduRow nextRecord(KuduRow reuse) throws IOException {
        Preconditions.checkState(resultIterator != null,
                "nextRecord() called before open(): no split has been opened");

        if (resultIterator.hasNext()) {
            return resultIterator.next();
        }
        endReached = true;
        return null;
    }

    /**
     * Returns an endpoint in the format {@code host:port}.
     *
     * @param host Hostname
     * @param port Port
     * @return the formatted endpoint
     */
    private static String getLocation(String host, Integer port) {
        return host + ":" + port;
    }

    /**
     * Input split that carries a serialized Kudu scan token in addition to the split number and
     * host names of a {@link LocatableInputSplit}.
     *
     * <p>Declared static so that a split does not hold a reference to the enclosing
     * {@link KuduInputFormat} instance.
     */
    public static class KuduInputSplit extends LocatableInputSplit {

        private static final long serialVersionUID = 1L;

        private final byte[] scanToken;

        /**
         * Creates a new KuduInputSplit.
         *
         * @param scanToken   serialized scan token this split reads
         * @param splitNumber the number of the input split
         * @param hostnames   The names of the hosts storing the data this input split refers to.
         */
        public KuduInputSplit(byte[] scanToken, final int splitNumber, final String[] hostnames) {
            super(splitNumber, hostnames);
            this.scanToken = scanToken;
        }

        /**
         * Returns the serialized scan token.
         *
         * @return the token bytes given at construction (not copied)
         */
        public byte[] getScanToken() {
            return scanToken;
        }
    }
}