blob: bd28978bcf5e2d6ac702a98f36f36c5ac3b0dcf8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.pdf;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.physical.impl.scan.v3.file.FileReaderFactory;
import org.apache.drill.exec.physical.impl.scan.v3.file.FileScanLifecycleBuilder;
import org.apache.drill.exec.physical.impl.scan.v3.file.FileSchemaNegotiator;
import org.apache.drill.exec.physical.impl.scan.v3.ManagedReader;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
import org.apache.drill.exec.store.dfs.easy.EasySubScan;
import org.apache.hadoop.conf.Configuration;
/**
 * Format plugin built on {@link EasyFormatPlugin} whose scans are served by
 * {@link PdfBatchReader}. The plugin is registered with the default name
 * {@code "pdf"} and is configured as read-only.
 */
public class PdfFormatPlugin extends EasyFormatPlugin<PdfFormatConfig> {

  /** Default name passed to the easy-format configuration. */
  protected static final String DEFAULT_NAME = "pdf";

  /**
   * Reader factory that passes one shared reader configuration to every
   * {@link PdfBatchReader} it creates.
   */
  private static class PdfReaderFactory extends FileReaderFactory {

    private final PdfBatchReader.PdfReaderConfig sharedConfig;

    /**
     * @param readerConfig configuration handed to each reader this factory creates
     */
    public PdfReaderFactory(PdfBatchReader.PdfReaderConfig readerConfig) {
      this.sharedConfig = readerConfig;
    }

    /**
     * Creates a new {@link PdfBatchReader} for the given negotiator.
     *
     * @param negotiator schema negotiator forwarded to the reader's constructor
     * @return a newly constructed {@link PdfBatchReader}
     */
    @Override
    public ManagedReader newReader(FileSchemaNegotiator negotiator) {
      return new PdfBatchReader(sharedConfig, negotiator);
    }
  }

  /**
   * Builds the plugin, deriving the easy-format configuration from the file
   * system configuration and the format configuration; all other arguments are
   * forwarded unchanged to the superclass constructor.
   *
   * @param name plugin name
   * @param context Drillbit context
   * @param fsConf file system configuration, also stored in the easy-format configuration
   * @param storageConfig configuration of the enclosing storage plugin
   * @param formatConfig PDF format configuration; supplies the file extensions
   */
  public PdfFormatPlugin(String name,
                         DrillbitContext context,
                         Configuration fsConf,
                         StoragePluginConfig storageConfig,
                         PdfFormatConfig formatConfig) {
    super(name, easyConfig(fsConf, formatConfig), context, storageConfig, formatConfig);
  }

  /**
   * Assembles the {@link EasyFormatPlugin.EasyFormatConfig} passed to the
   * superclass constructor.
   *
   * @param fsConf file system configuration stored in the result
   * @param formatConfig format configuration whose extensions are registered
   * @return the built easy-format configuration
   */
  private static EasyFormatConfig easyConfig(Configuration fsConf, PdfFormatConfig formatConfig) {
    return EasyFormatConfig.builder()
        // Capabilities: readable, not writable, not block-splittable, compressible.
        .readable(true)
        .writable(false)
        .blockSplittable(false)
        .compressible(true)
        .supportsProjectPushdown(true)
        // Identity and environment.
        .extensions(formatConfig.extensions())
        .fsConf(fsConf)
        .defaultName(DEFAULT_NAME)
        // Scan framework selection and limit push-down.
        .scanVersion(ScanFrameworkVersion.EVF_V2)
        .supportsLimitPushdown(true)
        .build();
  }

  /**
   * Configures the scan lifecycle builder: sets its null type to optional
   * VARCHAR and installs a reader factory that produces {@link PdfBatchReader}
   * instances configured from this plugin.
   *
   * @param builder scan lifecycle builder to configure
   * @param scan sub-scan definition; not consulted by this implementation
   */
  @Override
  protected void configureScan(FileScanLifecycleBuilder builder, EasySubScan scan) {
    PdfReaderFactory factory = new PdfReaderFactory(new PdfBatchReader.PdfReaderConfig(this));
    builder.nullType(Types.optional(TypeProtos.MinorType.VARCHAR));
    builder.readerFactory(factory);
  }
}