DRILL-8317: Convert LogRegex Format Plugin to EVF V2 (#2659)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/README.md b/contrib/format-log/README.md
similarity index 100%
rename from exec/java-exec/src/main/java/org/apache/drill/exec/store/log/README.md
rename to contrib/format-log/README.md
diff --git a/contrib/format-log/pom.xml b/contrib/format-log/pom.xml
new file mode 100644
index 0000000..aed0649
--- /dev/null
+++ b/contrib/format-log/pom.xml
@@ -0,0 +1,83 @@
+<?xml version="1.0"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>drill-contrib-parent</artifactId>
+ <groupId>org.apache.drill.contrib</groupId>
+ <version>2.0.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>drill-format-log</artifactId>
+ <name>Drill : Contrib : Format : Log Regex</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.drill.exec</groupId>
+ <artifactId>drill-java-exec</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <!-- Test dependencies -->
+ <dependency>
+ <groupId>org.apache.drill.exec</groupId>
+ <artifactId>drill-java-exec</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.drill</groupId>
+ <artifactId>drill-common</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-java-sources</id>
+ <phase>process-sources</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${basedir}/target/classes/org/apache/drill/exec/store/log
+ </outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/java/org/apache/drill/exec/store/log</directory>
+ <filtering>true</filtering>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
similarity index 88%
rename from exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
rename to contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
index 1fd1ac3..91e97a6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
+++ b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
@@ -20,8 +20,9 @@
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.physical.impl.scan.convert.StandardConversions;
-import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
-import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileDescrip;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileSchemaNegotiator;
import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
import org.apache.drill.exec.physical.resultSet.RowSetLoader;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
@@ -30,7 +31,6 @@
import org.apache.drill.exec.vector.accessor.TupleWriter;
import org.apache.drill.exec.vector.accessor.ValueWriter;
import org.apache.drill.shaded.guava.com.google.common.base.Charsets;
-import org.apache.hadoop.mapred.FileSplit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -41,7 +41,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-public class LogBatchReader implements ManagedReader<FileSchemaNegotiator> {
+public class LogBatchReader implements ManagedReader {
private static final Logger logger = LoggerFactory.getLogger(LogBatchReader.class);
public static final String RAW_LINE_COL_NAME = "_raw";
public static final String UNMATCHED_LINE_COL_NAME = "_unmatched_rows";
@@ -126,8 +126,7 @@
}
private final LogReaderConfig config;
- private final int maxRecords;
- private FileSplit split;
+ private final FileDescrip file;
private BufferedReader reader;
private ResultSetLoader loader;
private VectorWriter vectorWriter;
@@ -137,19 +136,13 @@
private int lineNumber;
private int errorCount;
- public LogBatchReader(LogReaderConfig config, int maxRecords) {
+ public LogBatchReader(LogReaderConfig config, FileSchemaNegotiator negotiator) {
this.config = config;
- this.maxRecords = maxRecords;
- }
-
- @Override
- public boolean open(FileSchemaNegotiator negotiator) {
- split = negotiator.split();
+ this.file = negotiator.file();
negotiator.tableSchema(config.tableSchema, true);
loader = negotiator.build();
bindColumns(loader.writer());
- openFile(negotiator);
- return true;
+ openFile();
}
private void bindColumns(RowSetLoader writer) {
@@ -188,15 +181,15 @@
}
}
- private void openFile(FileSchemaNegotiator negotiator) {
+ private void openFile() {
InputStream in;
try {
- in = negotiator.fileSystem().openPossiblyCompressedStream(split.getPath());
+ in = file.fileSystem().openPossiblyCompressedStream(file.split().getPath());
} catch (Exception e) {
throw UserException
.dataReadError(e)
.message("Failed to open input file")
- .addContext(String.format("File path: %s", split.getPath()))
+ .addContext(String.format("File path: %s", file.split().getPath()))
.addContext(loader.errorContext())
.build(logger);
}
@@ -215,10 +208,6 @@
}
private boolean nextLine(RowSetLoader rowWriter) {
- if (rowWriter.limitReached(maxRecords)) {
- return false;
- }
-
String line;
try {
line = reader.readLine();
@@ -226,7 +215,7 @@
throw UserException
.dataReadError(e)
.message("Error reading file")
- .addContext(String.format("File: %s", split.getPath()))
+ .addContext(String.format("File: %s", file.split().getPath()))
.addContext(loader.errorContext())
.build(logger);
}
@@ -278,7 +267,7 @@
try {
reader.close();
} catch (IOException e) {
- logger.warn("Error when closing file: " + split.getPath(), e);
+ logger.warn("Error when closing file: " + file.split().getPath(), e);
} finally {
reader = null;
}
@@ -288,6 +277,6 @@
public String toString() {
return String.format(
"LogRecordReader[File=%s, Line=%d]",
- split.getPath(), lineNumber);
+ file.split().getPath(), lineNumber);
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
similarity index 100%
rename from exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
rename to contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
similarity index 100%
rename from exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
rename to contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
similarity index 91%
rename from exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
rename to contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
index b80bed4..c75efdf 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
+++ b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
@@ -18,23 +18,21 @@
package org.apache.drill.exec.store.log;
-import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.physical.impl.scan.columns.ColumnsScanFramework;
-import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderFactory;
-import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileScanBuilder;
-import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
-import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileReaderFactory;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileScanLifecycleBuilder;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileSchemaNegotiator;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.Propertied;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.server.DrillbitContext;
-import org.apache.drill.exec.server.options.OptionSet;
import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
import org.apache.drill.exec.store.dfs.easy.EasySubScan;
import org.apache.drill.exec.store.log.LogBatchReader.LogReaderConfig;
@@ -61,16 +59,14 @@
private static class LogReaderFactory extends FileReaderFactory {
private final LogReaderConfig readerConfig;
- private final int maxRecords;
- public LogReaderFactory(LogReaderConfig config, int maxRecords) {
+ public LogReaderFactory(LogReaderConfig config) {
readerConfig = config;
- this.maxRecords = maxRecords;
}
@Override
- public ManagedReader<? extends FileSchemaNegotiator> newReader() {
- return new LogBatchReader(readerConfig, maxRecords);
+ public ManagedReader newReader(FileSchemaNegotiator negotiator) {
+ return new LogBatchReader(readerConfig, negotiator);
}
}
@@ -91,7 +87,7 @@
.fsConf(fsConf)
.defaultName(PLUGIN_NAME)
.readerOperatorType(OPERATOR_TYPE)
- .scanVersion(ScanFrameworkVersion.EVF_V1)
+ .scanVersion(ScanFrameworkVersion.EVF_V2)
.supportsLimitPushdown(true)
.build();
}
@@ -143,8 +139,7 @@
* </ul>
*/
@Override
- protected FileScanBuilder frameworkBuilder(EasySubScan scan, OptionSet options) throws ExecutionSetupException {
-
+ protected void configureScan(FileScanLifecycleBuilder builder, EasySubScan scan) {
// Pattern and schema identical across readers; define
// up front.
final TupleMetadata providedSchema = scan.getSchema();
@@ -169,8 +164,6 @@
// Use the file framework to enable support for implicit and partition
// columns.
- final FileScanBuilder builder = new FileScanBuilder();
- initScanBuilder(builder, scan);
if (hasSchema) {
if (!hasColumns) {
@@ -207,14 +200,13 @@
// Pass along the class that will create a batch reader on demand for
// each input file.
- builder.setReaderFactory(new LogReaderFactory(
- new LogReaderConfig(this, pattern, providedSchema, tableSchema,
- readerSchema, !hasSchema, groupCount, maxErrors(providedSchema)), scan.getMaxRecords()));
+ builder.readerFactory(new LogReaderFactory(
+ new LogReaderConfig(this, pattern, providedSchema, tableSchema,
+ readerSchema, !hasSchema, groupCount, maxErrors(providedSchema))));
// The default type of regex columns is nullable VarChar,
// so let's use that as the missing column type.
builder.nullType(Types.optional(MinorType.VARCHAR));
- return builder;
}
/**
diff --git a/contrib/format-log/src/main/resources/drill-module.conf b/contrib/format-log/src/main/resources/drill-module.conf
new file mode 100644
index 0000000..68df1dd
--- /dev/null
+++ b/contrib/format-log/src/main/resources/drill-module.conf
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file tells Drill to consider this module when class path scanning.
+# This file can also include any supplementary configuration information.
+# This file is in HOCON format, see https://github.com/typesafehub/config/blob/master/HOCON.md for more information.
+
+drill.classpath.scanning.packages += "org.apache.drill.exec.store.log"
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java b/contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
similarity index 100%
rename from exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
rename to contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java b/contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
similarity index 100%
rename from exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
rename to contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
diff --git a/exec/java-exec/src/test/resources/regex/baddates.log2 b/contrib/format-log/src/test/resources/regex/baddates.log2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/baddates.log2
rename to contrib/format-log/src/test/resources/regex/baddates.log2
diff --git a/exec/java-exec/src/test/resources/regex/firewall.ssdlog b/contrib/format-log/src/test/resources/regex/firewall.ssdlog
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/firewall.ssdlog
rename to contrib/format-log/src/test/resources/regex/firewall.ssdlog
diff --git a/exec/java-exec/src/test/resources/regex/large.log1 b/contrib/format-log/src/test/resources/regex/large.log1
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/large.log1
rename to contrib/format-log/src/test/resources/regex/large.log1
diff --git a/exec/java-exec/src/test/resources/regex/mysql.sqllog b/contrib/format-log/src/test/resources/regex/mysql.sqllog
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/mysql.sqllog
rename to contrib/format-log/src/test/resources/regex/mysql.sqllog
diff --git a/exec/java-exec/src/test/resources/regex/mysql.sqllog2 b/contrib/format-log/src/test/resources/regex/mysql.sqllog2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/mysql.sqllog2
rename to contrib/format-log/src/test/resources/regex/mysql.sqllog2
diff --git a/exec/java-exec/src/test/resources/regex/simple.log1 b/contrib/format-log/src/test/resources/regex/simple.log1
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/simple.log1
rename to contrib/format-log/src/test/resources/regex/simple.log1
diff --git a/exec/java-exec/src/test/resources/regex/simple.log2 b/contrib/format-log/src/test/resources/regex/simple.log2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/simple.log2
rename to contrib/format-log/src/test/resources/regex/simple.log2
diff --git a/contrib/pom.xml b/contrib/pom.xml
index d77bfa2..6fd6925 100644
--- a/contrib/pom.xml
+++ b/contrib/pom.xml
@@ -48,6 +48,7 @@
<module>format-excel</module>
<module>format-httpd</module>
<module>format-esri</module>
+ <module>format-log</module>
<module>format-pdf</module>
<module>format-hdf5</module>
<module>format-sas</module>
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 60a2bb1..ce39807 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -471,6 +471,11 @@
</dependency>
<dependency>
<groupId>org.apache.drill.contrib</groupId>
+ <artifactId>drill-format-log</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.drill.contrib</groupId>
<artifactId>drill-druid-storage</artifactId>
<version>${project.version}</version>
</dependency>
diff --git a/distribution/src/assemble/component.xml b/distribution/src/assemble/component.xml
index 6927b05..38d79f0 100644
--- a/distribution/src/assemble/component.xml
+++ b/distribution/src/assemble/component.xml
@@ -49,6 +49,7 @@
<include>org.apache.drill.contrib:drill-format-image:jar</include>
<include>org.apache.drill.contrib:drill-format-pcapng:jar</include>
<include>org.apache.drill.contrib:drill-format-hdf5:jar</include>
+ <include>org.apache.drill.contrib:drill-format-log:jar</include>
<include>org.apache.drill.contrib:drill-format-ltsv:jar</include>
<include>org.apache.drill.contrib:drill-format-httpd:jar</include>
<include>org.apache.drill.contrib:drill-format-pdf:jar</include>