blob: 7da7a95d1f5be9c9aa09b39e8a09c0200c780583 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.httpd;
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
import io.netty.buffer.DrillBuf;
import nl.basjes.parse.core.Casts;
import nl.basjes.parse.core.Parser;
import nl.basjes.parse.core.exceptions.DissectionFailure;
import nl.basjes.parse.core.exceptions.InvalidDissectorException;
import nl.basjes.parse.core.exceptions.MissingDissectorsException;
import nl.basjes.parse.httpdlog.HttpdLoglineParser;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Configures an {@link HttpdLoglineParser} for a given httpd log format and feeds every parsed line into a single
 * reusable {@link HttpdLogRecord}.
 *
 * <p>The class also provides the translation between the two field naming schemes in play:
 * <ul>
 * <li>parser paths, which are dot separated and optionally prefixed with a type, e.g.
 * {@code HTTP.URI:request.referer};</li>
 * <li>Drill field names, which use {@link #SAFE_SEPARATOR} instead of dots, e.g. {@code request_referer}.</li>
 * </ul>
 */
public class HttpdParser {

  private static final Logger logger = LoggerFactory.getLogger(HttpdParser.class);

  public static final String PARSER_WILDCARD = ".*";
  public static final String SAFE_WILDCARD = "_$";
  public static final String SAFE_SEPARATOR = "_";
  public static final String REMAPPING_FLAG = "#";

  /** Suffixes of the two variants that most fields are registered with in addition to their base path. */
  private static final String LAST_SUFFIX = ".last";
  private static final String ORIGINAL_SUFFIX = ".original";

  private final Parser<HttpdLogRecord> parser;
  private final HttpdLogRecord record;

  /**
   * Maps an untyped, dot separated parser path (e.g. {@code request.referer}) to the same path prefixed with its
   * type (e.g. {@code HTTP.URI:request.referer}). Every value has the form {@code TYPE:key}.
   */
  public static final HashMap<String, String> LOGFIELDS = new HashMap<String, String>();

  static {
    registerFieldWithVariants("IP", "connection.client.ip");
    registerFieldWithVariants("IP", "connection.client.peerip");
    registerFieldWithVariants("IP", "connection.server.ip");

    // NOTE: a key can carry only one type. These keys used to be registered as BYTES first and as BYTESCLF
    // afterwards, so BYTESCLF has always been the type in effect and is the only one registered here.
    registerFieldWithVariants("BYTESCLF", "response.body.bytes");
    registerField("BYTES", "response.body.bytesclf");

    registerField("HTTP.COOKIE", "request.cookies.foobar");
    registerField("VARIABLE", "server.environment.foobar");
    registerFieldWithVariants("FILENAME", "server.filename");
    registerFieldWithVariants("IP", "connection.client.host");
    registerFieldWithVariants("PROTOCOL", "request.protocol");
    registerField("HTTP.HEADER", "request.header.foobar");
    registerField("HTTP.TRAILER", "request.trailer.foobar");
    registerFieldWithVariants("NUMBER", "connection.keepalivecount");
    registerFieldWithVariants("NUMBER", "connection.client.logname");
    registerFieldWithVariants("STRING", "request.errorlogid");
    registerFieldWithVariants("HTTP.METHOD", "request.method");
    registerField("STRING", "server.module_note.foobar");
    registerField("HTTP.HEADER", "response.header.foobar");
    registerField("HTTP.TRAILER", "response.trailer.foobar");

    registerFieldWithVariants("PORT", "request.server.port.canonical");
    registerFieldWithVariants("PORT", "connection.server.port.canonical");
    registerFieldWithVariants("PORT", "connection.server.port");
    registerFieldWithVariants("PORT", "connection.client.port");

    registerFieldWithVariants("NUMBER", "connection.server.child.processid");
    registerFieldWithVariants("NUMBER", "connection.server.child.threadid");
    registerFieldWithVariants("NUMBER", "connection.server.child.hexthreadid");

    registerFieldWithVariants("HTTP.QUERYSTRING", "request.querystring");
    registerFieldWithVariants("HTTP.FIRSTLINE", "request.firstline");
    registerFieldWithVariants("STRING", "request.handler");
    registerFieldWithVariants("STRING", "request.status");

    registerFieldWithVariants("TIME.STAMP", "request.receive.time");
    registerField("TIME.YEAR", "request.receive.time.year");
    registerField("TIME.YEAR", "request.receive.time.begin.year");
    registerField("TIME.YEAR", "request.receive.time.end.year");

    registerFieldWithVariants("TIME.SECONDS", "request.receive.time.sec");
    registerFieldWithVariants("TIME.SECONDS", "request.receive.time.begin.sec");
    registerFieldWithVariants("TIME.SECONDS", "request.receive.time.end.sec");

    registerFieldWithVariants("TIME.EPOCH", "request.receive.time.msec");
    registerFieldWithVariants("TIME.EPOCH", "request.receive.time.begin.msec");
    registerFieldWithVariants("TIME.EPOCH", "request.receive.time.end.msec");

    registerFieldWithVariants("TIME.EPOCH.USEC", "request.receive.time.usec");
    registerFieldWithVariants("TIME.EPOCH.USEC", "request.receive.time.begin.usec");
    registerFieldWithVariants("TIME.EPOCH.USEC", "request.receive.time.end.usec");

    registerFieldWithVariants("TIME.EPOCH", "request.receive.time.msec_frac");
    registerFieldWithVariants("TIME.EPOCH", "request.receive.time.begin.msec_frac");
    registerFieldWithVariants("TIME.EPOCH", "request.receive.time.end.msec_frac");

    registerFieldWithVariants("FRAC", "request.receive.time.usec_frac");
    registerFieldWithVariants("FRAC", "request.receive.time.begin.usec_frac");
    registerFieldWithVariants("FRAC", "request.receive.time.end.usec_frac");

    // NOTE: these keys used to be registered as SECONDS, MICROSECONDS, MILLISECONDS and finally SECONDS again.
    // Only the last registration was ever visible, so SECONDS is the only one kept.
    registerFieldWithVariants("SECONDS", "response.server.processing.time");
    registerField("MICROSECONDS", "server.process.time");

    registerFieldWithVariants("STRING", "connection.client.user");
    registerFieldWithVariants("URI", "request.urlpath");
    registerFieldWithVariants("STRING", "connection.server.name.canonical");
    registerFieldWithVariants("STRING", "connection.server.name");
    registerFieldWithVariants("HTTP.CONNECTSTATUS", "response.connection.status");

    registerFieldWithVariants("BYTES", "request.bytes");
    registerFieldWithVariants("BYTES", "response.bytes");
    registerFieldWithVariants("BYTES", "total.bytes");

    registerFieldWithVariants("HTTP.COOKIES", "request.cookies");
    registerFieldWithVariants("HTTP.SETCOOKIES", "response.cookies");
    registerFieldWithVariants("HTTP.USERAGENT", "request.user-agent");
    registerFieldWithVariants("HTTP.URI", "request.referer");
  }

  /**
   * Registers {@code path -> type:path} in {@link #LOGFIELDS}.
   *
   * @param type parser type prefix, e.g. {@code HTTP.URI}
   * @param path untyped, dot separated parser path, e.g. {@code request.referer}
   */
  private static void registerField(final String type, final String path) {
    LOGFIELDS.put(path, type + ":" + path);
  }

  /**
   * Registers {@code path} together with its {@code .last} and {@code .original} variants, all with the same type.
   *
   * @param type parser type prefix shared by the three entries
   * @param path untyped, dot separated base path
   */
  private static void registerFieldWithVariants(final String type, final String path) {
    registerField(type, path);
    registerField(type, path + LAST_SUFFIX);
    registerField(type, path + ORIGINAL_SUFFIX);
  }

  /**
   * Creates the underlying log line parser and registers every requested field on the record.
   *
   * @param mapWriter writer handed to the record for each registered field
   * @param managedBuffer buffer handed to the {@link HttpdLogRecord}
   * @param logFormat httpd log format definition; must not be null or blank
   * @param timestampFormat optional custom timestamp format, passed to both the record and the parser
   * @param fieldMapping Drill field name to parser path; when null or empty every path the parser reports for
   *        {@code logFormat} is used. A value starting with {@link #REMAPPING_FLAG} requests a type remapping.
   * @throws NoSuchMethodException declared by the parser setup
   * @throws MissingDissectorsException declared by the parser setup
   * @throws InvalidDissectorException declared by the parser setup
   * @throws IllegalArgumentException if {@code logFormat} is null or blank
   */
  public HttpdParser(final MapWriter mapWriter, final DrillBuf managedBuffer, final String logFormat,
      final String timestampFormat, final Map<String, String> fieldMapping)
      throws NoSuchMethodException, MissingDissectorsException, InvalidDissectorException {

    Preconditions.checkArgument(logFormat != null && !logFormat.trim().isEmpty(), "logFormat cannot be null or empty");

    this.record = new HttpdLogRecord(managedBuffer, timestampFormat);
    this.parser = new HttpdLoglineParser<>(HttpdLogRecord.class, logFormat, timestampFormat);

    setupParser(mapWriter, logFormat, fieldMapping);

    if (timestampFormat != null && !timestampFormat.trim().isEmpty()) {
      logger.info("Custom timestamp format has been specified. This is an informational note only as custom timestamps is rather unusual.");
    }
    if (logFormat.contains("\n")) {
      logger.info("Specified logformat is a multiline log format: {}", logFormat);
    }
  }

  /**
   * Parses one log line into the internal record and then finishes that record. Neither the underlying parser nor
   * the record is exposed to callers.
   *
   * @param line log line to tear apart.
   * @throws DissectionFailure declared by the underlying parser
   * @throws InvalidDissectorException declared by the underlying parser
   * @throws MissingDissectorsException declared by the underlying parser
   */
  public void parse(final String line) throws DissectionFailure, InvalidDissectorException, MissingDissectorsException {
    parser.parse(record, line);
    record.finishRecord();
  }

  /**
   * Adds a type remapping to the given parser. A remapping is configured as {@code TYPE:path}, for example
   * {@code HTTP.URI:request.firstline.uri.query.[parameter name]}.
   *
   * @param parser Add type remapping to this parser instance.
   * @param fieldName request.firstline.uri.query.[parameter_name]
   * @param fieldType HTTP.URI, etc..
   */
  private void addTypeRemapping(final Parser<HttpdLogRecord> parser, final String fieldName, final String fieldType) {
    logger.debug("Adding type remapping - fieldName: {}, fieldType: {}", fieldName, fieldType);
    parser.addTypeRemapping(fieldName, fieldType);
  }

  /**
   * Resolves a field name to the typed parser path registered in {@link #LOGFIELDS}, e.g. {@code request_referer}
   * or {@code request.referer} both resolve to {@code HTTP.URI:request.referer}.
   *
   * <p>The name is first looked up as given (it may already be a dot separated parser path). If that misses, it is
   * treated as a Drill field name: {@link #SAFE_WILDCARD} becomes {@link #PARSER_WILDCARD}, each
   * {@link #SAFE_SEPARATOR} becomes a dot and a doubled separator becomes a literal underscore; the result is then
   * looked up. The registered path is returned unchanged so that underscores which are part of a parser path
   * (e.g. {@code msec_frac}) survive.
   *
   * @param drillFieldName name to be resolved; {@code useragent} is accepted in place of {@code user-agent}
   * @return the typed parser path for the field
   * @throws NullPointerException if {@code drillFieldName} is null or does not correspond to a registered field
   */
  public static String parserFormattedFieldName(String drillFieldName) {
    Preconditions.checkNotNull(drillFieldName, "drillFieldName cannot be null");

    // Drill-side names drop the dash of "user-agent" (see drillFormattedFieldName); put it back before the lookup.
    final String candidate = drillFieldName.replace("useragent", "user-agent");

    String parserPath = LOGFIELDS.get(candidate);
    if (parserPath == null) {
      // LOGFIELDS is keyed by dot separated paths, so undo Drill's separator escaping and try again.
      final String dottedName = candidate
          .replace(SAFE_WILDCARD, PARSER_WILDCARD)
          .replaceAll(SAFE_SEPARATOR, ".")
          .replaceAll("\\.\\.", "_");
      parserPath = LOGFIELDS.get(dottedName);
    }

    // Same exception type as the former implicit null dereference, but with a usable message.
    Preconditions.checkNotNull(parserPath, "Unknown httpd log field: %s", drillFieldName);
    return parserPath;
  }

  /**
   * Converts a parser path into a name Drill can use as a field name, e.g. {@code HTTP.URI:request.referer} becomes
   * {@code request_referer}.
   *
   * <p>The steps are: {@code user-agent} loses its dash; if the name contains a colon only the part after the
   * first colon (and before any second one) is kept; existing underscores are doubled; {@link #PARSER_WILDCARD} is
   * replaced with {@link #SAFE_WILDCARD}; remaining dots are replaced with {@link #SAFE_SEPARATOR}.
   *
   * @param parserFieldName name to be cleansed.
   * @return the Drill-safe field name
   */
  public static String drillFormattedFieldName(String parserFieldName) {
    String name = parserFieldName.replace("user-agent", "useragent");

    if (name.contains(":")) {
      // Drop the type prefix ("TYPE:path" -> "path").
      name = name.split(":")[1];
    }

    return name
        .replace("_", "__")
        .replace(PARSER_WILDCARD, SAFE_WILDCARD)
        .replace(".", SAFE_SEPARATOR);
  }

  /**
   * Registers every requested field on the record, together with the casts the parser reports for it.
   *
   * @param mapWriter writer handed to the record for each field
   * @param logFormat log format used to build a throw-away parser for discovering casts
   * @param fieldMapping Drill field name to parser path; null or empty selects all possible paths. The map is only
   *        read, never modified.
   * @throws NoSuchMethodException declared by the reflective lookup and the record setup
   * @throws MissingDissectorsException declared by the parser
   * @throws InvalidDissectorException declared by the parser
   * @throws IllegalArgumentException if a remapped entry is not of the form {@code #TYPE:path}
   */
  private void setupParser(final MapWriter mapWriter, final String logFormat, final Map<String, String> fieldMapping)
      throws NoSuchMethodException, MissingDissectorsException, InvalidDissectorException {

    final List<String> allParserPaths = parser.getPossiblePaths();

    // If the user has selected fields, use them to configure the parser because that is the most efficient way to
    // parse the log. Otherwise fall back to every path the parser derived from the log format.
    final Map<String, String> requestedPaths;
    if (fieldMapping != null && !fieldMapping.isEmpty()) {
      logger.debug("Using fields defined by user.");
      requestedPaths = fieldMapping;
    } else {
      logger.debug("No fields defined by user, defaulting to all possible fields.");
      requestedPaths = Maps.newHashMap();
      for (final String parserPath : allParserPaths) {
        requestedPaths.put(drillFormattedFieldName(parserPath), parserPath);
      }
    }

    // Adding a parse target to the dummy instance activates it, which lets us ask which casts each path supports.
    // The dummy is thrown away afterwards because it is the slowest possible parsing setup for this format.
    final Parser<Object> dummy = new HttpdLoglineParser<>(Object.class, logFormat);
    dummy.addParseTarget(String.class.getMethod("indexOf", String.class), allParserPaths);

    for (final Map.Entry<String, String> entry : requestedPaths.entrySet()) {
      final String drillField = entry.getKey();
      final String requestedPath = entry.getValue();
      final String parserField;
      final EnumSet<Casts> casts;

      if (requestedPath.startsWith(REMAPPING_FLAG)) {
        // The field is being remapped: strip the flag locally (the caller's map is left untouched) and register
        // the remapping "TYPE:path" on the real parser.
        parserField = requestedPath.substring(REMAPPING_FLAG.length());
        final String[] pieces = parserField.split(":");
        Preconditions.checkArgument(pieces.length >= 2,
            "Remapped field must have the form %sTYPE:path but was: %s", REMAPPING_FLAG, requestedPath);
        addTypeRemapping(parser, pieces[1], pieces[0]);
        casts = Casts.STRING_ONLY;
      } else {
        parserField = requestedPath;
        casts = dummy.getCasts(parserField);
      }

      logger.debug("Setting up drill field: {}, parser field: {}, which casts as: {}", drillField, parserField, casts);
      record.addField(parser, mapWriter, casts, parserField, drillField);
    }
  }
}