// blob: e6a7c79b77e05eba2ae595c3bd866b6eb84c49e8 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.metron.parsing.parsers;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.metron.parsing.utils.GrokUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.code.regexp.Matcher;
import com.google.code.regexp.Pattern;
/**
 * Holds a dictionary of named {@code Grok} pattern definitions and compiles a
 * {@code Grok} expression (for example {@code %{IP}}) into a named regular
 * expression that can then be matched against log lines.
 *
 * <p>Instances are mutable: pattern definitions can be added at any time and
 * {@link #compile(String)} replaces the currently compiled expression. The maps
 * returned by {@link #getPatterns()} and {@link #getNamedRegexCollection()} are
 * the live internal maps, not copies.
 */
public class MetronGrok implements Serializable {

  private static final long serialVersionUID = 2002441320075020721L;

  private static final Logger LOG = LoggerFactory.getLogger(MetronGrok.class);

  /**
   * Maximum number of expansion passes {@link #compile(String)} performs before
   * it gives up; protects against patterns that reference themselves.
   */
  private static final int MAX_COMPILE_ITERATIONS = 1000;

  /**
   * Shape of a pattern-file line: a name, whitespace, then the definition.
   * Blank lines and lines starting with {@code #} do not match and are skipped.
   * NOTE(review): the range {@code A-z} also admits the characters between
   * {@code Z} and {@code a} in ASCII; left as-is to keep accepting the same names.
   */
  private static final Pattern PATTERN_FILE_LINE = Pattern.compile("^([A-z0-9_]+)\\s+(.*)$");

  /**
   * Named regex of the originalGrokPattern.
   */
  private String namedRegex;

  /**
   * Map of the named regex of the originalGrokPattern
   * with id = namedregexid and value = namedregex.
   */
  private Map<String, String> namedRegexCollection;

  /**
   * Original {@code Grok} pattern (example: %{IP}).
   */
  private String originalGrokPattern;

  /**
   * Compiled form of {@link #namedRegex}; {@code null} until
   * {@link #compile(String)} has succeeded.
   */
  private Pattern compiledNamedRegex;

  /**
   * Known pattern definitions, keyed by pattern name.
   */
  private Map<String, String> grokPatternDefinition;

  /** Only used in grok discovery. */
  private String savedPattern;

  /**
   * Shared empty {@code Grok} instance. It is an ordinary mutable instance, so
   * callers should not add patterns to it or compile on it.
   */
  public static final MetronGrok EMPTY = new MetronGrok();

  /**
   * Create a new <i>empty</i> {@code Grok} object: no pattern definitions and
   * no compiled expression.
   */
  public MetronGrok() {
    originalGrokPattern = StringUtils.EMPTY;
    namedRegex = StringUtils.EMPTY;
    compiledNamedRegex = null;
    grokPatternDefinition = new TreeMap<String, String>();
    namedRegexCollection = new TreeMap<String, String>();
    savedPattern = StringUtils.EMPTY;
  }

  /**
   * Get the pattern saved for grok discovery.
   *
   * @return the saved pattern
   */
  public String getSaved_pattern() {
    return savedPattern;
  }

  /**
   * Set the pattern saved for grok discovery.
   *
   * @param savedpattern : pattern to remember
   */
  public void setSaved_pattern(String savedpattern) {
    this.savedPattern = savedpattern;
  }

  /**
   * Create a {@code Grok} instance with the given patterns file and
   * a {@code Grok} pattern.
   *
   * @param grokPatternPath Path to the pattern file
   * @param grokExpression - <b>OPTIONAL</b> - Grok pattern to compile ex: %{APACHELOG};
   *        when blank or {@code null} nothing is compiled
   * @return {@code Grok} instance
   * @throws Exception if the path is blank, the file cannot be loaded, or the
   *         expression cannot be compiled
   */
  public static MetronGrok create(String grokPatternPath, String grokExpression)
      throws Exception {
    if (StringUtils.isBlank(grokPatternPath)) {
      throw new Exception("{grokPatternPath} should not be empty or null");
    }
    MetronGrok g = new MetronGrok();
    g.addPatternFromFile(grokPatternPath);
    if (StringUtils.isNotBlank(grokExpression)) {
      g.compile(grokExpression);
    }
    return g;
  }

  /**
   * Create a {@code Grok} instance with the given grok patterns file.
   * No expression is compiled.
   *
   * @param grokPatternPath : Path to the pattern file
   * @return Grok
   * @throws Exception if the path is blank or the file cannot be loaded
   */
  public static MetronGrok create(String grokPatternPath) throws Exception {
    return create(grokPatternPath, null);
  }

  /**
   * Add custom pattern to grok in the runtime. An existing definition with the
   * same name is replaced.
   *
   * @param name : Pattern Name
   * @param pattern : Regular expression Or {@code Grok} pattern
   * @throws Exception if {@code name} or {@code pattern} is blank or {@code null}
   **/
  public void addPattern(String name, String pattern) throws Exception {
    if (StringUtils.isBlank(name)) {
      throw new Exception("Invalid Pattern name");
    }
    // The definition itself must be validated here; a blank definition would
    // otherwise expand to an empty capture group at compile time.
    if (StringUtils.isBlank(pattern)) {
      throw new Exception("Invalid Pattern");
    }
    grokPatternDefinition.put(name, pattern);
  }

  /**
   * Copy the given Map of patterns (pattern name, regular expression) to {@code Grok},
   * duplicate elements will be overridden. Every entry goes through
   * {@link #addPattern(String, String)}, so entries are validated the same way;
   * entries copied before an invalid one stay registered.
   *
   * @param cpy : Map to copy
   * @throws Exception if the map is {@code null} or empty, or an entry has a
   *         blank name or definition
   **/
  public void copyPatterns(Map<String, String> cpy) throws Exception {
    if (cpy == null || cpy.isEmpty()) {
      throw new Exception("Invalid Patterns");
    }
    for (Map.Entry<String, String> entry : cpy.entrySet()) {
      addPattern(entry.getKey(), entry.getValue());
    }
  }

  /**
   * Get the current map of {@code Grok} pattern. This is the live internal
   * map: changes made to it affect this instance.
   *
   * @return Patterns (name, regular expression)
   */
  public Map<String, String> getPatterns() {
    return grokPatternDefinition;
  }

  /**
   * Get the named regex from the {@code Grok} pattern. <p></p>
   * See {@link #compile(String)} for more detail.
   *
   * @return named regex, or the empty string if nothing has been compiled
   */
  public String getNamedRegex() {
    return namedRegex;
  }

  /**
   * Add patterns to {@code Grok} from the given file.
   *
   * @param file : Path of the grok pattern
   * @throws Exception if the file does not exist, cannot be read, or contains
   *         a definition rejected by {@link #addPattern(String, String)}
   */
  public void addPatternFromFile(String file) throws Exception {
    File f = new File(file);
    if (!f.exists()) {
      throw new Exception("Pattern not found");
    }
    if (!f.canRead()) {
      throw new Exception("Pattern cannot be read");
    }
    FileReader r = null;
    try {
      r = new FileReader(f);
      addPatternFromReader(r);
    } catch (IOException e) {
      // Covers FileNotFoundException as well; keep the cause for diagnosis.
      throw new Exception(e.getMessage(), e);
    } finally {
      if (r != null) {
        try {
          r.close();
        } catch (IOException io) {
          // The patterns are already loaded (or the real failure is already
          // propagating), so a close failure is only worth a warning.
          LOG.warn("Unable to close pattern file " + file, io);
        }
      }
    }
  }

  /**
   * Add patterns to {@code Grok} from a Reader. Each line of the form
   * {@code NAME definition} is registered; lines that do not have that shape
   * (blank lines, comments) are skipped. The reader is closed before this
   * method returns, whether or not loading succeeded.
   *
   * @param r : Reader with {@code Grok} patterns
   * @throws Exception if reading fails or a definition is rejected by
   *         {@link #addPattern(String, String)}
   */
  public void addPatternFromReader(Reader r) throws Exception {
    BufferedReader br = new BufferedReader(r);
    try {
      String line;
      while ((line = br.readLine()) != null) {
        Matcher m = PATTERN_FILE_LINE.matcher(line);
        if (m.matches()) {
          this.addPattern(m.group(1), m.group(2));
        }
      }
    } catch (IOException e) {
      throw new Exception(e.getMessage(), e);
    } finally {
      try {
        br.close();
      } catch (IOException e) {
        LOG.warn("Unable to close pattern reader", e);
      }
    }
  }

  /**
   * Match the given <tt>log</tt> with the named regex
   * and return the json representation of the matched element.
   *
   * @param log : log to match
   * @return json representation of the log
   */
  public String capture(String log) {
    MetronMatch match = match(log);
    match.captures();
    return match.toJson();
  }

  /**
   * Match the given list of <tt>log</tt> with the named regex
   * and return the list of json representation of the matched elements.
   *
   * @param logs : list of log
   * @return list of json representation of the log, in the same order as {@code logs}
   */
  public List<String> captures(List<String> logs) {
    List<String> matched = new ArrayList<String>();
    for (String log : logs) {
      MetronMatch match = match(log);
      match.captures();
      matched.add(match.toJson());
    }
    return matched;
  }

  /**
   * Match the given <tt>text</tt> with the named regex.
   * {@code Grok} will extract data from the string and return an instance of
   * {@link MetronMatch}.
   *
   * <p>If nothing has been compiled yet, or {@code text} is blank, the shared
   * {@code MetronMatch.EMPTY} is returned. If the text does not match, a fresh
   * {@link MetronMatch} with none of its fields populated is returned.
   *
   * @param text : Single line of log
   * @return Grok Match
   */
  public MetronMatch match(String text) {
    if (compiledNamedRegex == null || StringUtils.isBlank(text)) {
      return MetronMatch.EMPTY;
    }
    Matcher m = compiledNamedRegex.matcher(text);
    MetronMatch match = new MetronMatch();
    if (m.find()) {
      match.setSubject(text);
      match.setGrok(this);
      match.setMatch(m);
      match.setStart(m.start(0));
      match.setEnd(m.end(0));
    }
    return match;
  }

  /**
   * Compile the {@code Grok} pattern to named regex pattern.
   *
   * <p>Every {@code %{NAME}}, {@code %{NAME:subname}} or {@code %{NAME=regex}}
   * reference is repeatedly replaced by a named group {@code (?<nameN>...)}
   * holding the definition of {@code NAME}, until no reference is left. The
   * mapping from {@code nameN} to the subname (or the reference name when no
   * subname is given) is recorded in the named regex collection.
   *
   * @param pattern : Grok pattern (ex: %{IP})
   * @throws Exception if {@code pattern} is blank, references an undefined
   *         pattern name, or still contains references after
   *         {@value #MAX_COMPILE_ITERATIONS} expansion passes
   */
  public void compile(String pattern) throws Exception {
    if (StringUtils.isBlank(pattern)) {
      throw new Exception("{pattern} should not be empty or null");
    }
    namedRegex = pattern;
    originalGrokPattern = pattern;
    int index = 0;
    // Guard against infinite recursion (a pattern that references itself).
    int iterationLeft = MAX_COMPILE_ITERATIONS;
    boolean continueIteration = true;
    // Replace %{foo} with the regex (mostly groupname regex)
    // and then compile the regex
    while (continueIteration) {
      if (iterationLeft <= 0) {
        throw new Exception("Deep recursion pattern compilation of " + originalGrokPattern);
      }
      iterationLeft--;
      Matcher m = GrokUtils.GROK_PATTERN.matcher(namedRegex);
      // Match %{Foo:bar} -> pattern name and subname
      // Match %{Foo=regex} -> add new regex definition
      continueIteration = m.find();
      if (continueIteration) {
        Map<String, String> group = m.namedGroups();
        String patternName = group.get("pattern");
        String inlineDefinition = group.get("definition");
        if (inlineDefinition != null) {
          try {
            addPattern(patternName, inlineDefinition);
            // The reference text in the expression includes the "=definition"
            // suffix, so the name used for replacement below must include it too.
            group.put("name", group.get("name") + "=" + inlineDefinition);
          } catch (Exception e) {
            LOG.warn("Unable to register inline definition for pattern " + patternName, e);
          }
        }
        String definition = grokPatternDefinition.get(patternName);
        if (definition == null) {
          // Without this check the literal text "null" would be spliced into the regex.
          throw new Exception("No definition found for pattern " + patternName
              + " while compiling " + originalGrokPattern);
        }
        String groupId = "name" + index;
        String subname = group.get("subname");
        namedRegexCollection.put(groupId, subname != null ? subname : group.get("name"));
        namedRegex = StringUtils.replace(namedRegex, "%{" + group.get("name") + "}",
            "(?<" + groupId + ">" + definition + ")");
        index++;
      }
    }
    if (namedRegex.isEmpty()) {
      throw new Exception("Pattern not found");
    }
    // Compile the regex
    compiledNamedRegex = Pattern.compile(namedRegex);
  }

  /**
   * Original grok pattern used to compile to the named regex.
   *
   * @return String Original Grok pattern
   */
  public String getOriginalGrokPattern() {
    return originalGrokPattern;
  }

  /**
   * Get the named regex from the given id.
   *
   * @param id : named regex id
   * @return String of the named regex, or {@code null} if the id is unknown
   */
  public String getNamedRegexCollectionById(String id) {
    return namedRegexCollection.get(id);
  }

  /**
   * Get the full collection of the named regex. This is the live internal
   * map: changes made to it affect this instance.
   *
   * @return named RegexCollection
   */
  public Map<String, String> getNamedRegexCollection() {
    return namedRegexCollection;
  }
}