| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.metron.parsing.parsers; |
| |
| import java.io.BufferedReader; |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.FileReader; |
| import java.io.IOException; |
| import java.io.Reader; |
| import java.io.Serializable; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.TreeMap; |
| |
| import org.apache.commons.lang3.StringUtils; |
| import org.apache.metron.parsing.utils.GrokUtils; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import com.google.code.regexp.Matcher; |
| import com.google.code.regexp.Pattern; |
| |
| public class MetronGrok implements Serializable { |
| |
| private static final long serialVersionUID = 2002441320075020721L; |
| private static final Logger LOG = LoggerFactory.getLogger(MetronGrok.class); |
| /** |
| * Named regex of the originalGrokPattern. |
| */ |
| private String namedRegex; |
| /** |
| * Map of the named regex of the originalGrokPattern |
| * with id = namedregexid and value = namedregex. |
| */ |
| private Map<String, String> namedRegexCollection; |
| /** |
| * Original {@code Grok} pattern (expl: %{IP}). |
| */ |
| private String originalGrokPattern; |
| /** |
| * Pattern of the namedRegex. |
| */ |
| private Pattern compiledNamedRegex; |
| /** |
| * {@code Grok} discovery. |
| */ |
| private Map<String, String> grokPatternDefinition; |
| |
| /** only use in grok discovery. */ |
| private String savedPattern; |
| |
| /** |
| * Create Empty {@code Grok}. |
| */ |
| public static final MetronGrok EMPTY = new MetronGrok(); |
| |
| /** |
| * Create a new <i>empty</i>{@code Grok} object. |
| */ |
| public MetronGrok() { |
| originalGrokPattern = StringUtils.EMPTY; |
| namedRegex = StringUtils.EMPTY; |
| compiledNamedRegex = null; |
| grokPatternDefinition = new TreeMap<String, String>(); |
| namedRegexCollection = new TreeMap<String, String>(); |
| savedPattern = StringUtils.EMPTY; |
| } |
| |
| public String getSaved_pattern() { |
| return savedPattern; |
| } |
| |
| public void setSaved_pattern(String savedpattern) { |
| this.savedPattern = savedpattern; |
| } |
| |
| /** |
| * Create a {@code Grok} instance with the given patterns file and |
| * a {@code Grok} pattern. |
| * |
| * @param grokPatternPath Path to the pattern file |
| * @param grokExpression - <b>OPTIONAL</b> - Grok pattern to compile ex: %{APACHELOG} |
| * @return {@code Grok} instance |
| * @throws Exception |
| */ |
| public static MetronGrok create(String grokPatternPath, String grokExpression) |
| throws Exception { |
| if (StringUtils.isBlank(grokPatternPath)) { |
| throw new Exception("{grokPatternPath} should not be empty or null"); |
| } |
| MetronGrok g = new MetronGrok(); |
| g.addPatternFromFile(grokPatternPath); |
| if (StringUtils.isNotBlank(grokExpression)) { |
| g.compile(grokExpression); |
| } |
| return g; |
| } |
| |
| /** |
| * Create a {@code Grok} instance with the given grok patterns file. |
| * |
| * @param grokPatternPath : Path to the pattern file |
| * @return Grok |
| * @throws Exception |
| */ |
| public static MetronGrok create(String grokPatternPath) throws Exception { |
| return create(grokPatternPath, null); |
| } |
| |
| /** |
| * Add custom pattern to grok in the runtime. |
| * |
| * @param name : Pattern Name |
| * @param pattern : Regular expression Or {@code Grok} pattern |
| * @throws Exception |
| **/ |
| public void addPattern(String name, String pattern) throws Exception { |
| if (StringUtils.isBlank(name)) { |
| throw new Exception("Invalid Pattern name"); |
| } |
| if (StringUtils.isBlank(name)) { |
| throw new Exception("Invalid Pattern"); |
| } |
| grokPatternDefinition.put(name, pattern); |
| } |
| |
| /** |
| * Copy the given Map of patterns (pattern name, regular expression) to {@code Grok}, |
| * duplicate element will be override. |
| * |
| * @param cpy : Map to copy |
| * @throws Exception |
| **/ |
| public void copyPatterns(Map<String, String> cpy) throws Exception { |
| if (cpy == null) { |
| throw new Exception("Invalid Patterns"); |
| } |
| |
| if (cpy.isEmpty()) { |
| throw new Exception("Invalid Patterns"); |
| } |
| for (Map.Entry<String, String> entry : cpy.entrySet()) { |
| grokPatternDefinition.put(entry.getKey().toString(), entry.getValue().toString()); |
| } |
| } |
| |
| /** |
| * Get the current map of {@code Grok} pattern. |
| * |
| * @return Patterns (name, regular expression) |
| */ |
| public Map<String, String> getPatterns() { |
| return grokPatternDefinition; |
| } |
| |
| /** |
| * Get the named regex from the {@code Grok} pattern. <p></p> |
| * See {@link #compile(String)} for more detail. |
| * @return named regex |
| */ |
| public String getNamedRegex() { |
| return namedRegex; |
| } |
| |
| /** |
| * Add patterns to {@code Grok} from the given file. |
| * |
| * @param file : Path of the grok pattern |
| * @throws Exception |
| */ |
| public void addPatternFromFile(String file) throws Exception { |
| |
| File f = new File(file); |
| if (!f.exists()) { |
| throw new Exception("Pattern not found"); |
| } |
| |
| if (!f.canRead()) { |
| throw new Exception("Pattern cannot be read"); |
| } |
| |
| FileReader r = null; |
| try { |
| r = new FileReader(f); |
| addPatternFromReader(r); |
| } catch (FileNotFoundException e) { |
| throw new Exception(e.getMessage()); |
| } catch (@SuppressWarnings("hiding") IOException e) { |
| throw new Exception(e.getMessage()); |
| } finally { |
| try { |
| if (r != null) { |
| r.close(); |
| } |
| } catch (IOException io) { |
| // TODO(anthony) : log the error |
| } |
| } |
| } |
| |
| /** |
| * Add patterns to {@code Grok} from a Reader. |
| * |
| * @param r : Reader with {@code Grok} patterns |
| * @throws Exception |
| */ |
| public void addPatternFromReader(Reader r) throws Exception { |
| BufferedReader br = new BufferedReader(r); |
| String line; |
| // We dont want \n and commented line |
| Pattern pattern = Pattern.compile("^([A-z0-9_]+)\\s+(.*)$"); |
| try { |
| while ((line = br.readLine()) != null) { |
| Matcher m = pattern.matcher(line); |
| if (m.matches()) { |
| this.addPattern(m.group(1), m.group(2)); |
| } |
| } |
| br.close(); |
| } catch (IOException e) { |
| throw new Exception(e.getMessage()); |
| } catch (Exception e) { |
| throw new Exception(e.getMessage()); |
| } |
| |
| } |
| |
| /** |
| * Match the given <tt>log</tt> with the named regex. |
| * And return the json representation of the matched element |
| * |
| * @param log : log to match |
| * @return json representation og the log |
| */ |
| public String capture(String log){ |
| MetronMatch match = match(log); |
| match.captures(); |
| return match.toJson(); |
| } |
| |
| /** |
| * Match the given list of <tt>log</tt> with the named regex |
| * and return the list of json representation of the matched elements. |
| * |
| * @param logs : list of log |
| * @return list of json representation of the log |
| */ |
| public List<String> captures(List<String> logs){ |
| List<String> matched = new ArrayList<String>(); |
| for (String log : logs) { |
| MetronMatch match = match(log); |
| match.captures(); |
| matched.add(match.toJson()); |
| } |
| return matched; |
| } |
| |
| /** |
| * Match the given <tt>text</tt> with the named regex |
| * {@code Grok} will extract data from the string and get an extence of {@link Match}. |
| * |
| * @param text : Single line of log |
| * @return Grok Match |
| */ |
| public MetronMatch match(String text) { |
| if (compiledNamedRegex == null || StringUtils.isBlank(text)) { |
| return MetronMatch.EMPTY; |
| } |
| |
| Matcher m = compiledNamedRegex.matcher(text); |
| MetronMatch match = new MetronMatch(); |
| if (m.find()) { |
| match.setSubject(text); |
| match.setGrok(this); |
| match.setMatch(m); |
| match.setStart(m.start(0)); |
| match.setEnd(m.end(0)); |
| } |
| return match; |
| } |
| |
| /** |
| * Compile the {@code Grok} pattern to named regex pattern. |
| * |
| * @param pattern : Grok pattern (ex: %{IP}) |
| * @throws Exception |
| */ |
| public void compile(String pattern) throws Exception { |
| |
| if (StringUtils.isBlank(pattern)) { |
| throw new Exception("{pattern} should not be empty or null"); |
| } |
| |
| namedRegex = pattern; |
| originalGrokPattern = pattern; |
| int index = 0; |
| /** flag for infinite recurtion */ |
| int iterationLeft = 1000; |
| Boolean continueIteration = true; |
| |
| // Replace %{foo} with the regex (mostly groupname regex) |
| // and then compile the regex |
| while (continueIteration) { |
| continueIteration = false; |
| if (iterationLeft <= 0) { |
| throw new Exception("Deep recursion pattern compilation of " + originalGrokPattern); |
| } |
| iterationLeft--; |
| |
| Matcher m = GrokUtils.GROK_PATTERN.matcher(namedRegex); |
| // Match %{Foo:bar} -> pattern name and subname |
| // Match %{Foo=regex} -> add new regex definition |
| if (m.find()) { |
| continueIteration = true; |
| Map<String, String> group = m.namedGroups(); |
| if (group.get("definition") != null) { |
| try { |
| addPattern(group.get("pattern"), group.get("definition")); |
| group.put("name", group.get("name") + "=" + group.get("definition")); |
| } catch (Exception e) { |
| // Log the exeception |
| } |
| } |
| namedRegexCollection.put("name" + index, |
| (group.get("subname") != null ? group.get("subname") : group.get("name"))); |
| namedRegex = |
| StringUtils.replace(namedRegex, "%{" + group.get("name") + "}", "(?<name" + index + ">" |
| + grokPatternDefinition.get(group.get("pattern")) + ")"); |
| // System.out.println(_expanded_pattern); |
| index++; |
| } |
| } |
| |
| if (namedRegex.isEmpty()) { |
| throw new Exception("Pattern not fount"); |
| } |
| // Compile the regex |
| compiledNamedRegex = Pattern.compile(namedRegex); |
| } |
| |
| /** |
| * Original grok pattern used to compile to the named regex. |
| * |
| * @return String Original Grok pattern |
| */ |
| public String getOriginalGrokPattern(){ |
| return originalGrokPattern; |
| } |
| |
| /** |
| * Get the named regex from the given id. |
| * |
| * @param id : named regex id |
| * @return String of the named regex |
| */ |
| public String getNamedRegexCollectionById(String id) { |
| return namedRegexCollection.get(id); |
| } |
| |
| /** |
| * Get the full collection of the named regex. |
| * |
| * @return named RegexCollection |
| */ |
| public Map<String, String> getNamedRegexCollection() { |
| return namedRegexCollection; |
| } |
| } |