blob: 5b3f249490410d075a9c47d5e26ee06b39e67ed7 [file] [log] [blame]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import re
import collections
# Compiled patterns for the supported log types, keyed by log type name.
# Every named group captures one field of a log line; the group names are
# what the matching namedtuple definitions are expected to mirror.
regexes = {
    logtype: re.compile(pattern)
    for logtype, pattern in (
        (
            "apache_access",
            r"(?P<client_ip>[\d\.]+)\s"  # digits and dots only
            r"(?P<identity>\S*)\s"
            r"(?P<user>\S*)\s"
            r"\[(?P<time_txt>.*?)\]\s"  # bracketed timestamp text
            r'"(?P<request>.*?)"\s'  # quoted request
            r"(?P<status>\d+)\s"
            r"(?P<bytes>\S*)\s"
            r'"(?P<referer>.*?)"\s'
            r'"(?P<user_agent>.*?)"\s*',
        ),
        (
            "apache_error",
            r"\[(?P<date>.*?)\]\s+"
            r"\[(?P<module>.*?)\]\s+"
            r"\[(?P<pid>.*?)\]\s+"  # whole bracket contents, not just a number
            r"\[client\s+(?P<client_ip>[0-9.]+):\d+\]\s+"  # port is matched but not captured
            r"(?P<message>.+)",
        ),
        (
            "syslog",
            r"(?P<date>\S+\s+\d+\s+\d+:\d+:\d+)\s+"
            r"(<[0-9.]+>\s+)?"  # optional <number> token; unnamed group
            r"(?P<host>\S+)\s+"
            r"(?P<type>\S+):\s+"
            r"(?P<message>.+)",
        ),
        (
            "fail2ban",
            r"(?P<date>\S+ \d+:\d+:[\d,]+)\s+"
            r"(?P<type>fail2ban\.[^:]+):\s+"
            r"(?P<message>.+)",
        ),
        (
            "rsync",
            r"(?P<date>\S+ \d+:\d+:[\d,]+)\s+"
            r"\[(?P<pid>[\S.]+)\]\s+"
            r"(?P<message>.+)",
        ),
        (
            "pylogs",
            r"(?P<date>\S+ \S+)\s+\[pylog\]\s+"  # literal "[pylog]" marker follows the date
            r"\[(?P<type>[\S.]+)\]:\s+"
            r"(?P<message>.+)",
        ),
        (
            "qmail",
            r"(?P<mid>@[a-f0-9]+)\s+"  # "@" followed by lowercase hex
            r"(?P<message>.+)",
        ),
        (
            "lastlog",
            r"USER: (?P<user>([-.a-z0-9]+))\((?P<uid>(\d+))\);\s+"
            r"T: \[(?P<time_txt>([^]]+))\];"
            r"(?: EPOCH: (?P<entry_timestamp>[0-9.]+);)?"  # EPOCH part is optional
            r" LINE: (?P<term>(pts/\d+|tty\d+|system));"
            r" HOST: (?P<client_ip>[0-9a-f:.]+)",
        ),
    )
}
# Record types, one namedtuple per log type. The leading field names of each
# record must agree with the named groups of the regex for the same log type;
# every record then ends with the same three fields: filepath, logtype and
# timestamp.
tuples = {
    logtype: collections.namedtuple(
        logtype,
        list(fields + ("filepath", "logtype", "timestamp")),
    )
    for logtype, fields in (
        (
            "apache_access",
            (
                "client_ip",
                "identity",
                "user",
                "time_txt",
                "request",
                "status",
                "bytes",
                "referer",
                "user_agent",
            ),
        ),
        ("apache_error", ("date", "module", "pid", "client_ip", "message")),
        ("syslog", ("date", "host", "type", "message")),
        ("fail2ban", ("date", "type", "message")),
        ("rsync", ("date", "pid", "message")),
        ("pylogs", ("date", "type", "message")),
        ("qmail", ("mid", "message")),
        (
            "lastlog",
            ("user", "uid", "time_txt", "entry_timestamp", "term", "client_ip"),
        ),
    )
}