| #!/usr/bin/env python |
| # -*- coding: utf-8 -*- |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """ |
| KIF: Kill It with Fire (or a depressed space alien) |
| """ |
| |
| # Initial module imports |
| import os |
| import sys |
| import psutil |
| import yaml |
| import re |
| import subprocess |
| import argparse |
| import socket |
| import time |
| import asfpy.syslog |
| import asfpy.messaging |
| |
| # Redirect stdout to syslog+stdout |
| print = asfpy.syslog.Printer(stdout=True) |
| |
| |
| # define binary metrics |
| KB = (2 ** 10) |
| MB = (2 ** 20) |
| GB = (2 ** 30) |
| TB = (2 ** 40) |
| |
| # Helper func |
| def whoami(): |
| """Returns the FQDN of the box the program runs on""" |
| try: |
| # Get local hostname (what you see in the terminal) |
| local_hostname = socket.gethostname() |
| # Get all address info segments for the local host |
| canonical_names = [ |
| address[3] for address in |
| socket.getaddrinfo(local_hostname, None, 0, socket.SOCK_DGRAM, 0, socket.AI_CANONNAME) |
| if address[3] |
| ] |
| # For each canonical name, see if we find $local_hostname.something.tld, and if so, return that. |
| if canonical_names: |
| prefix = f"{local_hostname}." |
| for name in canonical_names: |
| if name.startswith(prefix): |
| return name |
| # No match, just return the first occurrence. |
| return canonical_names[0] |
| except socket.error: |
| pass |
| # Fall back to socket.getfqdn |
| return socket.getfqdn() |
| |
| # hostname, pid file etc |
| ME = whoami() |
| TEMPLATE_EMAIL = open("email_template.txt", "r").read() |
| # Default to checking triggers every N seconds. |
| DEFAULT_INTERVAL = 300 |
| |
| |
| class ProcessInfo(object): |
| def __init__(self, pid=None): |
| if pid is None: |
| # This instance will aggregate values across multiple processes, |
| # so we'll zero the numerics. |
| self.mem = 0 |
| self.mempct = 0 |
| self.fds = 0 |
| self.age = 0 |
| self.state = '' # can't aggregate state, but needs a value |
| self.conns = 0 |
| self.conns_local = 0 |
| self.command = '(root)' |
| return |
| |
| proc = psutil.Process(pid) |
| |
| self.mem = proc.memory_info().rss |
| self.mempct = proc.memory_percent() |
| self.fds = proc.num_fds() |
| self.age = time.time() - proc.create_time() |
| self.state = proc.status() |
| self.command = " ".join(proc.cmdline()) |
| |
| self.conns = len(proc.connections()) |
| self.conns_local = 0 |
| for connection in proc.connections(): |
| if connection.raddr and connection.raddr[0]: |
| if RE_LOCAL_IP.match(connection.raddr[0]) \ |
| or connection.raddr[0] == '::1': |
| self.conns_local += 1 |
| |
| def accumulate(self, other): |
| self.mem += other.mem |
| self.mempct += other.mempct ### this is likely wrong |
| self.fds += other.fds |
| self.conns += other.conns |
| self.conns_local += other.conns_local |
| self.age += other.age |
| # cannot accumulate .state |
| |
| |
| RE_LOCAL_IP = re.compile(r'^(10|192|127)\.') |
| |
| |
| def getuser(pid): |
| try: |
| proc = psutil.Process(pid) |
| return proc.username() |
| except (psutil.ZombieProcess, psutil.AccessDenied, psutil.NoSuchProcess): |
| print("Could not access process, it might have gone away...") |
| return None |
| |
| |
| # getprocs: Get all processes and their command line stack |
| def getprocs(): |
| procs = {} |
| for p in psutil.process_iter(): |
| try: |
| pinfo = p.as_dict(attrs=['pid', 'name', 'username', 'status', 'cmdline']) |
| content = pinfo['cmdline'] |
| if not content: |
| content = pinfo['name'] # Fall back if no cmdline present |
| pid = pinfo['pid'] |
| if len(content) > 0 and len(content[0]) > 0: |
| content = [c for c in content if len(c) > 0] |
| procs[pid] = content |
| except (psutil.ZombieProcess, psutil.AccessDenied, psutil.NoSuchProcess): |
| print("Could not access process, it might have gone away...") |
| continue |
| return procs |
| |
| |
| def checkTriggers(id, info, triggers, dead=False): |
| if len(triggers) > 0: |
| print(" - Checking triggers:") |
| for trigger, value in triggers.items(): |
| print(" - Checking against trigger %s" % trigger) |
| |
| # maxmemory: Process can max use N amount of memory or it triggers |
| if trigger == 'maxmemory': |
| if isinstance(value, str): |
| value = value.lower() |
| if value.find("%") != -1: # percentage check |
| maxmem = float(value.replace('%', '')) |
| cmem = info.mempct |
| cvar = '%' |
| elif value.find('kb') != -1: # kb check |
| maxmem = int(value.replace('kb', '')) * KB |
| cmem = info.mem |
| cvar = ' bytes' |
| elif value.find('mb') != -1: # mb check |
| maxmem = int(value.replace('mb', '')) * MB |
| cmem = info.mem |
| cvar = ' bytes' |
| elif value.find('gb') != -1: # gb check |
| maxmem = int(value.replace('gb', '')) * GB |
| cmem = info.mem |
| cvar = ' bytes' |
| elif value.find('tb') != -1: # tb check |
| maxmem = int(value.replace('tb', '')) * TB |
| cmem = info.mem |
| cvar = ' bytes' |
| elif isinstance(value, int): |
| maxmem = value |
| cmem = info.mem |
| cvar = ' bytes' |
| lstr = " - %s: '%s' is using %u%s memory, max allowed is %u%s" % ( |
| id, info.command, cmem + 0.5, cvar, maxmem + 0.5, cvar) |
| print(lstr) |
| if cmem > maxmem: |
| print(" - Trigger fired!") |
| return lstr |
| |
| # maxfds: maximum number of file descriptors |
| if trigger == 'maxfds': |
| maxfds = int(value) |
| cfds = info.fds |
| lstr = " - %s: '%s' is using %u FDs, max allowed is %u" % (id, info.command, cfds, value) |
| print(lstr) |
| if cfds > maxfds: |
| print(" - Trigger fired!") |
| return lstr |
| |
| # maxconns: maximum number of open connections |
| if trigger == 'maxconns': |
| maxconns = int(value) |
| ccons = info.conns |
| lstr = " - %s: '%s' is using %u connections, max allowed is %u" % (id, info.command, ccons, value) |
| print(lstr) |
| if ccons > maxconns: |
| print(" - Trigger fired!") |
| return lstr |
| |
| # maxlocalconns: maximum number of open connections in local network |
| if trigger == 'maxlocalconns': |
| maxconns = int(value) |
| ccons = info.conns_local |
| lstr = " - %s: '%s' is using %u LAN connections, max allowed is %u" % (id, info.command, ccons, value) |
| print(lstr) |
| if ccons > maxconns: |
| print(" - Trigger fired!") |
| return lstr |
| |
| # maxage: maximum age of a process (NOT cpu time) |
| if trigger == 'maxage': |
| if value.find('s') != -1: # seconds |
| maxage = int(value.replace('s', '')) |
| cage = info.age |
| cvar = ' seconds' |
| elif value.find('m') != -1: # minutes |
| maxage = int(value.replace('m', '')) * 60 |
| cage = info.age |
| cvar = ' minutes' |
| elif value.find('h') != -1: # hours |
| maxage = int(value.replace('h', '')) * 3600 |
| cage = info.age |
| cvar = ' hours' |
| elif value.find('d') != -1: # days |
| maxage = int(value.replace('d', '')) * 86400 |
| cage = info.age |
| cvar = ' days' |
| else: |
| maxage = int(value) |
| cage = info.age |
| lstr = " - %s: '%s' is %u seconds old, max allowed is %u" % (id, info.command, cage, maxage) |
| print(lstr) |
| if cage > maxage: |
| print(" - Trigger fired!") |
| return lstr |
| |
| # state: kill processes in a specific state (zombie etc) |
| if trigger == 'state': |
| cstate = info.state |
| lstr = " - %s: '%s' is in state '%s'" % (id, info.command, cstate) |
| print(lstr) |
| if cstate == value: |
| print(" - Trigger fired!") |
| return lstr |
| return None |
| |
| |
| def scan_for_triggers(config): |
| procs = getprocs() # get all current processes |
| actions = [] |
| |
| # For each rule.. |
| for id, rule in config['rules'].items(): |
| print("- Running rule %s" % id) |
| # Is this process running here? |
| pids = [] |
| if 'host_must_match' in rule: |
| if not re.match(rule['host_must_match'], ME): |
| print(f"Ignoring rule-set '{id}', hostname '{ME}' does not match host_must_match criterion.") |
| continue |
| if 'host_must_not_match' in rule: |
| if re.match(rule['host_must_not_match'], ME): |
| print(f"Ignoring rule-set '{id}', hostname '{ME}' matches host_must_not_match criterion.") |
| continue |
| if 'procid' in rule: |
| procid = rule['procid'] |
| print(" - Checking for process %s" % procid) |
| for xpid, cmdline in procs.items(): |
| cmdstring = " ".join(cmdline) |
| addit = False |
| if isinstance(procid, str): |
| if cmdstring.find(rule['procid']) != -1: |
| addit = True |
| elif isinstance(procid, list): |
| if cmdline == procid: |
| addit = True |
| # If uid is specified and doesn't match here, discard match. |
| if 'uid' in rule: |
| xuid = getuser(xpid) |
| if xuid != rule['uid']: |
| addit = False |
| if addit: |
| if not ('ignore' in rule): |
| addit = True |
| elif isinstance(rule['ignore'], str) and cmdstring != rule['ignore']: |
| addit = True |
| elif isinstance(rule['ignore'], list) and cmdline != rule['ignore']: |
| addit = True |
| if 'ignorepidfile' in rule: |
| try: |
| ppid = int(open(rule['ignorepidfile']).read()) |
| if ppid == xpid: |
| print("Ignoring %u, matches pid file %s!" % (ppid, rule['ignorepidfile'])) |
| addit = False |
| except Exception as err: |
| print(err) |
| if 'ignorematch' in rule: |
| ignm = rule['ignorematch'] |
| if isinstance(ignm, str) and ignm in cmdstring: |
| print("Ignoring %u, matches ignorematch directive %s!" % (xpid, rule['ignorematch'])) |
| addit = False |
| elif isinstance(ignm, list): |
| for line in ignm: |
| if line in cmdstring: |
| print("Ignoring %u, matches ignorematch directive %s!" % (xpid, line)) |
| addit = False |
| break |
| if addit: |
| pids.append(xpid) |
| if 'uid' in rule: |
| for xpid, cmdline in procs.items(): |
| cmdstring = " ".join(cmdline) |
| uid = getuser(xpid) |
| if uid == rule['uid']: |
| addit = False |
| if not ('ignore' in rule): |
| addit = True |
| elif isinstance(rule['ignore'], str) and cmdstring != rule['ignore']: |
| addit = True |
| elif isinstance(rule['ignore'], list) and cmdline != rule['ignore']: |
| addit = True |
| if 'ignorepidfile' in rule: |
| try: |
| ppid = int(open(rule['ignorepidfile']).read()) |
| if ppid == xpid: |
| print("Ignoring %u, matches pid file %s!" % (ppid, rule['ignorepidfile'])) |
| addit = False |
| except Exception as err: |
| print(err) |
| if 'ignorematch' in rule: |
| ignm = rule['ignorematch'] |
| if isinstance(ignm, str) and ignm in cmdstring: |
| print("Ignoring %u, matches ignorematch directive %s!" % (xpid, rule['ignorematch'])) |
| addit = False |
| elif isinstance(ignm, list): |
| for line in ignm: |
| if line in cmdstring: |
| print("Ignoring %u, matches ignorematch directive %s!" % (xpid, line)) |
| addit = False |
| break |
| if addit: |
| pids.append(xpid) |
| |
| # If proc is running, analyze it |
| analysis = ProcessInfo() # no pid. accumulator. |
| for pid in pids: |
| print(" - Found process at PID %u" % pid) |
| |
| try: |
| # Get all relevant data from this PID |
| info = ProcessInfo(pid) |
| |
| # If combining, combine into the analysis hash |
| if 'combine' in rule and rule['combine'] == True: |
| analysis.accumulate(info) |
| else: |
| # If running a per-pid test, run it: |
| err = checkTriggers(id, info, rule['triggers']) |
| if err: |
| action = { |
| 'pids': [], |
| 'trigger': "", |
| 'runlist': [], |
| 'notify': rule.get('notify', None), |
| 'kills': {} |
| } |
| if 'runlist' in rule and len(rule['runlist']) > 0: |
| action['runlist'] = rule['runlist'] |
| if 'kill' in rule and rule['kill'] == True: |
| sig = 9 |
| if 'killwith' in rule: |
| sig = int(rule['killwith']) |
| action['kills'][pid] = sig |
| action['trigger'] = err |
| actions.append(action) |
| except: |
| print("Could not analyze proc %u, bailing!" % pid) |
| continue |
| if len(pids) > 0: |
| # If combined trigger test, run it now |
| if 'combine' in rule and rule['combine'] == True: |
| err = checkTriggers(id, analysis, rule['triggers']) |
| if err: |
| action = { |
| 'pids': [], |
| 'trigger': "", |
| 'runlist': [], |
| 'notify': rule.get('notify', None), |
| 'kills': {} |
| } |
| if 'runlist' in rule and len(rule['runlist']) > 0: |
| action['runlist'] = rule['runlist'] |
| if 'kill' in rule and rule['kill'] == True: |
| sig = 9 |
| if 'killwith' in rule: |
| sig = int(rule['killwith']) |
| for ypid in pids: |
| action['kills'][ypid] = sig |
| action['trigger'] = err |
| actions.append(action) |
| else: |
| print(" - No matching processes found") |
| |
| return actions |
| |
| |
| def run_actions(config, actions, debug=False): |
| goods = 0 |
| bads = 0 |
| triggered_total = 0 |
| email_triggers = "" |
| email_actions = "" |
| |
| for action in actions: |
| triggered_total += 1 |
| print("Following triggers were detected:") |
| print("- %s" % action['trigger']) |
| if action.get('notify', 'email') in [None, 'email']: |
| email_triggers += "- %s\n" % action['trigger'] |
| print("Running triggered commands:") |
| rloutput = "" |
| for item in action['runlist']: |
| print("- %s" % item) |
| rloutput += "- %s" % item |
| if action.get('notify', 'email') in [None, 'email']: |
| email_actions += "- %s" % item |
| try: |
| if not debug: |
| subprocess.check_output(item, shell=True, stderr=subprocess.STDOUT) |
| rloutput += " (success)" |
| if action.get('notify', 'email') in [None, 'email']: |
| email_actions += " (success)" |
| else: |
| print("(disabled due to --debug flag)") |
| rloutput += " (disabled due to --debug)" |
| if action.get('notify', 'email') in [None, 'email']: |
| email_actions += " (disabled due to --debug)" |
| goods += 1 |
| except subprocess.CalledProcessError as e: |
| print("command failed: %s" % e.output) |
| rloutput += " (failed!: %s)" % e.output |
| if action.get('notify', 'email') in [None, 'email']: |
| email_actions += " (failed!: %s)" % e.output |
| bads += 1 |
| rloutput += "\n" |
| if action.get('notify', 'email') in [None, 'email']: |
| email_actions += "\n" |
| for pid, sig in action['kills'].items(): |
| print("- KILL PID %u with sig %u" % (pid, sig)) |
| rloutput += "- KILL PID %u with sig %u" % (pid, sig) |
| if action.get('notify', 'email') in [None, 'email']: |
| email_actions += "- KILL PID %u with sig %u" % (pid, sig) |
| if not debug: |
| try: |
| os.kill(pid, sig) |
| except OSError: |
| email_actions += "(failed, no such process!)" |
| else: |
| print(" (disabled due to --debug flag)") |
| rloutput += " (disabled due to --debug flag)" |
| if action.get('notify', 'email') in [None, 'email']: |
| email_actions += " (disabled due to --debug flag)" |
| rloutput += "\n" |
| if action.get('notify', 'email') in [None, 'email']: |
| email_actions += "\n" |
| goods += 1 |
| print("%u calls succeeded, %u failed." % (goods, bads)) |
| |
| if email_actions and 'notifications' in config and 'email' in config['notifications']: |
| ecfg = config['notifications']['email'] |
| if 'rcpt' in ecfg and 'from' in ecfg and not debug: |
| subject = "[KIF] events triggered on %s" % ME |
| msg = TEMPLATE_EMAIL.format(whoami=ME, triggers=email_triggers, actions=email_actions) |
| asfpy.messaging.mail(sender=ecfg['from'], recipient=ecfg['rcpt'], subject=subject, message=msg) |
| |
| |
| # Get started! |
| def main(): |
| # Get args, if any |
| parser = argparse.ArgumentParser() |
| parser.add_argument("-d", "--debug", help="Debug run (don't execute runlists)", action='store_true') |
| parser.add_argument("-c", "--config", help="Path to the config file if not in ./kif.yaml") |
| args = parser.parse_args() |
| |
| if not args.config: |
| config = yaml.load(open("kif.yaml"), Loader=yaml.FullLoader) |
| else: |
| config = yaml.load(open(args.config), Loader=yaml.FullLoader) |
| |
| if os.getuid() != 0: |
| print("Kif must be run as root!") |
| sys.exit(-1) |
| |
| interval = int(config.get('daemon', {}) |
| .get('interval', DEFAULT_INTERVAL)) |
| |
| # Loop forever and ever |
| while True: |
| if 'rules' not in config: |
| print('- NO RULES TO CHECK') |
| else: |
| # Now actually run things |
| actions = scan_for_triggers(config) |
| if actions: |
| run_actions(config, actions, args.debug) |
| print(f'KIF run finished, waiting {interval} seconds till next run.') |
| time.sleep(interval) |
| |
| |
| if __name__ == '__main__': |
| main() |