| |
| /** @file |
| |
  Simulator application, for testing the behavior of the SieveLRU. This is not
  built as part of the system, but is kept here for future testing etc.
| |
| @section license License |
| |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
#include <getopt.h>

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>

// Yeh well, sue me, boost is useful here, and this is not part of the actual core code
#include <boost/algorithm/string.hpp>

#include "ip_reputation.h"
| |
| // Convenience class declarations |
| using IpMap = std::unordered_map<IpReputation::KeyClass, std::tuple<int, bool>>; // count / false = good, true = bad |
| using IpList = std::vector<IpMap::iterator>; |
| |
// Holds all command line options. Every tunable is swept over the inclusive
// range [start, end] in steps of incr.
//
// The default member initializers describe a single pass at value 0, so a
// range that is never set on the command line is still well defined instead
// of being read uninitialized.
struct CmdConfigs {
  uint32_t start_buckets{0}, end_buckets{0}, incr_buckets{1};
  uint32_t start_size{0}, end_size{0}, incr_size{1};
  uint32_t start_threshold{0}, end_threshold{0}, incr_threshold{1};
  uint32_t start_permablock{0}, end_permablock{0}, incr_permablock{1};
};
| |
///////////////////////////////////////////////////////////////////////////////
// Command line options / parsing.
//
// splitArg() turns one range argument of the form
//
//     <start>[-<end>[/<increment>]]
//
// into a (start, end, increment) tuple. Both '-' and '/' are treated as plain
// field separators, so the fields are positional; fields beyond the third are
// ignored. When <end> is omitted it equals <start>, and <increment> defaults
// to 1.
//
// A malformed argument (an empty or non-numeric field, trailing characters,
// a value that does not fit an int, or an increment smaller than 1) is
// reported on stderr and yields the neutral range (0, 0, 1).
//
std::tuple<int32_t, int32_t, int32_t>
splitArg(std::string str)
{
  // Tokenize on either separator. Empty fields are kept on purpose, so that
  // input such as "10-" or "-5" is detected as malformed below.
  std::vector<std::string> fields;
  std::string::size_type   pos = 0;

  while (true) {
    const auto sep = str.find_first_of("-/", pos);

    fields.push_back(str.substr(pos, sep == std::string::npos ? std::string::npos : sep - pos));
    if (sep == std::string::npos) {
      break;
    }
    pos = sep + 1;
  }

  // Strict conversion: the entire field has to be consumed by the number.
  auto toInt = [](const std::string &field) -> int32_t {
    std::size_t used  = 0;
    const int   value = std::stoi(field, &used); // throws on empty / non-numeric / out of range

    if (used != field.size()) {
      throw std::invalid_argument("trailing characters");
    }
    return value;
  };

  try {
    // fields always holds at least one element, even for an empty string.
    const int32_t start = toInt(fields[0]);
    const int32_t end   = fields.size() > 1 ? toInt(fields[1]) : start;
    const int32_t incr  = fields.size() > 2 ? toInt(fields[2]) : 1;

    // An increment of zero would make the sweep loops in main() spin forever.
    if (incr < 1) {
      throw std::invalid_argument("increment must be positive");
    }

    return {start, end, incr};
  } catch (const std::logic_error &) { // std::invalid_argument and std::out_of_range
    std::cerr << "Malformed argument: " << str << "\n";
  }

  return {0, 0, 1};
}
| |
| CmdConfigs |
| parseArgs(int argc, char **argv) |
| { |
| CmdConfigs options; |
| int c; |
| constexpr struct option long_options[] = { |
| {"help", no_argument, nullptr, 'h'}, |
| {"buckets", required_argument, nullptr, 'b'}, |
| {"perma", required_argument, nullptr, 'p'}, |
| {"size", required_argument, nullptr, 's'}, |
| {"threshold", required_argument, nullptr, 't'}, |
| {nullptr, 0, nullptr, 0 } |
| }; |
| |
| // Make sure the optional values have been set |
| |
| options.start_permablock = 0; |
| options.end_permablock = 0; |
| options.incr_permablock = 1; |
| |
| while (true) { |
| int ix = 0; |
| |
| c = getopt_long(argc, argv, "b:f:p:s:t:h?", long_options, &ix); |
| if (c == -1) { |
| break; |
| } |
| |
| switch (c) { |
| case 'h': |
| case '?': |
| std::cerr << "usage: iprep_simu -b|--buckets <size>[-<end bucket range>[/<increment>]]\n"; |
| std::cerr << " -s|--size <bucket size>[-<end bucket size range>[/<increment>]]\n"; |
| std::cerr << " -t|--threshold <bucket num>[-<end bucket num range>[/<increment>]]\n"; |
| std::cerr << " [-p|--perma <permablock>[-<end permablock range>[/<increment>]]]\n"; |
| std::cerr << " [-h|--help\n"; |
| exit(0); |
| break; |
| case 'b': |
| std::tie(options.start_buckets, options.end_buckets, options.incr_buckets) = splitArg(optarg); |
| break; |
| case 's': |
| std::tie(options.start_size, options.end_size, options.incr_size) = splitArg(optarg); |
| break; |
| case 'p': |
| std::tie(options.start_permablock, options.end_permablock, options.incr_permablock) = splitArg(optarg); |
| break; |
| case 't': |
| std::tie(options.start_threshold, options.end_threshold, options.incr_threshold) = splitArg(optarg); |
| break; |
| default: |
| fprintf(stderr, "getopt returned weird stuff: 0%o\n", c); |
| exit(-1); |
| break; |
| } |
| } |
| |
| return options; // RVO |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // Load a configuration file, and populate the two structures with the |
| // list of IPs (and their status) as well as the full sequence of requests. |
| // |
| // Returns a tuple with the number of good requests and bad requests, respectively. |
| // |
| std::tuple<uint32_t, uint32_t> |
| loadFile(const std::string &fname, IpMap &all_ips, IpList &ips) |
| { |
| std::ifstream infile(fname); |
| |
| float timestamp; // The timestamp from the request(relative) |
| std::string ip; // The IP |
| bool status; // Bad (false) or Good (true) request? |
| |
| uint32_t good_ips = 0; |
| uint32_t bad_ips = 0; |
| uint32_t good_requests = 0; |
| uint32_t bad_requests = 0; |
| |
| // Load in the entire file, and fill the request vector as well as the IP lookup table (state) |
| while (infile >> timestamp >> ip >> status) { |
| auto ip_hash = IpReputation::SieveLru::hasher(ip, ip.find(':') != std::string::npos ? AF_INET6 : AF_INET); |
| auto it = all_ips.find(ip_hash); |
| |
| if (!status) { |
| ++good_requests; |
| } else { |
| ++bad_requests; |
| } |
| |
| if (all_ips.end() != it) { |
| auto &[key, data] = *it; |
| auto &[count, d_status] = data; |
| |
| ++count; |
| ips.push_back(it); |
| } else { |
| all_ips[ip_hash] = {0, status}; |
| ips.push_back(all_ips.find(ip_hash)); |
| if (!status) { |
| ++good_ips; |
| } else { |
| ++bad_ips; |
| } |
| } |
| } |
| |
| std::cout << std::setprecision(3); |
| std::cout << "Total number of requests: " << ips.size() << "\n"; |
| std::cout << "\tGood requests: " << good_requests << " (" << 100.0 * good_requests / ips.size() << "%)\n"; |
| std::cout << "\tBad requests: " << bad_requests << " (" << 100.0 * bad_requests / ips.size() << "%)\n"; |
| std::cout << "Unique IPs in set: " << all_ips.size() << "\n"; |
| std::cout << "\tGood IPs: " << good_ips << " (" << 100.0 * good_ips / all_ips.size() << "%)\n"; |
| std::cout << "\tBad IPs: " << bad_ips << " (" << 100.0 * bad_ips / all_ips.size() << "%)\n"; |
| std::cout << "\n"; |
| |
| return {good_requests, bad_requests}; |
| } |
| |
| int |
| main(int argc, char *argv[]) |
| { |
| std::string name = "simulator"; |
| auto options = parseArgs(argc, argv); |
| |
| // All remaining arguments should be files, so lets process them one by one |
| for (int file_num = optind; file_num < argc; ++file_num) { |
| IpMap all_ips; |
| IpList ips; |
| |
| // Load the data from file |
| auto [good_requests, bad_requests] = loadFile(argv[file_num], all_ips, ips); |
| |
| // Here starts the actual simulation, loop through variations |
| for (uint32_t size = options.start_size; size <= options.end_size; size += options.incr_size) { |
| for (uint32_t buckets = options.start_buckets; buckets <= options.end_buckets; buckets += options.incr_buckets) { |
| for (uint32_t threshold = options.start_threshold; threshold <= options.end_threshold; |
| threshold += options.incr_threshold) { |
| for (uint32_t permablock = options.start_permablock; permablock <= options.end_permablock; |
| permablock += options.incr_permablock) { |
| // Setup the buckets and metrics for this loop |
| // ToDo: This needs to be initialized, with a YAML or something |
| // IpReputation::SieveLru ipt(buckets, size); |
| auto ipt = new IpReputation::SieveLru(name); |
| auto start = std::chrono::system_clock::now(); |
| |
| // Some metrics |
| uint32_t good_blocked = 0; |
| uint32_t good_allowed = 0; |
| uint32_t bad_blocked = 0; |
| uint32_t bad_allowed = 0; |
| uint32_t good_perm_blocked = 0; |
| uint32_t bad_perm_blocked = 0; |
| |
| for (auto iter : ips) { |
| auto &[ip, data] = *iter; |
| auto &[count, status] = data; |
| auto [bucket, cur_cnt] = ipt->increment(ip); |
| |
| // Currently we only allow perma-blocking on items in bucket 1, so check for that first. |
| if (cur_cnt > permablock && bucket == ipt->lastBucket()) { |
| bucket = ipt->block(ip); |
| } |
| |
| if (bucket == ipt->blockBucket()) { |
| if (!status) { |
| ++good_perm_blocked; |
| } else { |
| ++bad_perm_blocked; |
| } |
| } else if (bucket <= threshold) { |
| if (!status) { |
| ++good_blocked; |
| } else { |
| ++bad_blocked; |
| } |
| } else { |
| if (!status) { |
| ++good_allowed; |
| } else { |
| ++bad_allowed; |
| } |
| } |
| } |
| |
| auto end = std::chrono::system_clock::now(); |
| |
| uint32_t total_blocked = bad_blocked + good_blocked; |
| uint32_t total_perm_blocked = bad_perm_blocked + good_perm_blocked; |
| uint32_t total_allowed = bad_allowed + good_allowed; |
| |
| // ipt->dump(); |
| |
| std::chrono::duration<double> elapsed_seconds = end - start; |
| |
| std::cout << "Running with size=" << size << ", buckets=" << buckets << ", threshold=" << threshold |
| << ", permablock=" << permablock << "\n"; |
| std::cout << "Processing time: " << elapsed_seconds.count() << "\n"; |
| std::cout << "Denied requests: " << total_blocked + total_perm_blocked << "\n"; |
| std::cout << "\tGood requests denied: " << good_blocked + good_perm_blocked << " (" |
| << 100.0 * (good_blocked + good_perm_blocked) / good_requests << "%)\n"; |
| std::cout << "\tBad requests denied: " << bad_blocked + bad_perm_blocked << " (" |
| << 100.0 * (bad_blocked + bad_perm_blocked) / bad_requests << "%)\n"; |
| std::cout << "Allowed requests: " << total_allowed << "\n"; |
| std::cout << "\tGood requests allowed: " << good_allowed << " (" << 100.0 * good_allowed / good_requests << "%)" |
| << "\n"; |
| std::cout << "\tBad requests allowed: " << bad_allowed << " (" << 100.0 * bad_allowed / bad_requests << "%)" << "\n"; |
| if (permablock) { |
| std::cout << "Permanently blocked IPs: " << ipt->bucketSize(ipt->blockBucket()) << "\n"; |
| std::cout << "\tGood requests permanently denied: " << good_perm_blocked << " (" |
| << 100.0 * good_perm_blocked / good_requests << "%)\n"; |
| std::cout << "\tBad requests permanently denied: " << bad_perm_blocked << " (" |
| << 100.0 * bad_perm_blocked / bad_requests << "%)\n"; |
| } |
| std::cout << "Estimated score (lower is better): " |
| << 100.0 * ((100.0 * good_blocked / good_requests + 100.0 * bad_allowed / bad_requests) / |
| (100.0 * good_allowed / good_requests + 100.0 * bad_blocked / bad_requests)) |
| << "\n"; |
| std::cout << "Memory used for IP Reputation data: " << ipt->memoryUsed() / (1024.0 * 1024.0) << "MB\n\n"; |
| } |
| } |
| } |
| } |
| } |
| } |