blob: 790ec182e3e99e8b917ec976d0f3ad5e67459b9b [file] [log] [blame]
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
shopt -s nocasematch
#set -u # nounset disabled
set -e # errexit
set -E # errtrap
set -o pipefail
#
# Analyzes the results.csv files to identify issues
#
function help {
echo " "
echo "usage: ${0}"
echo " --test-directory [REQUIRED] The directory for the tests"
echo " -h/--help Usage information."
echo " "
echo " "
}
function _echo() {
color="txt${1:-DEFAULT}"
case "${1}" in
ERROR)
>&2 echo -e "${!color}${1}> ${2}${txtDEFAULT}"
;;
WARN)
echo -e "${!color}${1}> ${2}${txtDEFAULT}"
;;
*)
echo -e "${!color}${1}> ${2}${txtDEFAULT}"
;;
esac
}
SCRIPT_NAME=$(basename -- "$0")
TEST_DIRECTORY=
declare -A LOGS_WITH_UNEQUAL_RESULTS
declare -a LOG_NAMES
declare -A OVERALL_LOG_CARDINALITY
declare -A LOG_ISSUE_COUNT
declare -r txtDEFAULT='\033[0m'
# shellcheck disable=SC2034
declare -r txtERROR='\033[0;31m'
# shellcheck disable=SC2034
declare -r txtWARN='\033[0;33m'
# Handle command line options
for i in "$@"; do
case $i in
#
# TEST_DIRECTORY
#
# --test-directory
#
--test-directory=*)
TEST_DIRECTORY="${i#*=}"
shift # past argument=value
;;
#
# -h/--help
#
-h | --help)
help
exit 0
shift # past argument with no value
;;
#
# Unknown option
#
*)
UNKNOWN_OPTION="${i#*=}"
_echo ERROR "unknown option: $UNKNOWN_OPTION"
help
;;
esac
done
if [[ -z "$TEST_DIRECTORY" ]]; then
echo "$TEST_DIRECTORY must be passed"
exit 1
fi
echo "Running ${SCRIPT_NAME} with"
echo "TEST_DIRECTORY = $TEST_DIRECTORY"
echo "==================================================="
## Main functions
function count_occurrences_of_each_log_file
{
# Count the number of occurences of each log name
for LOG_NAME in "${LOG_NAMES[@]}"; do
(( ++OVERALL_LOG_CARDINALITY["${LOG_NAME}"] ))
done
}
function check_for_unequal_log_counts
{
RESULTS_FILE="${1}"
# Get the pcap folder name from the provided file
# shellcheck disable=SC2001
PCAP_FOLDER="$( cd "$( dirname "${RESULTS_FILE}" )" >/dev/null 2>&1 && echo "${PWD##*/}")"
# Check each log line in the provided log file for unequal results
for LOG_NAME in "${LOG_NAMES[@]}"; do
# For each log in the provided results, identify any unequal log counts
UNEQUAL_LOG=$(awk -F\, -v log_name="${LOG_NAME}" '$1 == log_name && $2 != $3 {print $1}' "${RESULTS_FILE}")
# Create a space separated list of unequal logs to simulate a
# multidimensional array
if [[ -n "${UNEQUAL_LOG}" ]]; then
if [[ "${#LOGS_WITH_UNEQUAL_RESULTS[${PCAP_FOLDER}]}" -eq 0 ]]; then
LOGS_WITH_UNEQUAL_RESULTS["${PCAP_FOLDER}"]="${UNEQUAL_LOG}"
else
LOGS_WITH_UNEQUAL_RESULTS["${PCAP_FOLDER}"]+=" ${UNEQUAL_LOG}"
fi
fi
done
}
function print_unequal_results
{
# Output a table with the pcap file and log name details where the imbalance
# was detected
{
echo "PCAP FOLDER,LOG NAME"
for KEY in "${!LOGS_WITH_UNEQUAL_RESULTS[@]}"; do
# This must be done because we are simulating multidimensional arrays due to
# the lack of native bash support
for VALUE in ${LOGS_WITH_UNEQUAL_RESULTS[${KEY}]}; do
echo "${KEY},${VALUE}"
done
done
} | column -t -s ','
}
function print_log_comparison_insights
{
# Load the log to instance count mapping from LOGS_WITH_UNEQUAL_RESULTS into a new
# associative array
# shellcheck disable=SC2046
declare -A $(echo "${LOGS_WITH_UNEQUAL_RESULTS[@]}" | tr ' ' '\n' | sort | uniq -c | awk '{print "LOG_ISSUE_COUNT["$2"]="$1}')
# Compare each log type's instances of inequality to the total number of
# instances of each log. If they are equal, this indicates that there may be
# a log-type related issue.
#
# For example, if count_occurrences_of_each_log_file identified that there
# were 10 instances of http logs across all of the `results.csv` files,
# ${OVERALL_LOG_CARDINALITY[http]} should equal 10. If check_for_unequal_log_counts
# independently found 10 instances where the http bro and kafka log counts
# from the `results.csv` files were not equal, ${LOG_ISSUE_COUNT[http]}
# would also have 10 entries, causing us to warn the user of that insight.
for KEY in "${!LOG_ISSUE_COUNT[@]}"; do
if [[ "${LOG_ISSUE_COUNT[${KEY}]}" == "${OVERALL_LOG_CARDINALITY[${KEY}]}" ]]; then
_echo WARN "None of the ${KEY} log counts were the same between bro and kafka. This may indicate an issue specific to that log."
fi
done
}
## Main
# Move over to the docker area
cd "${TEST_DIRECTORY}" || exit 1
# Get a list of results files
RESULTS_FILES=$(find "${TEST_DIRECTORY}" -name "results.csv")
# Analyze each results file for issues
for file in $RESULTS_FILES; do
# Capture the first column (the log names) of the provided file's contents in
# the array LOG_NAMES, excluding the header
mapfile -s 1 -t LOG_NAMES < <(awk -F\, '{print $1}' "${file}")
count_occurrences_of_each_log_file
check_for_unequal_log_counts "${file}"
done
if [[ "${#LOGS_WITH_UNEQUAL_RESULTS[@]}" -gt 0 ]]; then
_echo ERROR "UNEQUALITY FOUND IN BRO AND KAFKA LOG COUNTS"
echo ""
print_unequal_results
print_log_comparison_insights
exit 1
fi