METRON-1991 Bro plugin docker scripts should exit nonzero when bro and kafka counts differ (JonZeolla via ottobackwards) closes apache/metron-bro-plugin-kafka#29
diff --git a/docker/README.md b/docker/README.md
index 8e4d3fa..3bae93b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -61,6 +61,7 @@
#### Scripts executed on the host to setup and interact with the docker containers
```bash
+├── analyze_results.sh
├── build_container.sh
├── cleanup_docker.sh
├── create_docker_network.sh
@@ -83,6 +84,11 @@
└── stop_container.sh
```
+- `analyze_results.sh`: Analyzes the `results.csv` files for any issues
+ ###### Parameters
+ ```bash
+ --test-directory [REQUIRED] The directory for the tests
+ ```
- `build_container.sh`: Runs docker build in the passed directory, and names the results
###### Parameters
```bash
@@ -191,12 +197,12 @@
```bash
--data-path [REQUIRED] The pcap data path
```
-- `print_results.sh` : Prints the `results.csv` for all the pcaps processed in the given directory to console
+- `print_results.sh`: Prints the `results.csv` for all the pcaps processed in the given directory to console
###### Parameters
```bash
--test-directory [REQUIRED] The directory for the tests
```
-- `split_kafka_output_by_log.sh` : For a pcap result directory, will create a LOG.kafka.log for each LOG.log's entry in the kafka-output.log
+- `split_kafka_output_by_log.sh`: For a pcap result directory, will create a LOG.kafka.log for each LOG.log's entry in the kafka-output.log
###### Parameters
```bash
--log-directory [REQUIRED] The directory with the logs
diff --git a/docker/run_end_to_end.sh b/docker/run_end_to_end.sh
index 6baf679..ae06715 100755
--- a/docker/run_end_to_end.sh
+++ b/docker/run_end_to_end.sh
@@ -182,22 +182,34 @@
echo "OFFSET------------------> ${OFFSET}"
bash "${SCRIPT_DIR}"/docker_execute_process_data_file.sh --pcap-file-name="${BASE_FILE_NAME}" --output-directory-name="${DOCKER_DIRECTORY_NAME}"
-
rc=$?; if [[ ${rc} != 0 ]]; then
echo "ERROR> FAILED TO PROCESS ${file} DATA. CHECK LOGS, please run the finish_end_to_end.sh when you are done."
exit ${rc}
fi
+
KAFKA_OUTPUT_FILE="${TEST_OUTPUT_PATH}/${DOCKER_DIRECTORY_NAME}/kafka-output.log"
bash "${SCRIPT_DIR}"/docker_run_consume_bro_kafka.sh --offset=$OFFSET | "${ROOT_DIR}"/remove_timeout_message.sh | tee "${KAFKA_OUTPUT_FILE}"
-
rc=$?; if [[ ${rc} != 0 ]]; then
echo "ERROR> FAILED TO PROCESS ${DATA_PATH} DATA. CHECK LOGS"
fi
"${SCRIPT_DIR}"/split_kakfa_output_by_log.sh --log-directory="${TEST_OUTPUT_PATH}/${DOCKER_DIRECTORY_NAME}"
+ rc=$?; if [[ ${rc} != 0 ]]; then
+ echo "ERROR> ISSUE ENCOUNTERED WHEN SPLITTING KAFKA OUTPUT LOGS"
+ fi
done
"${SCRIPT_DIR}"/print_results.sh --test-directory="${TEST_OUTPUT_PATH}"
+rc=$?; if [[ ${rc} != 0 ]]; then
+ echo "ERROR> ISSUE ENCOUNTERED WHEN PRINTING RESULTS"
+ exit ${rc}
+fi
+
+"${SCRIPT_DIR}"/analyze_results.sh --test-directory="${TEST_OUTPUT_PATH}"
+rc=$?; if [[ ${rc} != 0 ]]; then
+ echo "ERROR> ISSUE ENCOUNTERED WHEN ANALYZING RESULTS"
+ exit ${rc}
+fi
echo ""
echo "Run complete"
diff --git a/docker/scripts/analyze_results.sh b/docker/scripts/analyze_results.sh
new file mode 100755
index 0000000..790ec18
--- /dev/null
+++ b/docker/scripts/analyze_results.sh
@@ -0,0 +1,207 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+shopt -s nocasematch # pattern matching in `case` and `[[ ]]` ignores letter case
+#set -u # nounset disabled
+set -e # errexit: exit when a command returns an unhandled non-zero status
+set -E # errtrap: ERR traps are inherited by functions, command substitutions and subshells
+set -o pipefail # a pipeline fails when any of its stages fails
+
+#
+# Analyzes the results.csv files to identify issues
+#
+
+function help {
+  # Print the usage banner: one padding line, the synopsis, the supported
+  # options, then two trailing padding lines. Each argument becomes one line.
+  printf '%s\n' \
+    " " \
+    "usage: ${0}" \
+    " --test-directory [REQUIRED] The directory for the tests" \
+    " -h/--help Usage information." \
+    " " \
+    " "
+}
+
+function _echo() {
+  # Print "<LEVEL>> <message>" wrapped in the colour stored in txt<LEVEL>,
+  # followed by the txtDEFAULT reset sequence.
+  #   $1 - level label (e.g. ERROR, WARN); also selects the txt<LEVEL> variable
+  #   $2 - message text
+  # ERROR is written to stderr, every other level to stdout. The comparison
+  # below follows the shell's nocasematch setting exactly like a `case`
+  # pattern would.
+  color="txt${1:-DEFAULT}"
+  if [[ "${1}" == ERROR ]]; then
+    echo -e "${!color}${1}> ${2}${txtDEFAULT}" >&2
+  else
+    echo -e "${!color}${1}> ${2}${txtDEFAULT}"
+  fi
+}
+
+SCRIPT_NAME=$(basename -- "$0") # file name of this script, shown in the start-up banner
+TEST_DIRECTORY= # filled in from --test-directory
+declare -A LOGS_WITH_UNEQUAL_RESULTS # pcap folder name -> space separated names of logs whose counts differ
+declare -a LOG_NAMES # first CSV column (log names) of the results file currently being analyzed
+declare -A OVERALL_LOG_CARDINALITY # log name -> how many times it was listed across the analyzed results files
+declare -A LOG_ISSUE_COUNT # log name -> number of unequal results; populated in print_log_comparison_insights
+declare -r txtDEFAULT='\033[0m' # ANSI escape sequence: reset text attributes
+# shellcheck disable=SC2034
+declare -r txtERROR='\033[0;31m' # ANSI escape sequence: red; looked up indirectly by _echo
+# shellcheck disable=SC2034
+declare -r txtWARN='\033[0;33m' # ANSI escape sequence: yellow; looked up indirectly by _echo
+
+# Handle command line options
+for i in "$@"; do
+ case $i in
+ #
+ # TEST_DIRECTORY
+ #
+ # --test-directory
+ #
+ --test-directory=*)
+ TEST_DIRECTORY="${i#*=}"
+ shift # past argument=value
+ ;;
+
+ #
+ # -h/--help
+ #
+ -h | --help)
+ help
+ exit 0
+ shift # past argument with no value
+ ;;
+
+ #
+ # Unknown option
+ #
+ *)
+ UNKNOWN_OPTION="${i#*=}"
+ _echo ERROR "unknown option: $UNKNOWN_OPTION"
+ help
+ ;;
+ esac
+done
+
+if [[ -z "$TEST_DIRECTORY" ]]; then
+ echo "$TEST_DIRECTORY must be passed"
+ exit 1
+fi
+
+echo "Running ${SCRIPT_NAME} with"
+echo "TEST_DIRECTORY = $TEST_DIRECTORY"
+echo "==================================================="
+
+## Main functions
+function count_occurrences_of_each_log_file
+{
+ # Count the number of occurrences of each log name: adds one to OVERALL_LOG_CARDINALITY for every entry of LOG_NAMES
+ for LOG_NAME in "${LOG_NAMES[@]}"; do
+ (( ++OVERALL_LOG_CARDINALITY["${LOG_NAME}"] )) # pre-increment never evaluates to 0, so (( )) returns success under errexit
+ done
+}
+
+function check_for_unequal_log_counts
+{
+  # Records, for a single results file, every log whose second and third CSV
+  # columns disagree.
+  #   $1 - path to a results.csv file
+  # Reads LOG_NAMES and appends to LOGS_WITH_UNEQUAL_RESULTS, keyed by the name
+  # of the folder that holds the results file.
+  RESULTS_FILE="${1}"
+
+  # The key is the last path component of the results file's directory,
+  # obtained by changing into that directory inside a subshell
+  # shellcheck disable=SC2001
+  PCAP_FOLDER="$( cd "$( dirname "${RESULTS_FILE}" )" >/dev/null 2>&1 && echo "${PWD##*/}")"
+
+  for LOG_NAME in "${LOG_NAMES[@]}"; do
+    # awk echoes the log name back only when its row has differing counts
+    UNEQUAL_LOG=$(awk -F\, -v log_name="${LOG_NAME}" '$1 == log_name && $2 != $3 {print $1}' "${RESULTS_FILE}")
+
+    # Nothing to record for logs whose counts match
+    if [[ -z "${UNEQUAL_LOG}" ]]; then
+      continue
+    fi
+
+    # Bash has no multidimensional arrays, so each folder maps to one space
+    # separated string; the separator is only emitted once the entry already
+    # holds something
+    LOGS_WITH_UNEQUAL_RESULTS["${PCAP_FOLDER}"]+="${LOGS_WITH_UNEQUAL_RESULTS[${PCAP_FOLDER}]:+ }${UNEQUAL_LOG}"
+  done
+}
+
+function print_unequal_results
+{
+  # Prints a two column table (pcap folder, log name) with one row per log
+  # whose counts were found to differ. Rows are produced as CSV and aligned
+  # by `column`.
+  {
+    printf '%s\n' "PCAP FOLDER,LOG NAME"
+
+    for pcap_folder in "${!LOGS_WITH_UNEQUAL_RESULTS[@]}"; do
+      # Each value is a space separated list standing in for a nested array,
+      # so the expansion is deliberately left unquoted to split it into the
+      # individual log names
+      for log_name in ${LOGS_WITH_UNEQUAL_RESULTS[${pcap_folder}]}; do
+        printf '%s,%s\n' "${pcap_folder}" "${log_name}"
+      done
+    done
+  } | column -t -s ','
+}
+
+function print_log_comparison_insights
+{
+ # Tally how often each log name occurs across all values of LOGS_WITH_UNEQUAL_RESULTS and load the tallies into LOG_ISSUE_COUNT.
+ # The pipeline emits one LOG_ISSUE_COUNT[name]=count word per log; because `declare` runs inside a function the array is function-local.
+ # shellcheck disable=SC2046
+ declare -A $(echo "${LOGS_WITH_UNEQUAL_RESULTS[@]}" | tr ' ' '\n' | sort | uniq -c | awk '{print "LOG_ISSUE_COUNT["$2"]="$1}')
+
+ # Compare each log type's instances of inequality to the total number of
+ # instances of each log. If they are equal, this indicates that there may be
+ # a log-type related issue.
+ #
+ # For example, if count_occurrences_of_each_log_file identified that there
+ # were 10 instances of http logs across all of the `results.csv` files,
+ # ${OVERALL_LOG_CARDINALITY[http]} should equal 10. If check_for_unequal_log_counts
+ # independently found 10 instances where the http bro and kafka log counts
+ # from the `results.csv` files were not equal, ${LOG_ISSUE_COUNT[http]}
+ # would also be 10, causing us to warn the user of that insight.
+ for KEY in "${!LOG_ISSUE_COUNT[@]}"; do
+ if [[ "${LOG_ISSUE_COUNT[${KEY}]}" == "${OVERALL_LOG_CARDINALITY[${KEY}]}" ]]; then
+ _echo WARN "None of the ${KEY} log counts were the same between bro and kafka. This may indicate an issue specific to that log."
+ fi
+ done
+}
+
+## Main
+# Move over to the docker area
+cd "${TEST_DIRECTORY}" || exit 1
+# Get a list of results files
+RESULTS_FILES=$(find "${TEST_DIRECTORY}" -name "results.csv")
+# Analyze each results file for issues
+for file in $RESULTS_FILES; do
+ # Capture the first column (the log names) of the provided file's contents in
+ # the array LOG_NAMES, excluding the header
+ mapfile -s 1 -t LOG_NAMES < <(awk -F\, '{print $1}' "${file}")
+
+ count_occurrences_of_each_log_file
+ check_for_unequal_log_counts "${file}"
+done
+
+if [[ "${#LOGS_WITH_UNEQUAL_RESULTS[@]}" -gt 0 ]]; then
+ _echo ERROR "UNEQUALITY FOUND IN BRO AND KAFKA LOG COUNTS"
+ echo ""
+
+ print_unequal_results
+ print_log_comparison_insights
+
+ exit 1
+fi
+
diff --git a/docker/scripts/build_container.sh b/docker/scripts/build_container.sh
index d4e5dca..40810db 100755
--- a/docker/scripts/build_container.sh
+++ b/docker/scripts/build_container.sh
@@ -22,6 +22,7 @@
set -e # errexit
set -E # errtrap
set -o pipefail
+
#
# Runs docker build in a provided directory, with a provided name
#
@@ -36,6 +37,7 @@
echo " "
}
+SCRIPT_NAME=$(basename -- "$0")
CONTAINER_DIRECTORY=
CONTAINER_NAME=
@@ -92,7 +94,7 @@
exit 1
fi
-echo "Running with "
+echo "Running ${SCRIPT_NAME} with"
echo "CONTAINER_DIRECTORY = $CONTAINER_DIRECTORY"
echo "CONTAINER_NAME = $CONTAINER_NAME"
echo "==================================================="
diff --git a/docker/scripts/print_results.sh b/docker/scripts/print_results.sh
index ecc67ca..6e107c7 100755
--- a/docker/scripts/print_results.sh
+++ b/docker/scripts/print_results.sh
@@ -31,11 +31,12 @@
echo " "
echo "usage: ${0}"
echo " --test-directory [REQUIRED] The directory for the tests"
- echo " -h/--help Usage information."
+ echo " -h/--help Usage information."
echo " "
echo " "
}
+SCRIPT_NAME=$(basename -- "$0")
TEST_DIRECTORY=
# Handle command line options
@@ -77,7 +78,7 @@
fi
-echo "Running with "
+echo "Running ${SCRIPT_NAME} with"
echo "TEST_DIRECTORY = $TEST_DIRECTORY"
echo "==================================================="
diff --git a/docker/scripts/split_kakfa_output_by_log.sh b/docker/scripts/split_kakfa_output_by_log.sh
index 74d55e3..61e53e4 100755
--- a/docker/scripts/split_kakfa_output_by_log.sh
+++ b/docker/scripts/split_kakfa_output_by_log.sh
@@ -37,6 +37,7 @@
echo " "
}
+SCRIPT_NAME=$(basename -- "$0")
LOG_DIRECTORY=
# Handle command line options
@@ -77,7 +78,7 @@
exit 1
fi
-echo "Running with "
+echo "Running ${SCRIPT_NAME} with"
echo "$LOG_DIRECTORY = $LOG_DIRECTORY"
echo "==================================================="