Merge pull request #144 from curtishoward/SPOT-181_envelope_ingest
[SPOT-181] spot-ingest for ODM with config-driven Spark streaming (Envelope)
diff --git a/spot-ml/ml_ops.sh b/spot-ml/ml_ops.sh
index 977a58e..90cddb3 100755
--- a/spot-ml/ml_ops.sh
+++ b/spot-ml/ml_ops.sh
@@ -29,11 +29,11 @@
if [[ "${#FDATE}" != "8" || -z "${DSOURCE}" ]]; then
echo "ml_ops.sh syntax error"
echo "Please run ml_ops.sh again with the correct syntax:"
- echo "./ml_ops.sh YYYYMMDD TYPE [MAX RESULTS] [TOL]"
+ echo "./ml_ops.sh YYYYMMDD TYPE [TOL] [MAX RESULTS]"
echo "for example:"
- echo "./ml_ops.sh 20160122 dns 1000 1e-6"
+ echo "./ml_ops.sh 20160122 dns 1e-6 1000"
echo "./ml_ops.sh 20160122 flow"
- echo "./ml_ops.sh 20160122 proxy 100"
+ echo "./ml_ops.sh 20160122 proxy 1"
exit
fi
diff --git a/spot-setup/odm/create_event_avro.sql b/spot-setup/odm/create_event_avro.sql
index 16945cf..34941ca 100644
--- a/spot-setup/odm/create_event_avro.sql
+++ b/spot-setup/odm/create_event_avro.sql
@@ -18,10 +18,10 @@
CREATE EXTERNAL TABLE IF NOT EXISTS ${VAR:ODM_DBNAME}.${VAR:ODM_TABLENAME} (
-- Common
event_time bigint,
-begintime bigint,
-endtime bigint,
-event_insertime bigint,
-lastupdatetime bigint,
+begin_time bigint,
+end_time bigint,
+event_insert_time bigint,
+last_update_time bigint,
duration float,
event_id string,
name string,
@@ -39,8 +39,8 @@
query string,
service string,
state string,
-in_bytes int,
-out_bytes int,
+in_bytes bigint,
+out_bytes bigint,
xref string,
version string,
api string,
@@ -55,55 +55,62 @@
count int,
company string,
additional_attrs map<string,string>,
-totrust string,
-fromtrust string,
+to_trust string,
+from_trust string,
rule string,
threat string,
pcap_id int,
+session_id string,
+length int,
-- Device
dvc_time bigint,
dvc_ip4 bigint,
dvc_ip4_str string,
-dvc_ip6 bigint,
+dvc_ip6 string,
dvc_ip6_str string,
dvc_host string,
+dvc_mac string,
dvc_domain string,
dvc_type string,
dvc_vendor string,
dvc_fwd_ip4 bigint,
dvc_fwd_ip4_str string,
-dvc_fwd_ip6 bigint,
+dvc_fwd_ip6 string,
dvc_fwd_ip6_str string,
dvc_version string,
-- Network
src_ip4 bigint,
src_ip4_str string,
-src_ip6 bigint,
+src_ip6 string,
src_ip6_str string,
src_host string,
+src_mac string,
src_domain string,
src_port int,
src_country_code string,
src_country_name string,
src_region string,
src_city string,
-src_lat int,
-src_long int,
+src_lat float,
+src_long float,
+src_asn bigint,
+src_tos string,
dst_ip4 bigint,
dst_ip4_str string,
-dst_ip6 bigint,
+dst_ip6 string,
dst_ip6_str string,
dst_host string,
+dst_mac string,
dst_domain string,
dst_port int,
dst_country_code string,
dst_country_name string,
dst_region string,
dst_city string,
-dst_lat int,
-dst_long int,
-src_asn int,
-dst_asn int,
+dst_lat float,
+dst_long float,
+dst_asn bigint,
+dst_tos string,
net_direction string,
net_flags string,
-- File
@@ -112,10 +119,11 @@
file_atime bigint,
file_acls string,
file_type string,
-file_size int,
+file_size bigint,
file_desc string,
file_hash string,
file_hash_type string,
+file_uid string,
-- Endpoint
end_object string,
end_action string,
@@ -163,7 +171,7 @@
http_response_body_len int,
http_response_info_code int,
http_response_info_msg string,
-http_response_resp_fuids string,
+http_response_resp_fuids array<string>,
http_response_mime_types string,
http_response_headers map<string,string>,
-- SMTP
@@ -193,7 +201,7 @@
ftp_command string,
ftp_arg string,
ftp_mime_type string,
-ftp_file_size int,
+ftp_file_size bigint,
ftp_reply_code int,
ftp_reply_msg string,
ftp_data_channel_passive boolean,
@@ -224,8 +232,8 @@
tls_resumed boolean,
tls_next_protocol string,
tls_established boolean,
-tls_cert_chain_fuids string,
-tls_client_cert_chain_fuids string,
+tls_cert_chain_fuids array<string>,
+tls_client_cert_chain_fuids array<string>,
tls_subject string,
tls_issuer string,
-- SSH
@@ -250,14 +258,17 @@
irc_value string,
irc_additional_data string,
-- Flow
-flow_in_packets int,
-flow_out_packets int,
+flow_in_packets bigint,
+flow_out_packets bigint,
flow_conn_state string,
flow_history string,
flow_src_dscp string,
flow_dst_dscp string,
flow_input string,
flow_output string,
+flow_fwd_status string,
+flow_snmp_in string,
+flow_snmp_out string,
-- Vulnerability
vuln_id string,
vuln_type string,
@@ -291,7 +302,19 @@
av_signaturesubid string,
av_intrusionurl string,
av_intrusionpayloadurl string,
-objectname string)
+objectname string,
+-- Agent
+agent_severity string,
+agent_mac string,
+agent_time bigint,
+agent_id string,
+agent_description string,
+agent_type string,
+agent_ip4 bigint,
+agent_ip4_str string,
+agent_ip6 string,
+agent_ip6_str string,
+agent_host string)
PARTITIONED BY (
`p_dvc_vendor` string, -- i.e. Windows, PAN, Fireeye
`p_dvc_type` string, -- i.e. Unix, Sonicwall, Windows
@@ -299,4 +322,4 @@
)
STORED AS AVRO
LOCATION '${VAR:ODM_LOCATION}'
-TBLPROPERTIES ('avro.schema.url'='${VAR:ODM_AVRO_URL}');
\ No newline at end of file
+TBLPROPERTIES ('avro.schema.url'='${VAR:ODM_AVRO_URL}');
diff --git a/spot-setup/odm/create_event_pqt.sql b/spot-setup/odm/create_event_pqt.sql
index 5a2322d..908bf93 100644
--- a/spot-setup/odm/create_event_pqt.sql
+++ b/spot-setup/odm/create_event_pqt.sql
@@ -55,55 +55,62 @@
count int,
company string,
additional_attrs map<string,string>,
-totrust string,
-fromtrust string,
+to_trust string,
+from_trust string,
rule string,
threat string,
pcap_id int,
+session_id string,
+length int,
-- Device
dvc_time bigint,
dvc_ip4 bigint,
dvc_ip4_str string,
-dvc_ip6 bigint,
+dvc_ip6 string,
dvc_ip6_str string,
dvc_host string,
+dvc_mac string,
dvc_domain string,
dvc_type string,
dvc_vendor string,
dvc_fwd_ip4 bigint,
-dvc_fwd_ip4_str string,
-dvc_fwd_ip6 bigint,
-dvc_fwd_ip6_str string,
+dvc_fwd_ip4_str string,
+dvc_fwd_ip6 string,
+dvc_fwd_ip6_str string,
dvc_version string,
-- Network
src_ip4 bigint,
src_ip4_str string,
-src_ip6 bigint,
+src_ip6 string,
src_ip6_str string,
src_host string,
+src_mac string,
src_domain string,
src_port int,
src_country_code string,
src_country_name string,
src_region string,
src_city string,
-src_lat int,
-src_long int,
+src_lat float,
+src_long float,
+src_asn bigint,
+src_tos string,
dst_ip4 bigint,
dst_ip4_str string,
-dst_ip6 bigint,
+dst_ip6 string,
dst_ip6_str string,
dst_host string,
+dst_mac string,
dst_domain string,
dst_port int,
dst_country_code string,
dst_country_name string,
dst_region string,
dst_city string,
-dst_lat int,
-dst_long int,
-src_asn int,
-dst_asn int,
+dst_lat float,
+dst_long float,
+dst_asn bigint,
+dst_tos string,
net_direction string,
net_flags string,
-- File
@@ -112,10 +119,11 @@
file_atime bigint,
file_acls string,
file_type string,
-file_size int,
+file_size bigint,
file_desc string,
file_hash string,
file_hash_type string,
+file_uid string,
-- Endpoint
end_object string,
end_action string,
@@ -163,7 +171,7 @@
http_response_body_len int,
http_response_info_code int,
http_response_info_msg string,
-http_response_resp_fuids string,
+http_response_resp_fuids array<string>,
http_response_mime_types string,
http_response_headers map<string,string>,
-- SMTP
@@ -193,7 +201,7 @@
ftp_command string,
ftp_arg string,
ftp_mime_type string,
-ftp_file_size int,
+ftp_file_size bigint,
ftp_reply_code int,
ftp_reply_msg string,
ftp_data_channel_passive boolean,
@@ -224,8 +232,8 @@
tls_resumed boolean,
tls_next_protocol string,
tls_established boolean,
-tls_cert_chain_fuids string,
-tls_client_cert_chain_fuids string,
+tls_cert_chain_fuids array<string>,
+tls_client_cert_chain_fuids array<string>,
tls_subject string,
tls_issuer string,
-- SSH
@@ -258,6 +266,9 @@
flow_dst_dscp string,
flow_input string,
flow_output string,
+flow_fwd_status string,
+flow_snmp_in string,
+flow_snmp_out string,
-- Vulnerability
vuln_id string,
vuln_type string,
@@ -291,11 +302,23 @@
av_signaturesubid string,
av_intrusionurl string,
av_intrusionpayloadurl string,
-objectname string)
+objectname string,
+-- Agent
+agent_severity string,
+agent_mac string,
+agent_time bigint,
+agent_id string,
+agent_description string,
+agent_type string,
+agent_ip4 bigint,
+agent_ip4_str string,
+agent_ip6 string,
+agent_ip6_str string,
+agent_host string)
PARTITIONED BY (
`p_dvc_vendor` string, -- i.e. Windows, PAN, Fireeye
`p_dvc_type` string, -- i.e. Unix, Sonicwall, Windows
`p_dt` string -- i.e. 2017-01-01
)
STORED AS PARQUET
-LOCATION '${VAR:ODM_LOCATION}';
\ No newline at end of file
+LOCATION '${VAR:ODM_LOCATION}';
diff --git a/spot-setup/odm/event.avsc b/spot-setup/odm/event.avsc
index 7765d67..f4a0873 100644
--- a/spot-setup/odm/event.avsc
+++ b/spot-setup/odm/event.avsc
@@ -41,54 +41,61 @@
{"name":"sensitivity", "type":["null","string"],"doc":"Sensitivity label", "default": null},
{"name":"count", "type":["null","int"],"doc":"Generic count", "default": null},
{"name":"company", "type":["null","string"],"doc":"Company label", "default": null},
- {"name":"additional_attrs","type":["null", {"type": "map", "values": "string"}],"default":null, "doc":"Additional attributes of the event"},
- {"name":"totrust", "type":["null","string"],"doc":"TBD", "default": null},
- {"name":"fromtrust", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"additional_attrs","type":["null", "string"],"default":null, "doc":"Additional attributes of the event"},
+ {"name":"to_trust", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"from_trust", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"rule", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"threat", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"pcap_id", "type":["null","int"],"doc":"TBD", "default": null},
+ {"name":"session_id", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"length", "type":["null","int"],"doc":"TBD", "default": null},
{"name":"dvc_time", "type":["null","long"],"doc":"UTC timestamp from device where event/alert originates or is received", "default": null},
{"name":"dvc_ip4", "type":["null","long"],"doc":"IP address of device", "default": null},
{"name":"dvc_ip4_str", "type":["null","string"],"doc":"IP address of device", "default": null},
- {"name":"dvc_ip6", "type":["null","long"],"doc":"IP address of device", "default": null},
+ {"name":"dvc_ip6", "type":["null","string"],"doc":"IP address of device", "default": null},
{"name":"dvc_ip6_str", "type":["null","string"],"doc":"IP address of device", "default": null},
{"name":"dvc_host", "type":["null","string"],"doc":"Hostname of device", "default": null},
+ {"name":"dvc_mac", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"dvc_domain", "type":["null","string"],"doc":"Domain of device", "default": null},
{"name":"dvc_type", "type":["null","string"],"doc":"Device type that generated the log", "default": null},
{"name":"dvc_vendor", "type":["null","string"],"doc":"Vendor", "default": null},
{"name":"dvc_fwd_ip4", "type":["null","long"],"doc":"Forwarded from device", "default": null},
{"name":"dvc_fwd_ip4_str", "type":["null","string"],"doc":"Forwarded from device", "default": null},
- {"name":"dvc_fwd_ip6", "type":["null","long"],"doc":"Forwarded from device", "default": null},
+ {"name":"dvc_fwd_ip6", "type":["null","string"],"doc":"Forwarded from device", "default": null},
{"name":"dvc_fwd_ip6_str", "type":["null","string"],"doc":"Forwarded from device", "default": null},
{"name":"dvc_version", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"src_ip4", "type":["null","long"],"doc":"Source ip address of event", "default": null},
{"name":"src_ip4_str", "type":["null","string"],"doc":"Source ip address of event", "default": null},
- {"name":"src_ip6", "type":["null","long"],"doc":"Source ip address of event", "default": null},
+ {"name":"src_ip6", "type":["null","string"],"doc":"Source ip address of event", "default": null},
{"name":"src_ip6_str", "type":["null","string"],"doc":"Source ip address of event", "default": null},
{"name":"src_host", "type":["null","string"],"doc":"Source FQDN of event", "default": null},
+ {"name":"src_mac", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"src_domain", "type":["null","string"],"doc":"Domain name of source address", "default": null},
{"name":"src_port", "type":["null","int"],"doc":"Source port of event", "default": null},
{"name":"src_country_code", "type":["null","string"],"doc":"Source country code", "default": null},
{"name":"src_country_name", "type":["null","string"],"doc":"Source country name", "default": null},
{"name":"src_region", "type":["null","string"],"doc":"Source region", "default": null},
{"name":"src_city", "type":["null","string"],"doc":"Source city", "default": null},
- {"name":"src_lat", "type":["null","int"],"doc":"Source latitude", "default": null},
- {"name":"src_long", "type":["null","int"],"doc":"Source longitude", "default": null},
+ {"name":"src_lat", "type":["null","float"],"doc":"Source latitude", "default": null},
+ {"name":"src_long", "type":["null","float"],"doc":"Source longitude", "default": null},
+ {"name":"src_asn", "type":["null","long"],"doc":"Autonomous system number", "default": null},
+ {"name":"src_tos", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"dst_ip4", "type":["null","long"],"doc":"Destination ip address of event", "default": null},
{"name":"dst_ip4_str", "type":["null","string"],"doc":"Destination ip address of event", "default": null},
- {"name":"dst_ip6", "type":["null","long"],"doc":"Destination ip address of event", "default": null},
+ {"name":"dst_ip6", "type":["null","string"],"doc":"Destination ip address of event", "default": null},
{"name":"dst_ip6_str", "type":["null","string"],"doc":"Destination ip address of event", "default": null},
{"name":"dst_host", "type":["null","string"],"doc":"Destination FQDN of event", "default": null},
+ {"name":"dst_mac", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"dst_domain", "type":["null","string"],"doc":"Domain name of destination address", "default": null},
{"name":"dst_port", "type":["null","int"],"doc":"Destination port of event", "default": null},
{"name":"dst_country_code", "type":["null","string"],"doc":"Source country code", "default": null},
{"name":"dst_country_name", "type":["null","string"],"doc":"Source country name", "default": null},
{"name":"dst_region", "type":["null","string"],"doc":"Source region", "default": null},
{"name":"dst_city", "type":["null","string"],"doc":"Source city", "default": null},
- {"name":"dst_lat", "type":["null","int"],"doc":"Source latitude", "default": null},
- {"name":"dst_long", "type":["null","int"],"doc":"Source longitude", "default": null},
- {"name":"src_asn", "type":["null","int"],"doc":"Autonomous system number", "default": null},
- {"name":"dst_asn", "type":["null","int"],"doc":"Autonomous system number", "default": null},
+ {"name":"dst_lat", "type":["null","float"],"doc":"Destination latitude", "default": null},
+ {"name":"dst_long", "type":["null","float"],"doc":"Destination longitude", "default": null},
+ {"name":"dst_asn", "type":["null","long"],"doc":"Autonomous system number", "default": null},
+ {"name":"dst_tos", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"net_direction", "type":["null","string"],"doc":"Direction", "default": null},
{"name":"net_flags", "type":["null","string"],"doc":"TCP flags", "default": null},
{"name":"file_name", "type":["null","string"],"doc":"Filename from event", "default": null},
@@ -96,10 +103,11 @@
{"name":"file_atime", "type":["null","long"],"doc":"Timestamp (UTC) of file access", "default": null},
{"name":"file_acls", "type":["null","string"],"doc":"File permissions", "default": null},
{"name":"file_type", "type":["null","string"],"doc":"Type of file", "default": null},
- {"name":"file_size", "type":["null","int"],"doc":"Size of file in bytes", "default": null},
+ {"name":"file_size", "type":["null","long"],"doc":"Size of file in bytes", "default": null},
{"name":"file_desc", "type":["null","string"],"doc":"Description of file", "default": null},
{"name":"file_hash", "type":["null","string"],"doc":"Hash of file", "default": null},
{"name":"file_hash_type", "type":["null","string"],"doc":"Type of hash", "default": null},
+ {"name":"file_uid", "type":["null","string"],"doc":"File identifier", "default": null},
{"name":"end_object", "type":["null","string"],"doc":"File/Process/ Registry", "default": null},
{"name":"end_action", "type":["null","string"],"doc":"Action taken on object (open/delete/ edit)", "default": null},
{"name":"end_msg", "type":["null","string"],"doc":"Message (details of action taken on object)", "default": null},
@@ -170,7 +178,7 @@
{"name":"ftp_command", "type":["null","string"],"doc":"FTP command", "default": null},
{"name":"ftp_arg", "type":["null","string"],"doc":"Argument", "default": null},
{"name":"ftp_mime_type", "type":["null","string"],"doc":"Mime type", "default": null},
- {"name":"ftp_file_size", "type":["null","int"],"doc":"File size", "default": null},
+ {"name":"ftp_file_size", "type":["null","long"],"doc":"File size", "default": null},
{"name":"ftp_reply_code", "type":["null","int"],"doc":"Reply code", "default": null},
{"name":"ftp_reply_msg", "type":["null","string"],"doc":"Reply message", "default": null},
{"name":"ftp_data_channel_passive", "type":["null","boolean"],"doc":"Passive data channel?", "default": null},
@@ -229,6 +237,9 @@
{"name":"flow_dst_dscp", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"flow_input", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"flow_output", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"flow_fwd_status", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"flow_snmp_in", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"flow_snmp_out", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"vuln_id", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"vuln_type", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"vuln_status", "type":["null","string"],"doc":"TBD", "default": null},
@@ -260,7 +271,18 @@
{"name":"av_signaturesubid", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"av_intrusionurl", "type":["null","string"],"doc":"TBD", "default": null},
{"name":"av_intrusionpayloadurl", "type":["null","string"],"doc":"TBD", "default": null},
- {"name":"objectname", "type":["null","string"],"doc":"TBD", "default": null}
+ {"name":"objectname", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_severity", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_mac", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_time", "type":["null","long"],"doc":"TBD", "default": null},
+ {"name":"agent_id", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_description", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_type", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_ip4", "type":["null","long"],"doc":"TBD", "default": null},
+ {"name":"agent_ip4_str", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_ip6", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_ip6_str", "type":["null","string"],"doc":"TBD", "default": null},
+ {"name":"agent_host", "type":["null","string"],"doc":"TBD", "default": null}
],
"doc": "A view schema for storing Apache Spot Event data."
}
diff --git a/spot-setup/odm/odm_setup.sh b/spot-setup/odm/odm_setup.sh
index 7fc10bd..7a48149 100755
--- a/spot-setup/odm/odm_setup.sh
+++ b/spot-setup/odm/odm_setup.sh
@@ -92,7 +92,7 @@
# Check the format argument and make sure its supported
if [ "$format" != "pqt" ] && [ "$format" != "avro" ] ; then
- log "Format argument '$format' is not supported. Only Parquet and Avro are supported data storage formats. Use 'pqt' or 'avro' instead (i.e. ./odm_setup pqt)."
+ log "Format argument '$format' is not supported. Only Parquet and Avro are supported data storage formats. Use 'pqt' or 'avro' instead (i.e. ./odm_setup -f pqt)."
exit 1
fi
@@ -194,4 +194,4 @@
${impala_db_shell} --var=ODM_DBNAME=${DBNAME} --var=ODM_TABLENAME=${f} --var=ODM_LOCATION=${HUSER}/${d}/${f} --var=ODM_AVRO_URL=hdfs://${HUSER}/${d}/schema/${f}.avsc -c -f ${ODM_FILES_DIR}/create_${f}_avro.sql
fi
done
-done
\ No newline at end of file
+done