Merge pull request #143 from natedogs911/config_clean

clean config options via configurator.py
diff --git a/README.md b/README.md
index 13888ea..27a85d1 100644
--- a/README.md
+++ b/README.md
@@ -71,8 +71,8 @@
 
 Our Central repository for our Apache Spot solution is found here. If you find a bug, have question or something to discuss please contact us:
 
-* [Create an Issue](https://issues.apache.org/jira/browse/SPOT-20?jql=project%20%3D%20SPOT). 
-* [Go to our Slack channel](https://apachespot.slack.com/messages/general/). 
+* [Create an Issue](https://issues.apache.org/jira/browse/SPOT-20?jql=project%20%3D%20SPOT)
+* [Join the Dev List](mailto:issues-subscribe@spot.incubator.apache.org) and then [send us a message](mailto:dev@spot.incubator.apache.org)
 
 ## **Contributing to Apache Spot**
 
@@ -86,7 +86,7 @@
 * Fork the repo of the module that you wish to commit to.
 * Create a Branch, we use [topic branches](https://git-scm.com/book/en/v2/Git-Branching-Branching-Workflows#Topic-Branches) for our commits. 
 * Push your commit(s) to your repository.
-* Create a pull request to the original repo in Apache Spot organization.
+* Create a pull request to the original repo in Apache Spot organization. *(See Below for Merging details)*
 
 ### **Commit Guidelines**
 
@@ -94,10 +94,17 @@
 * Please be clear with the commit messages about what you are fixing or adding to the code base. If you code is addressing an open issue please add the reference to the issue in the comments with: Fix: Issue's URL. 
 
 
-### **Merge approval**
+### **Merge Process**
 
-Apache Spot maintainers use +1 in a comment on the code review to indicate acceptance, 
-at least 3 "+1" from maintainers are required to approve the merge. If you have any question or concern please feel free to add a comment in your pull request or branch and tag any of the maintainers.
+Thanks for considering contributing to the Spot Project. In order to help make the process a little easier for everyone, please follow these steps.
+1) In order to start the merge process please open a ticket in the [Spot Jira](https://issues.apache.org/jira/projects/SPOT/issues) and take note of the Issue key *(SPOT-###)*. 
+2) Next, open a Pull-Request (PR) and reference the Issue key in the title of the PR. If you have any question or concern please feel free to add a comment in your pull request or branch and tag any of the maintainers.
+3) Now, it's time for the community to provide feedback on your commit. Getting community feedback can be hard, but start by sending a message to the Dev list; and make sure you're [subscribed](mailto:issues-subscribe@spot.incubator.apache.org) to the Dev List.
+4) At the same time, maintainers will be taking a look at your PR. The more community input you can get, in the form of comments rather than +1s, the more attention maintainers will give. 
+There is a manual and an automatic merge process:
+   * **Manual:** When a Project maintainer has given a '+1' in the comments, then your PR has been accepted. However, it must be manually merged by a maintainer at this point.
+   * **Automatic:** This process initiates when 3 maintainers provide a '+1'
+   * **Note:** if there are any merge conflicts you will have to come back and fix them before the process can continue.
 
 
 ## **Licensing**
diff --git a/issues-subscribe@spot.incubator.apache.org b/issues-subscribe@spot.incubator.apache.org
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/issues-subscribe@spot.incubator.apache.org
diff --git a/spot-ingest/start_ingest_standalone.sh b/spot-ingest/start_ingest_standalone.sh
index 1a16612..5d7ce36 100755
--- a/spot-ingest/start_ingest_standalone.sh
+++ b/spot-ingest/start_ingest_standalone.sh
@@ -58,7 +58,7 @@
 
 INGEST_DATE=`date +"%H_%M_%S"`
 
-screen -d -m -S SPOT-INGEST-${INGEST_CONF}-${INGEST_DATE}  -s /bin/bash
+screen -d -m -S SPOT-INGEST-${INGEST_CONF}-${INGEST_DATE}  -s `which bash`
 screen -S SPOT-INGEST-${INGEST_CONF}-${INGEST_DATE} -X setenv TZ ${TIME_ZONE}
 screen -dr  SPOT-INGEST-${INGEST_CONF}-${INGEST_DATE} -X screen -t Master sh -c "python master_collector.py -t ${INGEST_CONF} -w ${WORKERS_NUM} -id SPOT-INGEST-${INGEST_CONF}-${INGEST_DATE}; echo 'Closing Master...'; sleep 432000"
 
diff --git a/spot-ml/build.sbt b/spot-ml/build.sbt
index 5ffc520..cc83ed0 100644
--- a/spot-ml/build.sbt
+++ b/spot-ml/build.sbt
@@ -23,9 +23,7 @@
 
 val sparkVersion = "2.1.0"
 
-import sbtassembly.Plugin.AssemblyKeys._
-
-assemblySettings
+baseAssemblySettings
 
 libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion % "provided"
 libraryDependencies += "org.apache.spark" %% "spark-mllib" % sparkVersion
@@ -37,7 +35,7 @@
 
 val meta = """META.INF(.)*""".r
 
-mergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) => {
+assemblyMergeStrategy in assembly := {
   case PathList("org", "apache", "commons", xs@_*) => MergeStrategy.last
   case PathList("com", "esotericsoftware", "minlog", xs@_*) => MergeStrategy.last
   case PathList("com", "google", xs@_*) => MergeStrategy.last
@@ -50,7 +48,6 @@
   case meta(_) => MergeStrategy.discard
   case x => MergeStrategy.first
 }
-}
 
 // super important with multiple tests running spark Contexts
 parallelExecution in Test := false
@@ -69,4 +66,4 @@
   }
 }
 
-resourceGenerators in Compile <+= getTop1MFileFromAlexa
\ No newline at end of file
+resourceGenerators in Compile += getTop1MFileFromAlexa
diff --git a/spot-ml/project/plugins.sbt b/spot-ml/project/plugins.sbt
index 55de9d9..30d2f56 100644
--- a/spot-ml/project/plugins.sbt
+++ b/spot-ml/project/plugins.sbt
@@ -15,4 +15,4 @@
  * limitations under the License.
  */
 
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.1")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")
diff --git a/spot-ml/src/main/scala/org/apache/spot/dns/DNSSuspiciousConnectsAnalysis.scala b/spot-ml/src/main/scala/org/apache/spot/dns/DNSSuspiciousConnectsAnalysis.scala
index acf8fc6..dd8cb36 100644
--- a/spot-ml/src/main/scala/org/apache/spot/dns/DNSSuspiciousConnectsAnalysis.scala
+++ b/spot-ml/src/main/scala/org/apache/spot/dns/DNSSuspiciousConnectsAnalysis.scala
@@ -31,7 +31,7 @@
 
 /**
   * The suspicious connections analysis of DNS log data develops a probabilistic model the DNS queries
-  * made by each client IP and flags those assigned a low probability as "suspicious"
+  * made by each client IP and flags those assigned a low probability as "suspicious".
   */
 
 object DNSSuspiciousConnectsAnalysis {
@@ -106,9 +106,11 @@
 
 
   /**
-    *
+    * Return all DNS records with valid values.
+    * 
     * @param inputDNSRecords raw DNS records.
     * @return
+    * @see filterInvalidRecords(DataFrame)
     */
   def filterRecords(inputDNSRecords: DataFrame): DataFrame = {
 
@@ -135,9 +137,11 @@
   }
 
   /**
+    * Return all DNS records with invalid values.
     *
     * @param inputDNSRecords raw DNS records.
     * @return
+    * @see filterRecords(DataFrame)
     */
   def filterInvalidRecords(inputDNSRecords: DataFrame): DataFrame = {
 
@@ -164,6 +168,7 @@
   }
 
   /**
+    * Get all DNS records which score below the given threshold.
     *
     * @param scoredDNSRecords scored DNS records.
     * @param threshold        score tolerance.
diff --git a/spot-setup/APACHE-SPOT-SCHEMA.md b/spot-setup/APACHE-SPOT-SCHEMA.md
new file mode 100644
index 0000000..f6ba142
--- /dev/null
+++ b/spot-setup/APACHE-SPOT-SCHEMA.md
@@ -0,0 +1,436 @@
+# Apache Spot Schema 
+
+This document is a central place where users can read about the Proxy, DNS and Flow schemas. Users with their own ingest can use it to implement a module without using spot-ingest, or to compare their ingest against it. To do that, create a data set with the columns expected by each pipeline.
+
+- [Proxy](#proxy)
+
+    Attributes and rules.
+    * [Proxy Schema for spot-ingest](#proxy-schema-for-spot-ingest) 
+    * [Proxy Schema for spot-ml](#proxy-schema-for-spot-ml)
+    * [Proxy Schema for spot-oa](#proxy-schema-for-spot-oa)
+    * [Proxy Schema for spot-ui](#proxy-schema-for-spot-ui)
+
+- [Flow (spot-nfdump)](#flow-spot-nfdump)
+
+    Attributes and rules.
+    * [Flow Schema for spot-ingest](#flow-schema-for-spot-ingest) 
+    * [Flow Schema for spot-ml](#flow-schema-for-spot-ml)
+    * [Flow Schema for spot-oa](#flow-schema-for-spot-oa)
+    * [Flow Schema for spot-ui](#flow-schema-for-spot-ui)
+
+- [DNS](#dns)
+
+    Attributes and rules.
+    * [DNS Schema for spot-ingest](#dns-schema-for-spot-ingest) 
+    * [DNS Schema for spot-ml](#dns-schema-for-spot-ml)
+    * [DNS Schema for spot-oa](#dns-schema-for-spot-oa)
+    * [DNS Schema for spot-ui](#dns-schema-for-spot-ui)
+
+## Proxy
+The table shows the list of attributes used in proxy. The columns indicated with field (:white_check_mark:) are used by the pipeline.  
+
+|Spot Field Name  |Type    |Description                                  |Original Field Name  |Format      |Spot-ingest       |Spot-ml           |Spot-oa           |Spot-ui           | 
+|---------------- |--------|---------------------------------------------|---------------------|------------|------------------|------------------|------------------|------------------|
+| p_date          | string | Date for the connection                     |        date         | yyyy-mm-dd |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| p_time	      | string | Time for the connection	                 |        time	       |  hh:MM:SS  |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| clientip        | string |IP address of the client sending the request |        c-ip	       | ip address	|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| host        	  | string |Hostname from the client's request URL	     |       cs-host	   |    text	|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| reqmethod	      | string |Request method used from client to appliance (HTTP Method - GET, POST, CONNECT) |	cs-method | 	text |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| useragent	      | string |Browser Type	                             | cs(User-Agent)	   |quoted text	|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| resconttype	  | string |Content-type (Ex. text/html, image/xml)	     |rs(Content-Type) 	   | text	    |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| duration	      |  int   |Duration of the connection	                 |time-taken	       |numerical	|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| username	      |string  |Client Username	                             |cs-username	       |text	    |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| authgroup   	  |string  |Client Authentication Group	                 |cs-auth-group 	   |text	    |:white_check_mark:|	-	    |     -    |       -  |
+| exceptionid	  |string  |Identifier of the exception resolved (empty if the transaction has not been terminated) |	x-exception-id 	| text	|:white_check_mark:|- | - |     -         |	
+| filterresult    |string  |Content filtering result: Denied, Proxied or Observed | sc-filter-result | text |:white_check_mark:|     -        |        -        |      -   |			
+| webcat	      |string  |All content categories of the request URL	 |cs-categories        |quoted text	|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| referer	      |string  |Request header: Referer %S s-sitename The service type used to | cs(Referer) | url |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| respcode	      |string  |Protocol status code from appliance to client (HTTP Response Codes) | sc-status | numerical |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| action	      |string  |What type of action did the Appliance take to process this request; possible values include ALLOWED, DENIED, FAILED, SERVER_ERROR|s-action |text |:white_check_mark:| -| -|- | 			
+| urischeme	      |string  |Scheme of the original URL requested	     |cs-uri-scheme 	   |text	    |:white_check_mark:|      -       |     -           |      -          |		
+| uriport	      |string  |Port from the original URL requested	     |cs-uri-port 	       |numerical	|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| uripath	      |string  |Path of the original URL requested without query |cs-uri-path 	   |text	    |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| uriquery	      |string  |Query from the original URL requested	     |cs-uri-query	       |text	    |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| uriextension	  |string  |Document extension from the original URL requested |cs-uri-extension |text	    |:white_check_mark:|      -       |     -           |      -          |		
+| serverip	      |string  |IP address of the appliance on which the client established its connection |s-ip  |ip address |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| scbytes	      |int	   |Number of bytes sent from appliance to client|sc-bytes             |numerical	|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| csbytes	      |int	   |Number of bytes sent from client to appliance|cs-bytes 	           |numerical	|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| virusid	      |string  |x-virus-id 	                                 |x-virus-id 	       |text	    |:white_check_mark:|    -         |    -            |       -         |		
+| bcappname	      |string  |x-bluecoat-application-name 	             |x-bluecoat-application-name |quoted text |:white_check_mark:|  -        |  -              |    -            |			
+| bcappoper	      |string  |x-bluecoat-application-operation	         |x-bluecoat-application-operation |quoted text |:white_check_mark:|-     |   -             |    -            |			
+|fulluri	      |string  |Full URI concatenated from cs-host, cs-uri-path, cs-uri-query fields |it does not exist, it is calculated during ingest |text |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| word 	          |string  |      -                					     |           -          |   -       |     -            |  -               |:white_check_mark:|      -          |
+| ml_score	      |float   |				-	                         |          -           |      -    |        -         |   -              |:white_check_mark:|  -              |
+| respcode_name   |string  |IANA translation for the response code column|                -     |     -     |      -           |       -          |:white_check_mark:|:white_check_mark:|
+| uri_rep	      |string  |Reputation value according to Threat intelligence services| 	-   |	-		|        -         |  -               |:white_check_mark:|:white_check_mark:|
+| network_context |string  |User defined value					         |              -       |     -     |       -          |     -            |:white_check_mark:|:white_check_mark:| 
+
+
+## Flow (spot-nfdump) 
+The table shows the list of attributes used in flow. The columns indicated with field (:white_check_mark:) are used by the pipeline.  
+
+|Spot Field Name  |Type    |Description                                  |Original NFDUMP Field Name           |Format                   |Spot-ingest       |Spot-ml           |Spot-oa           |Spot-ui           | 
+|---------------- |--------|---------------------------------------------|-------------------------------------|-------------------------|------------------|------------------|------------------|------------------|
+| treceived  	  | string | Time the flow was received by the collector | tr	                               |YYYY-mm-DD HH:MM:SS      |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| unix_tstamp	  | bigint | treceived epoch time	                     |it is calculated by ingest hql script|number (1471431305)      |:white_check_mark:| -                | -                | -                |                     
+| tryear     	  | int    | treceived year 	                         |it is calculated by spot-nfdump	   |numerical                |:white_check_mark:|:white_check_mark:| -                | -                |
+| trmonth    	  | int    | treceived month	                         |it is calculated by spot-nfdump	   |numerical                |:white_check_mark:|:white_check_mark:| -                | -                |
+| trday      	  | int    | treceived day	                             |it is calculated by spot-nfdump	   |numerical                |:white_check_mark:|:white_check_mark:| -                | -                |
+| trhour     	  | int    | treceived hour	                             |it is calculated by spot-nfdump	   |numerical                |:white_check_mark:|:white_check_mark:| -                | -                |
+| trminute   	  | int    | treceived minute	                         |it is calculated by spot-nfdump	   |numerical                |:white_check_mark:|:white_check_mark:| -                | -                |
+| trsec      	  | int    | treceived seconds	                         |it is calculated by spot-nfdump	   |numerical                |:white_check_mark:|:white_check_mark:| -                | -                |
+| tdur       	  | float  | Duration	                                 | td	                               |xx.xx (18.04400062561035)|:white_check_mark:|:white_check_mark:| -                | -                |
+| sip        	  | string | Source IP Address          	             | sa	                               |ip address dotted decimal|:white_check_mark:|:white_check_mark:| -                | -                |
+| dip        	  | string | Destination IP Address	                     | da	                               |ip address dotted decimal|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| sport      	  | int    | Source Port	                             | sap	                               |numerical                |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| dport      	  | int    | Destination Port	                         | dap	                               |numerical                |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| proto      	  | string | Protocol	                                 | pr	                               |text (UDP, TCP, etc)     |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| flag       	  | string | TCP Flags	                                 | flg	                               |dotted flag representation (.A....)|:white_check_mark:| -      |:white_check_mark:|:white_check_mark:|   
+| fwd        	  | int    | Forwarding Status	                         | fwd	                               |numerical                |:white_check_mark:| -                | -                | -                |
+| stos       	  | int    | Source Tos (DSCP)	                         | stos	                               |numerical                |:white_check_mark:| -                |:white_check_mark:|:white_check_mark:|
+| ipkt       	  | bigint | Input Packets	                             | ipkt	                               |numerical                |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| ibyt       	  | bigint | Input Bytes	                             | ibyt	                               |numerical                |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| opkt       	  | bigint | Output Packets	                             | opkt	                               |numerical                |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| obyt       	  | bigint | Output Bytes	                             | obyt	                               |numerical                |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|
+| input      	  | int    | Input interface SNMP number	             | in	                               |numerical                |:white_check_mark:| -                |:white_check_mark:|:white_check_mark:| 
+| output     	  | int    | Output interface SNMP number	             | out	                               |numerical                |:white_check_mark:| -                |:white_check_mark:|:white_check_mark:|
+| sas        	  | int    | Source AS number	                         | sas	                               |numerical                |:white_check_mark:| -                | -                | -                |
+| das        	  | int    | Destination AS number	                     | das	                               |numerical                |:white_check_mark:| -                | -                | -                |
+| dtos       	  | int    | Destination Tos (DSCP)	                     | dtos	                               |numerical                |:white_check_mark:| -                | -                | -                |  
+| dir        	  | int    | direction	                                 | dir	                               |numerical (0,1)          |:white_check_mark:| -                | -                | -                | 
+| rip        	  | string | Router IP	                                 | ra	                               |ip address dotted decimal|:white_check_mark:| -                |:white_check_mark:|:white_check_mark:|
+| ML_score	      |float   | Score assigned by ML - Produced by ML		 |                                     |numerical			     |                  |                  |:white_check_mark:|                  |	
+| rank	          | int    | Rank number based on the order of ML_score values - Produced by OA |		       |numerical				 |                  | -                | -                |:white_check_mark:|
+| srcip_internal  |int     | Boolean value to identify an internal source IP - Produced by OA |                |                         | -                | -                | -                |:white_check_mark:|
+| dstip_internal  |int     | Boolean value to identify an internal destination IP - Produced by OA |		   |                         | -                | -                | -			      |:white_check_mark:|
+| src_geoloc	  |string  | Lat & Long values of the source IP - Produced by OA |						       |                         | -                | -                | -                |:white_check_mark:|
+| dst_geoloc	  |string  |Lat & Long values of the destination IP - Produced by OA |						   |                         | -                | -                | -                |:white_check_mark:|
+| src_domain	  |string  |Domain assigned to the source IP - Produced by OA |                                |						 | -                | -                | -                |:white_check_mark:|
+| dst_domain	  |string  |Domain assigned to the destination IP - Produced by OA |						   |                         | -                | -                | -                |:white_check_mark:|
+| src_rep	      |string  |Collection of reputation values assigned to the source IP from different TI services - Produced by OA | |    | -                | -				   | -                |:white_check_mark:|
+| dst_rep	      |string  |Collection of reputation values  assigned to the destination IP from different TI services - Produced by OA||| -                | -                | -         		  |:white_check_mark:|
+
+
+## DNS 
+The table shows the list of attributes used in DNS. The columns indicated with field (:white_check_mark:) are used by the pipeline.  
+
+|Spot Field Name  |Type    |Description                                  |Original Tshark Field Name           |Format                   |Spot-ingest       |Spot-ml           |Spot-oa           |Spot-ui           | 
+|---------------- |--------|---------------------------------------------|-------------------------------------|-------------------------|------------------|------------------|------------------|------------------|
+| frame_time   	  |string  |Tshark Frame Time received	                 |frame.time	          |Ex. Jan  4 2017 04:41:06.337519000 UTC|:white_check_mark:|:white_check_mark:|:white_check_mark:| -                |		
+| unix_tstamp  	  |bigint  |Tshark Frame Time received epoch format      |frame.time_epoch	                   |numerical (1483504866)	 |:white_check_mark:|:white_check_mark:|:white_check_mark:| -                |	
+| frame_len    	  |int     |Tshark Frame Length	                         |frame.len	                           |numerical	             |:white_check_mark:|:white_check_mark:|:white_check_mark:| -                |		
+| ip_dst       	  |string  |Tshark IP destination (Client IP)	         |ip.dst	                           |ip address dotted decimal|:white_check_mark:|:white_check_mark:|:white_check_mark:| -                |		
+| ip_src       	  |string  |Tshark IP source (DNS Server IP)	         |ip.src	                           |ip address dotted decimal|-                 | -                |:white_check_mark:| -                |  				
+| dns_qry_name 	  |string  |Tshark DNS Query Name	                     |dns.qry.name	                       |text	                 |:white_check_mark:|:white_check_mark:|:white_check_mark:| -                |		
+| dns_qry_class	  |string  |Tshark DNS Query Class	                     |dns.qry.class	                       |hexadecimal (0x00000001) |:white_check_mark:|:white_check_mark:|:white_check_mark:| -                |		
+| dns_qry_type 	  |int     |Tshark DNS Query Type	                     |dns.qry.type	                       |numerical	             |:white_check_mark:|:white_check_mark:|:white_check_mark:| -                |		
+| dns_qry_rcode	  |int     |Tshark DNS Query Response Code	             |dns.flags.rcode	                   |numerical	             |:white_check_mark:|:white_check_mark:|:white_check_mark:| -                |	
+| dns_a        	  |string  |Tshark DNS Query A Record	                 |dns.a	                               |text	                 | -                | -                | -                |:white_check_mark:|
+|ML_score	      |float   |Produced by ML                               |                                     |                         | -                | -                |:white_check_mark:|:white_check_mark:| 
+|tld	          |string  |Top level domain obtained from query name column - Produced by OA |                |                         | -                | -                | -                |:white_check_mark:|
+|query_rep	      |string  |Collection of reputation values assigned to the destination IP from different TI services - Produced by OA|| | -                | -                | -                |:white_check_mark:|
+|hh	              |int     |Obtained from frame time column - Produced by OA |                                 |                         | -                | -                | -                |:white_check_mark:|
+|dns_qry_class_name|string |Translation for the query class code - Produced by OA |                            |                         | -                | -                | -                |:white_check_mark:|
+|dns_qry_type_name|string  |Translation for the query type code - Produced by OA |                             |                         | -                | -                | -                |:white_check_mark:|
+|dns_qry_rcode_name|string |Translation for the query response code - Produced by OA |                         |                         | -                | -                | -                |:white_check_mark:|
+|network_context  |string  |Value to identify the destination IP as internal to the network - Produced by OA | |                         | -                | -                | -                |:white_check_mark:|
+
+
+### Proxy Schema for spot-ingest
+The table shows proxy schema attributes and the rules used specifically for ingest.
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| p_date          | -                   | -                      |
+| p_time          | -                   | -                      |
+| clientip        | -                   | -                      |
+| host            | -                   | -                      |
+| reqmethod       | -                   | -                      |
+| useragent       | -                   | -                      |
+| resconttype     | -                   | -                      |
+| duration        | -                   | -                      |
+| username        | -                   | -                      |
+| authgroup       | -                   | -                      | 
+| exceptionid     | -                   | -                      |
+| filterresult    | -                   | -                      | 
+| webcat          | -                   | -                      |
+| referer         | -                   | -                      |
+| respcode        | -                   | -                      |
+| action          | -                   | -                      |
+| urischeme       | -                   | -                      |
+| uriport         | -                   | -                      |
+| uripath         | -                   | -                      |    
+| uriquery        | -                   | -                      |
+| uriextension    | -                   | -                      |
+| serverip        | -                   | -                      |
+| scbytes         | -                   | -                      |
+| csbytes         | -                   | -                      |
+| virusid         | -                   | -                      | 
+| bcappname       | -                   | -                      |
+| bcappoper       | -                   | -                      |
+| fulluri         | -                   | produced by ingest     |
+
+
+### Proxy Schema for spot-ml
+The table shows proxy schema attributes and the rules used specifically for machine learning (ml).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| p_date          | Can't be null       | -                      |
+| p_time          | Can't be null       | -                      |
+| clientip        | Can't be null       | -                      |
+| host            | Can't be null       | -                      |
+| reqmethod       | -                   | -                      |
+| useragent       | -                   | Null will be replaced with "-" |
+| resconttype     | -                   | Null will be replaced with "-" |
+| duration        | -                   | -                      |
+| username        | -                   | -                      |
+| webcat          | -                   | -                      |
+|referer          | -                   | -                      |
+|respcode         | -                   | -                      |
+|uriport          | -                   | -                      |
+|uripath          | -                   | -                      |    
+|uriquery         | -                   | -                      |
+|serverip         | -                   | -                      |
+|scbytes          | -                   | -                      |
+|csbytes          | -                   | -                      |
+|fulluri          | Can't be null       | -                      |
+
+
+### Proxy Schema for spot-oa
+The table shows proxy schema attributes and the rules used specifically for operational analytics (oa).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| p_date          | -                   | -                      |
+| p_time          | -                   | -                      |
+| clientip        | -                   | -                      |
+| host            | -                   | -                      |
+| reqmethod       | -                   | -                      |
+| useragent       | -                   | -                      |
+| resconttype     | -                   | -                      |
+| duration        | -                   | -                      |
+| username        | -                   | -                      |
+| webcat          | -                   | -                      |
+|referer          | -                   | -                      |
+|respcode         | -                   | -                      |
+|uriport          | -                   | -                      |
+|uripath          | -                   | -                      |    
+|uriquery         | -                   | -                      |
+|serverip         | -                   | -                      |
+|scbytes          | -                   | -                      |
+|csbytes          | -                   | -                      |
+|fulluri          | -                   | -                      |
+| word            | -                   | -                      |
+| ml_score        | -                   | -                      |
+| respcode_name   | -                   | Produced by OA         |
+| uri_rep         | -                   | Produced by OA         |
+| network_context | -                   | Produced by OA         |
+
+
+### Proxy Schema for spot-ui
+The table shows proxy schema attributes and the rules used specifically for user interface (ui).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| p_date          | -                   | -                      |
+| p_time          | -                   | -                      |
+| clientip        | -                   | -                      |
+| host            | -                   | -                      |
+| reqmethod       | -                   | -                      |
+| useragent       | -                   | -                      |
+| resconttype     | -                   | -                      |
+| duration        | -                   | -                      |
+| username        | -                   | -                      |
+| webcat          | -                   | -                      |
+|referer          | -                   | -                      |
+|respcode         | -                   | -                      |
+|uriport          | -                   | -                      |
+|uripath          | -                   | -                      |    
+|uriquery         | -                   | -                      |
+|serverip         | -                   | -                      |
+|scbytes          | -                   | -                      |
+|csbytes          | -                   | -                      |
+|fulluri          | -                   | -                      |
+| respcode_name   | -                   | Optional               |
+| uri_rep         | -                   | Optional               |
+| network_context | -                   | Optional               |
+
+
+### Flow Schema for spot-ingest
+The table shows flow schema attributes and the rules used specifically for ingest.
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| treceived  	  | -	                | -                      |
+| unix_tstamp	  | -	                | produced by ingest     |
+| tryear     	  | -	                | produced by spot-nfdump|
+| trmonth    	  | -	                | produced by spot-nfdump|
+| trday      	  | - 	                | produced by spot-nfdump|
+| trhour     	  | -	                | produced by spot-nfdump|
+| trminute   	  | -	                | produced by spot-nfdump|
+| trsec      	  | -	                | produced by spot-nfdump|
+| tdur       	  | -	                | -                      |
+| sip        	  | -	                | -                      |
+| dip        	  | -	                | -                      |
+| sport      	  | -	                | -                      |
+| dport      	  | -	                | -                      |
+| proto      	  | -	                | -                      |
+| flag       	  | -	                | -                      |
+| fwd        	  | -	                | -                      |
+| stos       	  | -	                | -                      |
+| ipkt       	  | -	                | -                      |
+| ibyt       	  | -	                | -                      |
+| opkt       	  | -	                | -                      |
+| obyt       	  | -	                | -                      |
+| input      	  | -	                | -                      |
+| output     	  | -	                | -                      |
+| sas        	  | -	                | -                      |
+| das        	  | -	                | -                      |
+| dtos       	  | -	                | -                      |  
+| dir        	  | -	                | -                      |
+| rip        	  | -	                | -                      |
+
+
+### Flow Schema for spot-ml
+The table shows flow schema attributes and the rules used specifically for machine learning (ml).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| treceived  	  | Can't be null	    | -                      |
+| tryear     	  |	-                   | -                      |
+| trmonth    	  | -                   | -	                     |
+| trday      	  |	-                   | -                      |
+| trhour     	  | Should be a number between 0 and 23| -	     |
+| trminute   	  | Should be a number between 0 and 59| -	     |
+| trsec      	  | Should be a number between 0 and 59| -	     |
+| tdur       	  | -                   | - 	                 |
+| sip        	  | Can't be null	    | -                      |
+| dip        	  | Can't be null	    | -                      |
+| sport           | Should be greater than or equal to 0 | -     |
+| dport           | Should be greater than or equal to 0 | -     |
+| proto      	  | -                   | -	                     |
+| ipkt            | Should be greater than or equal to 0 | -     |
+| ibyt            | Should be greater than or equal to 0 | -     |
+| opkt       	  |	-                   | -                      |
+| obyt       	  |	-                   | -                      |
+
+
+### Flow Schema for spot-oa
+The table shows flow schema attributes and the rules used specifically for operational analytics (oa).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| treceived    	  | -	                | -                      |
+| sip             | -	                | -                      |
+| dip             | -	                | -                      |
+| sport           | -	                | -                      |
+| dport           | -	                | -                      |
+| proto           | -	                | -                      |
+| flag            | -	                | -                      |
+| stos            | -	                | -                      |
+| ipkt            | -	                | -                      |
+| ibyt            | -	                | -                      |
+| opkt            | -	                | -                      |
+| obyt            | -	                | -                      |
+| input           | -	                | -                      |
+| output          | -	                | -                      |
+| rip             | -	                | -                      |
+| ML_score        | -	                | -                      |
+
+
+
+### Flow Schema for spot-ui
+The table shows flow schema attributes and the rules used specifically for user interface (ui).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| treceived       | -	                | -                      |
+| sip             | -	                | -                      |
+| dip             | -	                | -                      | 
+| sport           | -	                | -                      |
+| dport           | -	                | -                      |
+| proto           | -	                | -                      |
+| flag            | -	                | -                      |
+| stos            | -	                | -                      |
+| ipkt            | -	                | -                      |
+| ibyt            | -	                | -                      |
+| opkt            | -	                | -                      |
+| obyt            | -	                | -                      |
+| input           | -	                | -                      |
+| output          | -	                | -                      |
+| rip             | -	                | -                      |
+| rank            | -	                | -                      |
+| srcip_internal  | -	                | -                      |
+| dstip_internal  | -	                | -                      |
+| src_geoloc      | -	                | -                      |
+| dst_geoloc      | -	                | -                      |
+| src_domain      | -	                | -                      |
+| dst_domain      | -	                | -                      |
+| src_rep         | -	                | -                      |
+| dst_rep         | -	                | -                      |
+
+
+### DNS Schema for spot-ingest
+The table shows DNS schema attributes and the rules used specifically for ingest.
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| frame_time   	  | -	                | -                      |
+| unix_tstamp  	  | -	                | -                      |
+| frame_len    	  | -	                | -                      | 
+| ip_dst       	  | -	                | -                      |
+| ip_src       	  | -	                | -                      |
+| dns_qry_name 	  | -	                | -                      |
+| dns_qry_class	  | -	                | -                      |
+| dns_qry_type 	  | -	                | -                      |
+| dns_qry_rcode	  | -	                | -                      |
+| dns_a        	  | -	                | -                      |
+
+
+### DNS Schema for spot-ml
+The table shows DNS schema attributes and the rules used specifically for machine learning (ml).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| frame_time      | Can't be null, an empty string, or "-"                             |-|
+| unix_tstamp     | Should be a number equal to or greater than 0                      |-|
+| frame_len       | Should be a number equal to or greater than 0                      |-|
+| ip_dst          | Can't be null, an empty string, or "-"                             |-|
+| dns_qry_name    | Can't be null, an empty string, or "-"                             |-|
+| dns_qry_class	  | If dns_qry_type and dns_qry_rcode are null, this one can't be null |-|
+| dns_qry_type 	  | If dns_qry_class and dns_qry_rcode are null, this can't be null	   |-|
+| dns_qry_rcode	  | If dns_qry_class and dns_qry_type are null, this can't be null	   |-|
+
+
+### DNS Schema for spot-oa
+The table shows DNS schema attributes and the rules used specifically for operational analytics (oa).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| frame_time      | -	                | -                      |
+| unix_tstamp     | -	                | -                      |
+| frame_len       | -	                | -                      |
+| ip_dst          | -	                | -                      |
+| ip_src          | -	                | -                      |
+| dns_qry_name    | -	                | -                      |
+| dns_qry_class   | -	                | -                      |
+| dns_qry_type    | -	                | -                      |
+| dns_qry_rcode   | -	                | -                      |
+| ML_score        | -	                | -                      |
+
+
+### DNS Schema for spot-ui
+The table shows DNS schema attributes and the rules used specifically for user interface (ui).
+
+| Spot field name | Rules               | Comments               |
+|-----------------|---------------------|------------------------| 
+| dns_a           | -	                | -                      |
+| ML_score        | -	                | -                      |
+| tld             | -	                | -                      |
+| query_rep       | -	                | -                      |
+| hh              | -	                | -                      |
+| dns_qry_class_name | -                | -                      |
+| dns_qry_type_name| -	                | -                      |
+| dns_qry_rcode_name | -                | -                      |
+| network_context | -	                | -                      |
\ No newline at end of file
diff --git a/spot-setup/hdfs_setup.sh b/spot-setup/hdfs_setup.sh
index 6e73a20..13e8fcb 100755
--- a/spot-setup/hdfs_setup.sh
+++ b/spot-setup/hdfs_setup.sh
@@ -19,8 +19,21 @@
 
 set -e
 
+help() {  # Print the usage/option summary for this script to stdout, then terminate.
+    echo -n "
+   Initialize folders and databases for Spot in Hadoop.
+
+ Options:
+  --no-sudo         Do not use sudo with hdfs commands.
+  -c                Specify config file (default = /etc/spot.conf)
+  -d                Override databases
+  -h, --help        Display this help and exit
+" 
+exit 0  # ends the whole script with success status; help never returns to its caller
+}
+
 function log() {
-printf "hdfs_setup.sh:\n $1\n"
+printf "hdfs_setup.sh:\\n %s\\n\\n" "$1"
 }
 
 function safe_mkdir() {
@@ -29,11 +42,11 @@
         # keeps the script from existing on existing folders
         local hdfs_cmd=$1
         local dir=$2
-        if $(hdfs dfs -test -d ${dir}); then
+        if hdfs dfs -test -d "${dir}"; then
             log "${dir} already exists"
         else
             log "running mkdir on ${dir}"
-            ${hdfs_cmd} dfs -mkdir ${dir}
+            ${hdfs_cmd} dfs -mkdir "${dir}"
         fi
 }
 
@@ -74,12 +87,15 @@
             db_override=$1
             shift
             ;;
+        "-h"|"--help")
+            help
+            ;;
     esac
 done
 
 # Sourcing spot configuration variables
-log "Sourcing ${SPOTCONF}\n"
-source $SPOTCONF
+log "Sourcing ${SPOTCONF}"
+source "$SPOTCONF"
 
 if [[ ${no_sudo} == "true" ]]; then
     hdfs_cmd="hdfs"
@@ -95,10 +111,10 @@
 fi
 
 if [[ -z "${db_override}" ]]; then
-        DBENGINE=$(echo ${DBENGINE} | tr '[:upper:]' '[:lower:]')
+        DBENGINE=$(echo "${DBENGINE}" | tr '[:upper:]' '[:lower:]')
         log "setting database engine to ${DBENGINE}"
 else
-        DBENGINE=$(echo ${db_override} | tr '[:upper:]' '[:lower:]')
+        DBENGINE=$(echo "${db_override}" | tr '[:upper:]' '[:lower:]')
         log "setting database engine to $db_override"
 fi
 
@@ -112,7 +128,11 @@
         db_script="${db_shell} --var=huser=${HUSER} --var=dbname=${DBNAME} -c -f"
         ;;
     hive)
-        db_shell="hive"
+        if [[ ${no_sudo} == "true" ]]; then
+            db_shell="hive"
+        else
+            db_shell="sudo -u hive hive"
+        fi
         db_query="${db_shell} -e"
         db_script="${db_shell} -hiveconf huser=${HUSER} -hiveconf dbname=${DBNAME} -f"
         ;;
@@ -128,33 +148,35 @@
 esac
 
 # Creating HDFS user's folder
-safe_mkdir ${hdfs_cmd} ${HUSER}
-${hdfs_cmd} dfs -chown ${USER}:supergroup ${HUSER}
-${hdfs_cmd} dfs -chmod 775 ${HUSER}
+safe_mkdir "${hdfs_cmd}" "${HUSER}"
+${hdfs_cmd} dfs -chown "${USER}":supergroup "${HUSER}"
+${hdfs_cmd} dfs -chmod 775 "${HUSER}"
 
 # Creating HDFS paths for each use case
 for d in "${DSOURCES[@]}" 
 do
 	echo "creating /$d"
-	safe_mkdir hdfs ${HUSER}/$d
+	safe_mkdir "${hdfs_cmd}" "${HUSER}/$d"
 	for f in "${DFOLDERS[@]}" 
 	do 
 		echo "creating $d/$f"
-		safe_mkdir ${hdfs_cmd} ${HUSER}/$d/$f
+		safe_mkdir "${hdfs_cmd}" "${HUSER}/$d/$f"
 	done
 
 	# Modifying permission on HDFS folders to allow Impala to read/write
-	hdfs dfs -chmod -R 775 ${HUSER}/$d
-	${hdfs_cmd} dfs -setfacl -R -m user:${db_override}:rwx ${HUSER}/$d
-	${hdfs_cmd} dfs -setfacl -R -m user:${USER}:rwx ${HUSER}/$d
+	${hdfs_cmd} dfs -chmod -R 775 "${HUSER}"/"$d"
+	${hdfs_cmd} dfs -setfacl -R -m user:"${db_override}":rwx "${HUSER}"/"$d"
+	${hdfs_cmd} dfs -setfacl -R -m user:"${USER}":rwx "${HUSER}"/"$d"
 done
 
 
 # Creating Spot Database
- ${db_query} "CREATE DATABASE IF NOT EXISTS ${DBNAME}";
+log "Creating Spot Database"
+${db_query} "CREATE DATABASE IF NOT EXISTS ${DBNAME}";
 
 
 # Creating tables
+log "Creating Database tables"
 for d in "${DSOURCES[@]}" 
 do
 	${db_script} "./${DBENGINE}/create_${d}_parquet.hql"