blob: 5eea0e092b6553e5b1a9d46a8bbf7a28f85f2403 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spot
import org.apache.spot.utilities.{FloatPointPrecisionUtility, FloatPointPrecisionUtility32, FloatPointPrecisionUtility64}
/**
  * Parses the command line arguments for the suspicious connections analysis.
  *
  * The object bundles two things:
  *  - [[SuspiciousConnectsArgumentParser.parser]], a scopt parser that folds each
  *    recognised option into a configuration value, and
  *  - [[SuspiciousConnectsArgumentParser.SuspiciousConnectsConfig]], the immutable
  *    configuration that the parser produces.
  */
object SuspiciousConnectsArgumentParser {

  /**
    * scopt parser for the suspicious connections command line.
    *
    * Required options: analysis, input, ldatopiccount, scored, threshold, maxresults.
    * Every other option is optional and falls back to the default declared on
    * [[SuspiciousConnectsConfig]].
    */
  val parser: scopt.OptionParser[SuspiciousConnectsConfig] =
    new scopt.OptionParser[SuspiciousConnectsConfig]("LDA") {

      head("LDA Process", "1.1")

      opt[String]("analysis")
        .required()
        .valueName("< flow | proxy | dns >")
        .action((x, c) => c.copy(analysis = x))
        .text("choice of suspicious connections analysis to perform")

      opt[String]("input")
        .required()
        .valueName("<hdfs path>")
        .action((x, c) => c.copy(inputPath = x))
        .text("HDFS path to input")

      opt[String]("feedback")
        .valueName("<local file>")
        .action((x, c) => c.copy(feedbackFile = x))
        .text("the local path of the file that contains the feedback scores")

      opt[Int]("dupfactor")
        .valueName("<non-negative integer>")
        .action((x, c) => c.copy(duplicationFactor = x))
        .text("duplication factor controlling how to downgrade non-threatening connects from the feedback file")

      // Typed reader instead of opt[String] + manual toInt, consistent with the
      // other numeric options of this parser.
      opt[Int]("ldatopiccount")
        .required()
        .valueName("number of topics in topic model")
        .action((x, c) => c.copy(topicCount = x))
        .text("topic count")

      opt[String]("userdomain")
        .valueName("<user domain>")
        .action((x, c) => c.copy(userDomain = x))
        .text("Domain of spot user (example: intel)")

      opt[String]("scored")
        .required()
        .valueName("<hdfs path>")
        .action((x, c) => c.copy(hdfsScoredConnect = x))
        .text("HDFS path for results")

      opt[Double]("threshold")
        .required()
        .valueName("float64")
        .action((x, c) => c.copy(threshold = x))
        .text("probability threshold for declaring anomalies")

      opt[Int]("maxresults")
        .required()
        .valueName("integer")
        .action((x, c) => c.copy(maxResults = x))
        .text("number of most suspicious connections to return")

      // The help text used to be a copy of the --maxresults description.
      opt[String]("delimiter")
        .optional()
        .valueName("character")
        .action((x, c) => c.copy(outputDelimiter = x))
        .text("field delimiter for the output, default tab")

      // Typed reader instead of opt[String] + manual toLong.
      opt[Long]("prgseed")
        .optional()
        .valueName("long")
        .action((x, c) => c.copy(ldaPRGSeed = Some(x)))
        .text("seed for the pseudorandom generator")

      opt[Int]("ldamaxiterations")
        .optional()
        .valueName("int")
        .action((x, c) => c.copy(ldaMaxiterations = x))
        .text("maximum number of iterations used in LDA")

      opt[Double]("ldaalpha")
        .optional()
        .valueName("float64")
        .action((x, c) => c.copy(ldaAlpha = x))
        .text("document concentration for lda, default 1.02")

      opt[Double]("ldabeta")
        .optional()
        .valueName("float64")
        .action((x, c) => c.copy(ldaBeta = x))
        .text("topic concentration for lda, default 1.001")

      opt[Int]("precision")
        .optional()
        .valueName("int")
        .action((x, c) => c.copy(precisionUtility = x match {
          case 32 => FloatPointPrecisionUtility32
          // 64 and every unrecognised value select the 64 bit utility.
          case _ => FloatPointPrecisionUtility64
        }))
        .text("floating point precision, 32 or 64; any other value selects 64, default 64")

      opt[String]("ldaoptimizer")
        .optional()
        .valueName("lda optimizer")
        .action((x, c) => c.copy(ldaOptimizer = x))
        .text("LDA Optimizer: em for EM Optimizer or online Online Optimizer")
    }

  /**
    * Configuration assembled by [[parser]] from the command line.
    *
    * @param analysis          choice of analysis to perform (--analysis: flow, proxy or dns)
    * @param inputPath         HDFS path to the input (--input)
    * @param feedbackFile      local path of the file with the feedback scores (--feedback)
    * @param duplicationFactor duplication factor applied to the feedback (--dupfactor)
    * @param topicCount        number of topics in the topic model (--ldatopiccount)
    * @param userDomain        domain of the spot user (--userdomain)
    * @param hdfsScoredConnect HDFS path for the results (--scored)
    * @param threshold         probability threshold for declaring anomalies (--threshold)
    * @param maxResults        number of most suspicious connections to return (--maxresults)
    * @param outputDelimiter   delimiter for the output (--delimiter)
    * @param ldaPRGSeed        optional seed for the pseudorandom generator (--prgseed)
    * @param ldaMaxiterations  maximum number of iterations used in LDA (--ldamaxiterations)
    * @param ldaAlpha          document concentration for LDA (--ldaalpha)
    * @param ldaBeta           topic concentration for LDA (--ldabeta)
    * @param ldaOptimizer      LDA optimizer, "em" or "online" (--ldaoptimizer)
    * @param precisionUtility  floating point precision utility chosen by --precision
    */
  case class SuspiciousConnectsConfig(analysis: String = "",
                                      inputPath: String = "",
                                      feedbackFile: String = "",
                                      duplicationFactor: Int = 1,
                                      topicCount: Int = 20,
                                      userDomain: String = "",
                                      hdfsScoredConnect: String = "",
                                      threshold: Double = 1.0d,
                                      maxResults: Int = -1,
                                      outputDelimiter: String = "\t",
                                      ldaPRGSeed: Option[Long] = None,
                                      ldaMaxiterations: Int = 20,
                                      ldaAlpha: Double = 1.02,
                                      ldaBeta: Double = 1.001,
                                      ldaOptimizer: String = "em",
                                      precisionUtility: FloatPointPrecisionUtility = FloatPointPrecisionUtility64)

}