blob: 45bbe660c8ee23f3b6460effdb26ff9fec3760d5 [file] [log] [blame]
package org.apache.s2graph.s2jobs.wal.transformer
import org.apache.s2graph.s2jobs.task.TaskConf
import org.apache.s2graph.s2jobs.wal.utils.UrlUtils
import org.apache.s2graph.s2jobs.wal.{DimVal, WalLog}
import play.api.libs.json.Json
/**
 * Transformer that turns URL-valued properties into host / domain / keyword dimensions.
 *
 * Recognised keys in `taskConf.options`:
 *  - `urlDimensions`  : JSON array of property keys whose values should be processed
 *                       (default `[]`, i.e. no property is processed).
 *  - `hostDimName`    : dimension name for the first extracted domain (default `host`).
 *  - `domainDimName`  : dimension name for every extracted domain (default `domain`).
 *  - `keywordDimName` : dimension name for the extracted keyword (default `uri_keywords`).
 *
 * @param taskConf task configuration supplying the options above
 */
case class ExtractDomain(taskConf: TaskConf) extends Transformer(taskConf) {

  /** Property keys whose values are handed to `UrlUtils.extract`. */
  val urlDimensions: Set[String] = {
    val rawJson = taskConf.options.getOrElse("urlDimensions", "[]")
    Json.parse(rawJson).as[Set[String]]
  }

  /** Dimension name attached to the first domain returned by `UrlUtils.extract`. */
  val hostDimName: String = taskConf.options.getOrElse("hostDimName", "host")

  /** Dimension name attached to every domain returned by `UrlUtils.extract`. */
  val domainDimName: String = taskConf.options.getOrElse("domainDimName", "domain")

  /** Dimension name attached to the optional keyword returned by `UrlUtils.extract`. */
  val keywordDimName: String = taskConf.options.getOrElse("keywordDimName", "uri_keywords")

  /**
   * Produces the dimension values derived from a single property.
   *
   * @param walLog        the log entry the property belongs to (not consulted here)
   * @param propertyKey   name of the property; only keys listed in `urlDimensions` are processed
   * @param propertyValue value handed to `UrlUtils.extract`
   * @return `Nil` when `propertyKey` is not a configured URL dimension; otherwise the host
   *         entry (if any domain was extracted), followed by one entry per domain, followed
   *         by the keyword entry (if present)
   */
  override def toDimValLs(walLog: WalLog, propertyKey: String, propertyValue: String): Seq[DimVal] =
    if (urlDimensions.contains(propertyKey)) {
      // NOTE(review): the first component of the extracted triple is ignored here; the exact
      // semantics of each component are defined by UrlUtils.extract — confirm there.
      val (_, extractedDomains, keywordOpt) = UrlUtils.extract(propertyValue)

      val hostDimVals = extractedDomains.headOption.toSeq.map(host => DimVal(hostDimName, host))
      val domainDimVals = extractedDomains.map(domain => DimVal(domainDimName, domain))
      val keywordDimVals = keywordOpt.toSeq.map(keyword => DimVal(keywordDimName, keyword))

      hostDimVals ++ domainDimVals ++ keywordDimVals
    } else {
      Nil
    }
}