blob: e3629d16585bf89617b471583446163239818bbd [file] [log] [blame]
{
  "name": "dist_streaming",
  "process.type": "streaming",
  "data.sources": [
    {
      "name": "new",
      "checkpoint": {
        "file.path": "hdfs://localhost/griffin/streaming/dump/old",
        "info.path": "new",
        "ready.time.interval": "10s",
        "ready.time.delay": "0",
        "time.range": ["0", "0"],
        "read.only": true
      }
    },
    {
      "name": "old",
      "connectors": [
        {
          "type": "kafka",
          "version": "0.8",
          "dataframe.name": "this",
          "config": {
            "kafka.config": {
              "bootstrap.servers": "10.149.247.156:9092",
              "group.id": "old",
              "auto.offset.reset": "smallest",
              "auto.commit.enable": "false"
            },
            "topics": "ttt",
            "key.type": "java.lang.String",
            "value.type": "java.lang.String"
          },
          "pre.proc": [
            {
              "dsl.type": "df-ops",
              "in.dataframe.name": "this",
              "out.dataframe.name": "s1",
              "rule": "from_json"
            },
            {
              "dsl.type": "spark-sql",
              "out.dataframe.name": "this",
              "rule": "select name, age from s1"
            }
          ]
        }
      ],
      "checkpoint": {
        "file.path": "hdfs://localhost/griffin/streaming/dump/old",
        "info.path": "old",
        "ready.time.interval": "10s",
        "ready.time.delay": "0",
        "time.range": ["-24h", "0"]
      }
    }
  ],
  "evaluate.rule": {
    "rules": [
      {
        "dsl.type": "griffin-dsl",
        "dq.type": "distinct",
        "out.dataframe.name": "dist",
        "rule": "name, age",
        "details": {
          "source": "new",
          "target": "old",
          "total": "total",
          "distinct": "distinct",
          "dup": "dup",
          "accu_dup": "accu_dup",
          "num": "num",
          "duplication.array": "dup"
        },
        "out": [
          {
            "type": "metric",
            "name": "distinct"
          }
        ]
      }
    ]
  },
  "sinks": ["CONSOLE", "ELASTICSEARCH"]
}