With the Griffin shell, users can run DQ jobs from the command line. This is helpful for debugging and running your own DQ jobs.
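After building, the package measure-x.x.x-package.tar.gz is in the target directory of the measure module. Unpack it, then run a job with the following command, where ENV_FILE is the environment configuration file and DQ_FILE is the DQ job configuration file:
measure-x.x.x/bin/griffin-tool.sh ENV_FILE DQ_FILE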
{ "spark": { "log.level": "WARN", "config": { "spark.master": "local[*]" } }, "sinks": [ { "name": "MyConsoleSink", "type": "CONSOLE", "config": { "max.log.lines": 10 } }, { "name": "MyHDFSSink", "type": "HDFS", "config": { "path": "hdfs://localhost/griffin/batch/persist", "max.persist.lines": 10000, "max.lines.per.file": 10000 } }, { "name": "MyElasticSearchSink", "type": "ELASTICSEARCH", "config": { "method": "post", "api": "http://localhost:9200/griffin/accuracy", "connection.timeout": "1m", "retry": 10 } } ], "griffin.checkpoint": [] }
{ "name": "accu_batch", "process.type": "batch", "data.sources": [ { "name": "source", "baseline": true, "connector": { "type": "jdbc", "config": { "user": "xxx", "password": "xxx", "tablename": "stu", "where": "id < 3", "url":"jdbc:mysql://localhost:3306/test", "database": "test", "driver": "com.mysql.jdbc.Driver" } } }, { "name": "target", "connector": { "type": "jdbc", "config": { "user": "xxx", "password": "xxx", "tablename": "stu2", "where": "id < 3", "url":"jdbc:mysql://localhost:3306/test", "database": "test", "driver": "com.mysql.jdbc.Driver" } } } ], "evaluate.rule": { "rules": [ { "dsl.type": "griffin-dsl", "dq.type": "accuracy", "out.dataframe.name": "accu", "rule": "source.id = target.id AND upper(source.name) = upper(target.name) ", "details": { "source": "source", "target": "target", "miss": "miss_count", "total": "total_count", "matched": "matched_count" }, "out": [ { "type": "record", "name": "missRecords" } ] } ] }, "sinks": [ "MyConsoleSink" ] }