{
  "name": "accu_batch",
  "process.type": "batch",
  "data.sources": [
    {
      "name": "source",
      "baseline": true,
      "connector": {
        "type": "file",
        "config": {
          "format": "text",
          "paths": [
            "measure/src/test/resources/users_info_src.csv"
          ]
        },
        "pre.proc": [
          "select split(value, ',') as part from this",
          "select cast(part[0] as long) as user_id, part[1] as first_name, part[2] as last_name, part[3] as address, part[4] as email, part[5] as phone, part[6] as post_code from this"
        ]
      }
    }
  ],
  "measures": [
    {
      "name": "completeness_measure",
      "type": "completeness",
      "data.source": "source",
      "config": {
        "expr": "post_code is null OR address RLIKE '\\\\d+$'"
      },
      "out": [
        {
          "type": "metric",
          "name": "comp_metric",
          "flatten": "map"
        }
      ]
    },
    {
      "name": "profiling_measure",
      "type": "profiling",
      "data.source": "source",
      "config": {
        "expr": [
          "user_id",
          "post_code"
        ],
        "approx.distinct.count": true,
        "round.scale": 2
      },
      "out": [
        {
          "type": "metric",
          "name": "prof_metric",
          "flatten": "default"
        }
      ]
    }
  ],
  "sinks": [
    "consoleSink"
  ]
}