{
  "name": "Batch-Preprocessing-Example",
  "process.type": "batch",
  "data.sources": [
    {
      "name": "crime_report_source",
      "baseline": true,
      "connector": {
        "type": "file",
        "config": {
          "format": "text",
          "paths": [
            "measure/src/main/resources/crime_report.csv"
          ]
        },
        "pre.proc": [
          "select split(value, ',') as part from this",
          "select part[0] as date_time, part[1] as incident, part[2] as address, part[3] as city, part[4] as zipcode from this",
          "select cast(date_time as timestamp) as date_time, incident, address, city, cast(zipcode as int) as zipcode from this"
        ]
      }
    }
  ],
  "measures": [
    {
      "name": "completeness_measure",
      "type": "completeness",
      "data.source": "crime_report_source",
      "config": {
        "expr": "zipcode is null OR city is null"
      },
      "out": [
        {
          "type": "metric",
          "name": "comp_metric",
          "flatten": "map"
        },
        {
          "type": "record",
          "name": "comp_records"
        }
      ]
    }
  ],
  "sinks": [
    "consoleSink"
  ]
}