| { |
| "type": "index_hadoop", |
| "spec": { |
| "ioConfig": { |
| "type": "hadoop", |
| "inputSpec": { |
| "type": "static", |
| "inputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat", |
| "paths": "wikipedia.gz.orc" |
| }, |
| "metadataUpdateSpec": { |
| "type": "postgresql", |
| "connectURI": "jdbc:postgresql://localhost/druid", |
| "user" : "druid", |
| "password" : "asdf", |
| "segmentTable": "druid_segments" |
| }, |
| "segmentOutputPath": "/tmp/segments" |
| }, |
| "dataSchema": { |
| "dataSource": "wikipedia", |
| "parser": { |
| "type": "orc", |
| "parseSpec": { |
| "format": "timeAndDims", |
| "timestampSpec": { |
| "column": "timestamp", |
| "format": "auto" |
| }, |
| "dimensionsSpec": { |
| "dimensions": [ |
| "col1", |
| "col2" |
| ], |
| "dimensionExclusions": [], |
| "spatialDimensions": [] |
| } |
| }, |
| "typeString": "struct<timestamp:string,col1:string,col2:array<string>,val1:float>" |
| }, |
| "metricsSpec": [], |
| "granularitySpec": { |
| "type": "uniform", |
| "segmentGranularity": "DAY", |
| "queryGranularity": "NONE", |
| "intervals": ["2015-01-01/2017-01-01"] |
| } |
| }, |
| "tuningConfig": { |
| "type": "hadoop", |
| "workingPath": "/tmp/working_path", |
| "partitionsSpec": { |
| "targetPartitionSize": 5000000 |
| }, |
| "jobProperties" : { |
| "mapreduce.map.java.opts": "-server -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps", |
| "mapreduce.reduce.java.opts": "-server -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps", |
| "mapred.child.java.opts": "-server -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" |
| }, |
| "leaveIntermediate": true |
| } |
| } |
| } |