| /* |
| * Druid - a distributed column store. |
| * Copyright (C) 2012 Metamarkets Group Inc. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; either version 2 |
| * of the License, or (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| */ |
| |
| package com.metamx; |
| |
| import com.google.common.base.Charsets; |
| import com.google.common.collect.Maps; |
| import com.metamx.common.IAE; |
| import com.metamx.common.ISE; |
| import org.codehaus.jackson.map.ObjectMapper; |
| import org.joda.time.DateTime; |
| import org.joda.time.DateTimeZone; |
| |
| import java.io.BufferedReader; |
| import java.io.BufferedWriter; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.io.InputStreamReader; |
| import java.io.OutputStreamWriter; |
| import java.util.Map; |
| |
| /** |
| */ |
| public class TsvToJson |
| { |
| public static void main(String[] args) throws IOException |
| { |
| ObjectMapper mapper = new ObjectMapper(); |
| |
| String[] fields = args[0].split(","); |
| File inFile = new File(args[1]); |
| File outFile = new File(args[2]); |
| |
| FieldHandler[] handlers = new FieldHandler[fields.length]; |
| for (int i = 0; i < fields.length; i++) { |
| String field = fields[i]; |
| String[] fieldParts = field.split(":"); |
| String fieldName = fieldParts[0]; |
| if (fieldParts.length < 2 || "string".equalsIgnoreCase(fieldParts[1])) { |
| handlers[i] = new StringField(fieldName); |
| } |
| else if ("number".equalsIgnoreCase(fieldParts[1])) { |
| handlers[i] = new NumberField(fieldName); |
| } |
| else if ("ISO8601".equals(fieldParts[1])) { |
| handlers[i] = new IsoToNumberField(fieldName); |
| } |
| else { |
| throw new IAE("Unknown type[%s]", fieldParts[1]); |
| } |
| } |
| |
| BufferedReader in = null; |
| BufferedWriter out = null; |
| try { |
| in = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), Charsets.UTF_8)); |
| out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFile), Charsets.UTF_8)); |
| String line = null; |
| int count = 0; |
| long currTime = System.currentTimeMillis(); |
| long startTime = currTime; |
| while ((line = in.readLine()) != null) { |
| if (count % 1000000 == 0) { |
| long nowTime = System.currentTimeMillis(); |
| System.out.printf("Processed [%,d] lines in %,d millis. Incremental time %,d millis.%n", count, nowTime - startTime, nowTime - currTime); |
| currTime = nowTime; |
| } |
| ++count; |
| String[] splits = line.split("\t"); |
| |
| if (splits.length == 30) { |
| continue; |
| } |
| |
| if (splits.length != handlers.length) { |
| throw new IAE("splits.length[%d] != handlers.length[%d]; line[%s]", splits.length, handlers.length, line); |
| } |
| |
| Map<String, Object> jsonMap = Maps.newLinkedHashMap(); |
| for (int i = 0; i < handlers.length; ++i) { |
| jsonMap.put(handlers[i].getFieldName(), handlers[i].process(splits[i])); |
| } |
| |
| final String str = mapper.writeValueAsString(jsonMap); |
| out.write(str); |
| out.write("\n"); |
| } |
| System.out.printf("Completed %,d lines in %,d millis.%n", count, System.currentTimeMillis() - startTime); |
| out.flush(); |
| } finally { |
| if (out != null) { |
| out.close(); |
| } |
| if (in != null) { |
| in.close(); |
| } |
| } |
| } |
| |
| public static interface FieldHandler |
| { |
| public String getFieldName(); |
| public Object process(String value); |
| } |
| |
| public static class StringField implements FieldHandler |
| { |
| private final String fieldName; |
| |
| public StringField( |
| String fieldName |
| ) |
| { |
| this.fieldName = fieldName; |
| } |
| |
| |
| @Override |
| public String getFieldName() |
| { |
| return fieldName; |
| } |
| |
| @Override |
| public Object process(String value) |
| { |
| return value; |
| } |
| } |
| |
| public static class NumberField implements FieldHandler |
| { |
| private final String fieldName; |
| |
| public NumberField( |
| String fieldName |
| ) |
| { |
| this.fieldName = fieldName; |
| } |
| |
| |
| @Override |
| public String getFieldName() |
| { |
| return fieldName; |
| } |
| |
| @Override |
| public Object process(String value) |
| { |
| try { |
| return Long.parseLong(value); |
| } catch (NumberFormatException e) { |
| return Double.parseDouble(value); |
| } |
| } |
| } |
| |
| public static class IsoToNumberField implements FieldHandler |
| { |
| |
| private final String fieldName; |
| |
| public IsoToNumberField( |
| String fieldName |
| ) |
| { |
| this.fieldName = fieldName; |
| } |
| |
| @Override |
| public String getFieldName() |
| { |
| return fieldName; |
| } |
| |
| @Override |
| public Object process(String value) |
| { |
| return new DateTime(value).getMillis(); |
| } |
| } |
| } |