blob: 9d4417fe597121da8144a4c6a667fc085e87b13b [file] [log] [blame]
/*
* Druid - a distributed column store.
* Copyright (C) 2012 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package com.metamx;
import com.google.common.base.Charsets;
import com.google.common.collect.Maps;
import com.metamx.common.IAE;
import com.metamx.common.ISE;
import org.codehaus.jackson.map.ObjectMapper;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.Map;
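/**
 * Command-line tool that converts a tab-separated (TSV) file into a file with one JSON object per line.
 *
 * Usage: TsvToJson fieldSpecs inFile outFile
 *
 * fieldSpecs is a comma-separated list with one entry per input column, each either "name" or
 * "name:type". Supported types are "string" (the default), "number" and "ISO8601"; an ISO8601 column
 * is written as milliseconds since the epoch. Both files are processed as UTF-8.
 */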
public class TsvToJson
{
/**
 * Converts a tab-separated text file into a file containing one JSON object per line.
 *
 * <p>Arguments:
 * <ul>
 *   <li>{@code args[0]}: comma-separated field specs, one per input column in column order, each
 *       {@code name} or {@code name:type}; {@code type} is {@code string} (the default),
 *       {@code number} or {@code ISO8601}, matched case-insensitively</li>
 *   <li>{@code args[1]}: path of the TSV input, read as UTF-8</li>
 *   <li>{@code args[2]}: path of the JSON output, written as UTF-8</li>
 * </ul>
 *
 * @param args command-line arguments as described above; arguments beyond the third are ignored
 * @throws IOException if reading the input or writing the output fails
 * @throws IAE if fewer than three arguments are given, a field spec is malformed or names an
 *             unknown type, or a line's column count does not match the number of field specs
 */
public static void main(String[] args) throws IOException
{
  if (args == null || args.length < 3) {
    throw new IAE(
        "Expected arguments [fieldSpecs, inFile, outFile] but got %d argument(s)",
        args == null ? 0 : args.length
    );
  }

  final FieldHandler[] handlers = parseFieldSpecs(args[0]);
  final File inFile = new File(args[1]);
  final File outFile = new File(args[2]);
  final ObjectMapper mapper = new ObjectMapper();

  // Nested try/finally: the reader is closed even when opening or closing the writer throws.
  final BufferedReader in = new BufferedReader(
      new InputStreamReader(new FileInputStream(inFile), Charsets.UTF_8)
  );
  try {
    final BufferedWriter out = new BufferedWriter(
        new OutputStreamWriter(new FileOutputStream(outFile), Charsets.UTF_8)
    );
    try {
      convert(in, out, handlers, mapper);
    }
    finally {
      out.close();
    }
  }
  finally {
    in.close();
  }
}

/**
 * Builds one {@link FieldHandler} per comma-separated {@code name[:type]} entry of {@code fieldSpecs}.
 */
private static FieldHandler[] parseFieldSpecs(String fieldSpecs)
{
  final String[] fields = fieldSpecs.split(",");
  final FieldHandler[] handlers = new FieldHandler[fields.length];

  for (int i = 0; i < fields.length; i++) {
    final String[] fieldParts = fields[i].split(":");
    // A spec made only of ':' characters splits to an empty array; report it instead of
    // failing with an index error on fieldParts[0].
    if (fieldParts.length == 0) {
      throw new IAE("Malformed field spec[%s]", fields[i]);
    }
    final String fieldName = fieldParts[0];

    if (fieldParts.length < 2 || "string".equalsIgnoreCase(fieldParts[1])) {
      handlers[i] = new StringField(fieldName);
    } else if ("number".equalsIgnoreCase(fieldParts[1])) {
      handlers[i] = new NumberField(fieldName);
    } else if ("ISO8601".equalsIgnoreCase(fieldParts[1])) {
      // Case-insensitive, consistent with the other type names.
      handlers[i] = new IsoToNumberField(fieldName);
    } else {
      throw new IAE("Unknown type[%s]", fieldParts[1]);
    }
  }
  return handlers;
}

/**
 * Reads {@code in} line by line, applies {@code handlers} column-wise and writes one JSON object
 * per converted line to {@code out}, printing progress to standard output.
 */
private static void convert(
    BufferedReader in,
    BufferedWriter out,
    FieldHandler[] handlers,
    ObjectMapper mapper
) throws IOException
{
  final long startTime = System.currentTimeMillis();
  long lastReportTime = startTime;
  // long rather than int so inputs with more than Integer.MAX_VALUE lines do not overflow.
  long count = 0;

  String line;
  while ((line = in.readLine()) != null) {
    // Report before handling the line whenever the number of lines read so far is a multiple of 1,000,000.
    if (count % 1000000 == 0) {
      final long nowTime = System.currentTimeMillis();
      System.out.printf(
          "Processed [%,d] lines in %,d millis. Incremental time %,d millis.%n",
          count,
          nowTime - startTime,
          nowTime - lastReportTime
      );
      lastReportTime = nowTime;
    }
    ++count;

    // Limit -1 keeps trailing empty columns; the default limit drops them, which made rows
    // ending in empty values fail the column-count check below.
    final String[] splits = line.split("\t", -1);

    // NOTE(review): lines with exactly 30 columns are silently skipped (still counted). The
    // reason is not visible in this file - presumably specific to one data set; confirm.
    if (splits.length == 30) {
      continue;
    }
    if (splits.length != handlers.length) {
      throw new IAE(
          "splits.length[%d] != handlers.length[%d]; line[%s]",
          splits.length,
          handlers.length,
          line
      );
    }

    // Linked map so the JSON keys appear in column order.
    final Map<String, Object> jsonMap = Maps.newLinkedHashMap();
    for (int i = 0; i < handlers.length; ++i) {
      jsonMap.put(handlers[i].getFieldName(), handlers[i].process(splits[i]));
    }
    out.write(mapper.writeValueAsString(jsonMap));
    out.write("\n");
  }

  System.out.printf("Completed %,d lines in %,d millis.%n", count, System.currentTimeMillis() - startTime);
  out.flush();
}
/**
 * Converts the raw text of one input column into the value stored in the output JSON object.
 */
public static interface FieldHandler
{
  /**
   * @return the key under which this field's value is put into the output object
   */
  String getFieldName();

  /**
   * @param value raw text of this field's column for one input line
   * @return the value to store for this field
   */
  Object process(String value);
}
/**
 * {@link FieldHandler} that passes the column text through unchanged.
 */
public static class StringField implements FieldHandler
{
  private final String name;

  /**
   * @param fieldName name reported by {@link #getFieldName()}
   */
  public StringField(String fieldName)
  {
    this.name = fieldName;
  }

  /**
   * @return the field name given at construction
   */
  @Override
  public String getFieldName()
  {
    return name;
  }

  /**
   * @param value raw column text
   * @return {@code value} itself, without any conversion
   */
  @Override
  public Object process(String value)
  {
    return value;
  }
}
/**
 * {@link FieldHandler} that parses the column text as a number: a {@code Long} when the text is a
 * valid long, otherwise a {@code Double}.
 */
public static class NumberField implements FieldHandler
{
  private final String name;

  /**
   * @param fieldName name reported by {@link #getFieldName()}
   */
  public NumberField(String fieldName)
  {
    this.name = fieldName;
  }

  /**
   * @return the field name given at construction
   */
  @Override
  public String getFieldName()
  {
    return name;
  }

  /**
   * @param value raw column text
   * @return the parsed {@code Long}, or a {@code Double} when the text is not a valid long
   * @throws NumberFormatException if the text is neither a valid long nor a valid double
   */
  @Override
  public Object process(String value)
  {
    Object parsed;
    try {
      parsed = Long.valueOf(value);
    }
    catch (NumberFormatException notIntegral) {
      // Not an integral value; fall back to floating point.
      parsed = Double.valueOf(value);
    }
    return parsed;
  }
}
/**
 * {@link FieldHandler} that parses the column text as an ISO8601 timestamp and returns it as
 * milliseconds since the epoch.
 */
public static class IsoToNumberField implements FieldHandler
{
  private final String fieldName;

  /**
   * @param fieldName name reported by {@link #getFieldName()}
   */
  public IsoToNumberField(String fieldName)
  {
    this.fieldName = fieldName;
  }

  /**
   * @return the field name given at construction
   */
  @Override
  public String getFieldName()
  {
    return fieldName;
  }

  /**
   * @param value ISO8601 timestamp text
   * @return the instant as a {@code Long} count of milliseconds since the epoch
   */
  @Override
  public Object process(String value)
  {
    // Parse with an explicit UTC zone: without it, text that carries no offset is interpreted
    // in the JVM's default time zone, so the same input produced different millis on
    // differently configured machines. Text with an explicit offset yields the same instant
    // as before.
    return new DateTime(value, DateTimeZone.UTC).getMillis();
  }
}
}