// blob: ab68e9758773dc8a4483958e92ba85f653b68b4e [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.benchmark;
import com.google.common.collect.Maps;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.tajo.BuiltinStorages;
import org.apache.tajo.catalog.CatalogUtil;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.catalog.partition.PartitionMethodDesc;
import org.apache.tajo.catalog.proto.CatalogProtos;
import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.common.TajoDataTypes.Type;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.exception.TajoException;
import java.io.IOException;
import java.util.Map;
/**
 * TPC-H benchmark set.
 *
 * <p>Declares the TPC-H table names, a per-table volume figure, the column
 * schema of every table, the output schema of query {@code q2}, and the logic
 * that registers each table as an external table through the inherited
 * {@code tajo} client.
 */
public class TPCH extends BenchmarkSet {
  private static final Log LOG = LogFactory.getLog(TPCH.class);

  /** Directory handed to the inherited {@code loadQueries(String)}. */
  private static final String BENCHMARK_DIR = "benchmark/tpch";

  public static final String LINEITEM = "lineitem";
  public static final String CUSTOMER = "customer";
  public static final String CUSTOMER_PARTS = "customer_parts";
  public static final String NATION = "nation";
  public static final String PART = "part";
  public static final String REGION = "region";
  public static final String ORDERS = "orders";
  public static final String PARTSUPP = "partsupp";
  public static final String SUPPLIER = "supplier";
  public static final String SUPPLIER_COPY = "small_supplier";
  public static final String EMPTY_ORDERS = "empty_orders";

  /** Tables registered by {@link #loadTables()}, in registration order. */
  private static final String[] TABLE_LOAD_ORDER = {
      LINEITEM,
      CUSTOMER,
      CUSTOMER_PARTS,
      NATION,
      PART,
      REGION,
      ORDERS,
      PARTSUPP,
      SUPPLIER,
      SUPPLIER_COPY,
      EMPTY_ORDERS
  };

  /**
   * Volume figure for each table, keyed by table name.
   *
   * <p>NOTE(review): the unit is not stated in this file; presumably bytes --
   * confirm against the callers that read this map.
   */
  public static final Map<String, Long> tableVolumes = Maps.newHashMap();

  static {
    tableVolumes.put(LINEITEM, 759863287L);
    tableVolumes.put(CUSTOMER, 24346144L);
    tableVolumes.put(CUSTOMER_PARTS, 707L);
    tableVolumes.put(NATION, 2224L);
    tableVolumes.put(PART, 24135125L);
    tableVolumes.put(REGION, 389L);
    tableVolumes.put(ORDERS, 171952161L);
    tableVolumes.put(PARTSUPP, 118984616L);
    tableVolumes.put(SUPPLIER, 1409184L);
    tableVolumes.put(SUPPLIER_COPY, 5120L);
    tableVolumes.put(EMPTY_ORDERS, 0L);
  }

  /**
   * Builds the column schema of every table and stores it in the inherited
   * {@code schemas} map under the table's name.
   *
   * <p>{@link #EMPTY_ORDERS} shares the {@link #ORDERS} schema instance and
   * {@link #SUPPLIER_COPY} shares the {@link #SUPPLIER} schema instance.
   */
  @Override
  public void loadSchemas() {
    Schema lineitem = new Schema()
        .addColumn("l_orderkey", Type.INT4) // 0
        .addColumn("l_partkey", Type.INT4) // 1
        .addColumn("l_suppkey", Type.INT4) // 2
        .addColumn("l_linenumber", Type.INT4) // 3
        .addColumn("l_quantity", Type.FLOAT8) // 4
        .addColumn("l_extendedprice", Type.FLOAT8) // 5
        .addColumn("l_discount", Type.FLOAT8) // 6
        .addColumn("l_tax", Type.FLOAT8) // 7
        // TODO - This is a temporary solution. 8 and 9 are actually Char type.
        .addColumn("l_returnflag", Type.TEXT) // 8
        .addColumn("l_linestatus", Type.TEXT) // 9
        // TODO - This is a temporary solution. 10, 11, and 12 are actually Date type.
        .addColumn("l_shipdate", Type.TEXT) // 10
        .addColumn("l_commitdate", Type.TEXT) // 11
        .addColumn("l_receiptdate", Type.TEXT) // 12
        .addColumn("l_shipinstruct", Type.TEXT) // 13
        .addColumn("l_shipmode", Type.TEXT) // 14
        .addColumn("l_comment", Type.TEXT); // 15
    schemas.put(LINEITEM, lineitem);

    Schema customer = new Schema()
        .addColumn("c_custkey", Type.INT4) // 0
        .addColumn("c_name", Type.TEXT) // 1
        .addColumn("c_address", Type.TEXT) // 2
        .addColumn("c_nationkey", Type.INT4) // 3
        .addColumn("c_phone", Type.TEXT) // 4
        .addColumn("c_acctbal", Type.FLOAT8) // 5
        .addColumn("c_mktsegment", Type.TEXT) // 6
        .addColumn("c_comment", Type.TEXT); // 7
    schemas.put(CUSTOMER, customer);

    // Same columns as "customer" except c_nationkey, which is not part of this
    // schema; loadTable() declares it as the partition column for this table.
    Schema customerParts = new Schema()
        .addColumn("c_custkey", Type.INT4) // 0
        .addColumn("c_name", Type.TEXT) // 1
        .addColumn("c_address", Type.TEXT) // 2
        .addColumn("c_phone", Type.TEXT) // 3
        .addColumn("c_acctbal", Type.FLOAT8) // 4
        .addColumn("c_mktsegment", Type.TEXT) // 5
        .addColumn("c_comment", Type.TEXT); // 6
    schemas.put(CUSTOMER_PARTS, customerParts);

    Schema nation = new Schema()
        .addColumn("n_nationkey", Type.INT4) // 0
        .addColumn("n_name", Type.TEXT) // 1
        .addColumn("n_regionkey", Type.INT4) // 2
        .addColumn("n_comment", Type.TEXT); // 3
    schemas.put(NATION, nation);

    Schema part = new Schema()
        .addColumn("p_partkey", Type.INT4) // 0
        .addColumn("p_name", Type.TEXT) // 1
        .addColumn("p_mfgr", Type.TEXT) // 2
        .addColumn("p_brand", Type.TEXT) // 3
        .addColumn("p_type", Type.TEXT) // 4
        .addColumn("p_size", Type.INT4) // 5
        .addColumn("p_container", Type.TEXT) // 6
        .addColumn("p_retailprice", Type.FLOAT8) // 7
        .addColumn("p_comment", Type.TEXT); // 8
    schemas.put(PART, part);

    Schema region = new Schema()
        .addColumn("r_regionkey", Type.INT4) // 0
        .addColumn("r_name", Type.TEXT) // 1
        .addColumn("r_comment", Type.TEXT); // 2
    schemas.put(REGION, region);

    Schema orders = new Schema()
        .addColumn("o_orderkey", Type.INT4) // 0
        .addColumn("o_custkey", Type.INT4) // 1
        .addColumn("o_orderstatus", Type.TEXT) // 2
        .addColumn("o_totalprice", Type.FLOAT8) // 3
        // TODO - This is a temporary solution. o_orderdate is actually Date type.
        .addColumn("o_orderdate", Type.TEXT) // 4
        .addColumn("o_orderpriority", Type.TEXT) // 5
        .addColumn("o_clerk", Type.TEXT) // 6
        .addColumn("o_shippriority", Type.INT4) // 7
        .addColumn("o_comment", Type.TEXT); // 8
    schemas.put(ORDERS, orders);
    schemas.put(EMPTY_ORDERS, orders);

    Schema partsupp = new Schema()
        .addColumn("ps_partkey", Type.INT4) // 0
        .addColumn("ps_suppkey", Type.INT4) // 1
        .addColumn("ps_availqty", Type.INT4) // 2
        .addColumn("ps_supplycost", Type.FLOAT8) // 3
        .addColumn("ps_comment", Type.TEXT); // 4
    schemas.put(PARTSUPP, partsupp);

    Schema supplier = new Schema()
        .addColumn("s_suppkey", Type.INT4) // 0
        .addColumn("s_name", Type.TEXT) // 1
        .addColumn("s_address", Type.TEXT) // 2
        .addColumn("s_nationkey", Type.INT4) // 3
        .addColumn("s_phone", Type.TEXT) // 4
        .addColumn("s_acctbal", Type.FLOAT8) // 5
        .addColumn("s_comment", Type.TEXT); // 6
    schemas.put(SUPPLIER, supplier);
    schemas.put(SUPPLIER_COPY, supplier);
  }

  /**
   * Builds the output schema of query {@code q2} and stores it in the
   * inherited {@code outSchemas} map under the key {@code "q2"}.
   */
  public void loadOutSchema() {
    Schema q2 = new Schema()
        .addColumn("s_acctbal", Type.FLOAT8)
        .addColumn("s_name", Type.TEXT)
        .addColumn("n_name", Type.TEXT)
        .addColumn("p_partkey", Type.INT4)
        .addColumn("p_mfgr", Type.TEXT)
        .addColumn("s_address", Type.TEXT)
        .addColumn("s_phone", Type.TEXT)
        .addColumn("s_comment", Type.TEXT);
    outSchemas.put("q2", q2);
  }

  /**
   * Loads the benchmark queries by delegating to the inherited
   * {@code loadQueries(String)} with the TPC-H benchmark directory.
   *
   * @throws IOException propagated from the inherited loader
   */
  public void loadQueries() throws IOException {
    loadQueries(BENCHMARK_DIR);
  }

  /**
   * Registers every TPC-H table by calling {@link #loadTable(String)} for each
   * name in a fixed order. The first failure aborts the remaining tables.
   *
   * @throws TajoException propagated from {@link #loadTable(String)}
   */
  public void loadTables() throws TajoException {
    for (String tableName : TABLE_LOAD_ORDER) {
      loadTable(tableName);
    }
  }

  /**
   * Registers one table as an external table through the inherited
   * {@code tajo} client, using the TEXT storage type and a location of
   * {@code dataDir/tableName}.
   *
   * <p>{@link #CUSTOMER_PARTS} is additionally given a COLUMN partition method
   * on {@code c_nationkey}; every other table is created without a partition
   * method.
   *
   * @param tableName name of the table to register; must not be {@code null}
   * @throws TajoException propagated from the external-table creation call
   */
  public void loadTable(String tableName) throws TajoException {
    TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, new TajoConf());

    // Stays null for every table other than customer_parts.
    PartitionMethodDesc partitionMethodDesc = null;
    if (tableName.equals(CUSTOMER_PARTS)) {
      Schema expressionSchema = new Schema();
      expressionSchema.addColumn("c_nationkey", Type.INT4);
      partitionMethodDesc = new PartitionMethodDesc(
          tajo.getCurrentDatabase(),
          CUSTOMER_PARTS,
          CatalogProtos.PartitionType.COLUMN,
          "c_nationkey",
          expressionSchema);
    }

    tajo.createExternalTable(tableName, getSchema(tableName),
        new Path(dataDir, tableName).toUri(), meta, partitionMethodDesc);
  }
}