| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.cloudera.sqoop.hive; |
| |
| import java.io.BufferedReader; |
| import java.io.File; |
| import java.io.FileReader; |
| import java.io.IOException; |
| import java.util.ArrayList; |
| |
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.HsqldbTestServer;
import com.cloudera.sqoop.testutil.ImportJobTestCase;
import com.cloudera.sqoop.tool.BaseSqoopTool;
import com.cloudera.sqoop.tool.CodeGenTool;
import com.cloudera.sqoop.tool.CreateHiveTableTool;
import com.cloudera.sqoop.tool.ImportTool;
import com.cloudera.sqoop.tool.SqoopTool;
| |
| /** |
| * Test HiveImport capability after an import to HDFS. |
| */ |
| public class TestHiveImport extends ImportJobTestCase { |
| |
| public static final Log LOG = LogFactory.getLog( |
| TestHiveImport.class.getName()); |
| |
| /** |
| * Sets the expected number of columns in the table being manipulated |
| * by the test. Under the hood, this sets the expected column names |
| * to DATA_COLi for 0 <= i < numCols. |
| * @param numCols the number of columns to be created. |
| */ |
| private void setNumCols(int numCols) { |
| String [] cols = new String[numCols]; |
| for (int i = 0; i < numCols; i++) { |
| cols[i] = "DATA_COL" + i; |
| } |
| |
| setColNames(cols); |
| } |
| |
| /** |
| * Create the argv to pass to Sqoop. |
| * @return the argv as an array of strings. |
| */ |
| protected String [] getArgv(boolean includeHadoopFlags, String [] moreArgs) { |
| ArrayList<String> args = new ArrayList<String>(); |
| |
| if (includeHadoopFlags) { |
| CommonArgs.addHadoopFlags(args); |
| } |
| |
| if (null != moreArgs) { |
| for (String arg: moreArgs) { |
| args.add(arg); |
| } |
| } |
| |
| args.add("--table"); |
| args.add(getTableName()); |
| args.add("--warehouse-dir"); |
| args.add(getWarehouseDir()); |
| args.add("--connect"); |
| args.add(HsqldbTestServer.getUrl()); |
| args.add("--hive-import"); |
| String [] colNames = getColNames(); |
| if (null != colNames) { |
| args.add("--split-by"); |
| args.add(colNames[0]); |
| } else { |
| fail("Could not determine column names."); |
| } |
| |
| args.add("--num-mappers"); |
| args.add("1"); |
| |
| for (String a : args) { |
      LOG.debug("ARG : " + a);
| } |
| |
| return args.toArray(new String[0]); |
| } |
| |
| /** |
| * @return the argv to supply to a create-table only job for Hive imports. |
| */ |
| protected String [] getCreateTableArgv(boolean includeHadoopFlags, |
| String [] moreArgs) { |
| |
| ArrayList<String> args = new ArrayList<String>(); |
| |
| if (null != moreArgs) { |
| for (String arg: moreArgs) { |
| args.add(arg); |
| } |
| } |
| |
| args.add("--table"); |
| args.add(getTableName()); |
| args.add("--connect"); |
| args.add(HsqldbTestServer.getUrl()); |
| |
| return args.toArray(new String[0]); |
| } |
| |
| /** |
| * @return the argv to supply to a code-gen only job for Hive imports. |
| */ |
| protected String [] getCodeGenArgs() { |
| ArrayList<String> args = new ArrayList<String>(); |
| |
| args.add("--table"); |
| args.add(getTableName()); |
| args.add("--connect"); |
| args.add(HsqldbTestServer.getUrl()); |
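    // With --hive-import, the codegen tool also emits the Hive DDL script.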
| args.add("--hive-import"); |
| |
| return args.toArray(new String[0]); |
| } |
| |
| /** |
| * @return the argv to supply to a ddl-executing-only job for Hive imports. |
| */ |
| protected String [] getCreateHiveTableArgs(String [] extraArgs) { |
| ArrayList<String> args = new ArrayList<String>(); |
| |
| args.add("--table"); |
| args.add(getTableName()); |
| args.add("--connect"); |
| args.add(HsqldbTestServer.getUrl()); |
| |
| if (null != extraArgs) { |
| for (String arg : extraArgs) { |
| args.add(arg); |
| } |
| } |
| |
| return args.toArray(new String[0]); |
| } |
| |
| private SqoopOptions getSqoopOptions(String [] args, SqoopTool tool) { |
| SqoopOptions opts = null; |
| try { |
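      // Parse with no pre-set Configuration or SqoopOptions; the final
      // flag also enables parsing of generic Hadoop options.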
| opts = tool.parseArguments(args, null, null, true); |
| } catch (Exception e) { |
| fail("Invalid options: " + e.toString()); |
| } |
| |
| return opts; |
| } |
| |
| private void runImportTest(String tableName, String [] types, |
| String [] values, String verificationScript, String [] args, |
| SqoopTool tool) throws IOException { |
| |
| // create a table and populate it with a row... |
| createTableWithColTypes(types, values); |
| |
| // set up our mock hive shell to compare our generated script |
| // against the correct expected one. |
| SqoopOptions options = getSqoopOptions(args, tool); |
| String hiveHome = options.getHiveHome(); |
| assertNotNull("hive.home was not set", hiveHome); |
| Path testDataPath = new Path(new Path(hiveHome), |
| "scripts/" + verificationScript); |
| System.setProperty("expected.script", testDataPath.toString()); |
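    // The mock bin/hive used by these tests reads this property and fails
    // if the script Sqoop generates does not match the expected one.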
| |
| // verify that we can import it correctly into hive. |
| runImport(tool, args); |
| } |
| |
| /** Test that we can generate a file containing the DDL and not import. */ |
| @Test |
| public void testGenerateOnly() throws IOException { |
| final String TABLE_NAME = "GenerateOnly"; |
| setCurTableName(TABLE_NAME); |
| setNumCols(1); |
| |
| // Figure out where our target generated .q file is going to be. |
| SqoopOptions options = getSqoopOptions(getArgv(false, null), |
| new ImportTool()); |
| Path ddlFile = new Path(new Path(options.getCodeOutputDir()), |
| TABLE_NAME + ".q"); |
| FileSystem fs = FileSystem.getLocal(new Configuration()); |
| |
| // If it's already there, remove it before running the test to ensure |
| // that it's the current test that generated the file. |
| if (fs.exists(ddlFile)) { |
| if (!fs.delete(ddlFile, false)) { |
| LOG.warn("Could not delete previous ddl file: " + ddlFile); |
| } |
| } |
| |
| // Run a basic import, but specify that we're just generating definitions. |
| String [] types = { "INTEGER" }; |
| String [] vals = { "42" }; |
| runImportTest(TABLE_NAME, types, vals, null, getCodeGenArgs(), |
| new CodeGenTool()); |
| |
| // Test that the generated definition file exists. |
| assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile)); |
| |
| Path hiveImportPath = new Path(new Path(options.getWarehouseDir()), |
| TABLE_NAME); |
| assertFalse("Import actually happened!", fs.exists(hiveImportPath)); |
| } |
| |
| |
| /** Test that strings and ints are handled in the normal fashion. */ |
| @Test |
| public void testNormalHiveImport() throws IOException { |
| final String TABLE_NAME = "NORMAL_HIVE_IMPORT"; |
| setCurTableName(TABLE_NAME); |
| setNumCols(3); |
| String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" }; |
| String [] vals = { "'test'", "42", "'somestring'" }; |
| runImportTest(TABLE_NAME, types, vals, "normalImport.q", |
| getArgv(false, null), new ImportTool()); |
| } |
| |
  /** Test that the table is created in Hive with no data import. */
| @Test |
| public void testCreateOnlyHiveImport() throws IOException { |
| final String TABLE_NAME = "CREATE_ONLY_HIVE_IMPORT"; |
| setCurTableName(TABLE_NAME); |
| setNumCols(3); |
| String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" }; |
| String [] vals = { "'test'", "42", "'somestring'" }; |
| runImportTest(TABLE_NAME, types, vals, |
| "createOnlyImport.q", getCreateHiveTableArgs(null), |
| new CreateHiveTableTool()); |
| } |
| |
| /** |
| * Test that table is created in hive and replaces the existing table if |
| * any. |
| */ |
| @Test |
| public void testCreateOverwriteHiveImport() throws IOException { |
| final String TABLE_NAME = "CREATE_OVERWRITE_HIVE_IMPORT"; |
| setCurTableName(TABLE_NAME); |
| setNumCols(3); |
| String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" }; |
| String [] vals = { "'test'", "42", "'somestring'" }; |
| String [] extraArgs = {"--hive-overwrite", "--create-hive-table"}; |
| runImportTest(TABLE_NAME, types, vals, |
| "createOverwriteImport.q", getCreateHiveTableArgs(extraArgs), |
| new CreateHiveTableTool()); |
| } |
| |
| /** Test that dates are coerced properly to strings. */ |
| @Test |
| public void testDate() throws IOException { |
| final String TABLE_NAME = "DATE_HIVE_IMPORT"; |
| setCurTableName(TABLE_NAME); |
| setNumCols(2); |
| String [] types = { "VARCHAR(32)", "DATE" }; |
| String [] vals = { "'test'", "'2009-05-12'" }; |
| runImportTest(TABLE_NAME, types, vals, "dateImport.q", |
| getArgv(false, null), new ImportTool()); |
| } |
| |
| /** Test that NUMERICs are coerced to doubles. */ |
| @Test |
| public void testNumeric() throws IOException { |
| final String TABLE_NAME = "NUMERIC_HIVE_IMPORT"; |
| setCurTableName(TABLE_NAME); |
| setNumCols(2); |
| String [] types = { "NUMERIC", "CHAR(64)" }; |
| String [] vals = { "3.14159", "'foo'" }; |
| runImportTest(TABLE_NAME, types, vals, "numericImport.q", |
| getArgv(false, null), new ImportTool()); |
| } |
| |
| /** If bin/hive returns an error exit status, we should get an IOException. */ |
| @Test |
| public void testHiveExitFails() { |
    // The expected script differs from the one this import would generate,
    // so we expect the Hive import to fail with an IOException.
| final String TABLE_NAME = "FAILING_HIVE_IMPORT"; |
| setCurTableName(TABLE_NAME); |
| setNumCols(2); |
| String [] types = { "NUMERIC", "CHAR(64)" }; |
| String [] vals = { "3.14159", "'foo'" }; |
| try { |
| runImportTest(TABLE_NAME, types, vals, "failingImport.q", |
| getArgv(false, null), new ImportTool()); |
| // If we get here, then the run succeeded -- which is incorrect. |
| fail("FAILING_HIVE_IMPORT test should have thrown IOException"); |
| } catch (IOException ioe) { |
| // expected; ok. |
| } |
| } |
| |
| /** Test that we can set delimiters how we want them. */ |
| @Test |
| public void testCustomDelimiters() throws IOException { |
| final String TABLE_NAME = "CUSTOM_DELIM_IMPORT"; |
| setCurTableName(TABLE_NAME); |
| setNumCols(3); |
| String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" }; |
| String [] vals = { "'test'", "42", "'somestring'" }; |
| String [] extraArgs = { |
| "--fields-terminated-by", ",", |
| "--lines-terminated-by", "|", |
| }; |
| runImportTest(TABLE_NAME, types, vals, "customDelimImport.q", |
| getArgv(false, extraArgs), new ImportTool()); |
| } |
| |
| /** |
| * Test hive import with row that has new line in it. |
| */ |
| @Test |
| public void testFieldWithHiveDelims() throws IOException, |
| InterruptedException { |
| final String TABLE_NAME = "FIELD_WITH_NL_HIVE_IMPORT"; |
| |
| LOG.info("Doing import of single row into FIELD_WITH_NL_HIVE_IMPORT table"); |
| setCurTableName(TABLE_NAME); |
| setNumCols(3); |
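    // The row below embeds newlines and \01 bytes, which collide with
    // Hive's default record and field delimiters.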
| String[] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" }; |
| String[] vals = { "'test with \n new lines \n'", "42", |
| "'oh no " + '\01' + " field delims " + '\01' + "'", }; |
| String[] moreArgs = { "--"+ BaseSqoopTool.HIVE_DROP_DELIMS_ARG }; |
| |
| runImportTest(TABLE_NAME, types, vals, "fieldWithNewlineImport.q", |
| getArgv(false, moreArgs), new ImportTool()); |
| |
| LOG.info("Validating data in single row is present in: " |
| + "FIELD_WITH_NL_HIVE_IMPORT table"); |
| |
    // Ideally, we would actually invoke Hive to verify that records with
    // embedded record and field delimiters have those values removed, and
    // that we end up with the proper number of Hive records. Unfortunately,
    // that is a non-trivial task better handled by an integration test.
    //
    // Instead, this assumes the path of the generated table and just
    // validates the map job output.
| |
| // Get and read the raw output file |
    String whDir = getWarehouseDir();
    File f = new File(new File(whDir, TABLE_NAME), "part-m-00000");
    BufferedReader br = new BufferedReader(new FileReader(f));
| try { |
| // verify the output |
| assertEquals(br.readLine(), "test with new lines " + '\01' + "42" |
| + '\01' + "oh no field delims "); |
| assertEquals(br.readLine(), null); // should only be one line |
| } catch (IOException ioe) { |
| fail("Unable to read files generated from hive"); |
| } finally { |
| br.close(); |
| } |
| } |
| |
| /** |
| * Test hive import with row that has new line in it. |
| */ |
| @Test |
| public void testFieldWithHiveDelimsReplacement() throws IOException, |
| InterruptedException { |
| final String TABLE_NAME = "FIELD_WITH_NL_REPLACEMENT_HIVE_IMPORT"; |
| |
| LOG.info("Doing import of single row into " |
| + "FIELD_WITH_NL_REPLACEMENT_HIVE_IMPORT table"); |
| setCurTableName(TABLE_NAME); |
| setNumCols(3); |
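    // As above, the row embeds newlines and \01 bytes; here they should be
    // replaced with the supplied string instead of dropped.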
| String[] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" }; |
| String[] vals = { "'test with\nnew lines\n'", "42", |
| "'oh no " + '\01' + " field delims " + '\01' + "'", }; |
| String[] moreArgs = { "--"+BaseSqoopTool.HIVE_DELIMS_REPLACEMENT_ARG, " "}; |
| |
| runImportTest(TABLE_NAME, types, vals, |
| "fieldWithNewlineReplacementImport.q", getArgv(false, moreArgs), |
| new ImportTool()); |
| |
| LOG.info("Validating data in single row is present in: " |
| + "FIELD_WITH_NL_REPLACEMENT_HIVE_IMPORT table"); |
| |
    // Ideally, we would actually invoke Hive to verify that records with
    // embedded record and field delimiters have those values replaced, and
    // that we end up with the proper number of Hive records. Unfortunately,
    // that is a non-trivial task better handled by an integration test.
    //
    // Instead, this assumes the path of the generated table and just
    // validates the map job output.
| |
| // Get and read the raw output file |
    String whDir = getWarehouseDir();
    File f = new File(new File(whDir, TABLE_NAME), "part-m-00000");
    BufferedReader br = new BufferedReader(new FileReader(f));
| try { |
| // verify the output |
| assertEquals(br.readLine(), "test with new lines " + '\01' + "42" |
| + '\01' + "oh no field delims "); |
| assertEquals(br.readLine(), null); // should only be one line |
| } catch (IOException ioe) { |
| fail("Unable to read files generated from hive"); |
| } finally { |
| br.close(); |
| } |
| } |
| |
| /** |
| * Test hive drop and replace option validation. |
| */ |
| @Test |
| public void testHiveDropAndReplaceOptionValidation() throws ParseException { |
| LOG.info("Testing conflicting Hive delimiter drop/replace options"); |
| |
| setNumCols(3); |
| String[] moreArgs = { "--"+BaseSqoopTool.HIVE_DELIMS_REPLACEMENT_ARG, " ", |
| "--"+BaseSqoopTool.HIVE_DROP_DELIMS_ARG, }; |
| |
| ImportTool tool = new ImportTool(); |
| try { |
| tool.validateOptions(tool.parseArguments(getArgv(false, moreArgs), null, |
| null, true)); |
| fail("Expected InvalidOptionsException"); |
| } catch (InvalidOptionsException ex) { |
| /* success */ |
| } |
| } |
| |
| /** |
| * Test hive import with row that has new line in it. |
| */ |
| @Test |
| public void testImportHiveWithPartitions() throws IOException, |
| InterruptedException { |
| final String TABLE_NAME = "PARTITION_HIVE_IMPORT"; |
| |
| LOG.info("Doing import of single row into PARTITION_HIVE_IMPORT table"); |
| setCurTableName(TABLE_NAME); |
| setNumCols(3); |
| String[] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)", }; |
| String[] vals = { "'whoop'", "42", "'I am a row in a partition'", }; |
| String[] moreArgs = { "--" + BaseSqoopTool.HIVE_PARTITION_KEY_ARG, "ds", |
| "--" + BaseSqoopTool.HIVE_PARTITION_VALUE_ARG, "20110413", }; |
| |
| runImportTest(TABLE_NAME, types, vals, "partitionImport.q", |
| getArgv(false, moreArgs), new ImportTool()); |
| } |
| |
| /** |
| * If partition key is set to one of importing columns, we should get an |
| * IOException. |
| * */ |
| @Test |
| public void testImportWithBadPartitionKey() { |
| final String TABLE_NAME = "FAILING_PARTITION_HIVE_IMPORT"; |
| |
| LOG.info("Doing import of single row into " + TABLE_NAME + " table"); |
| setCurTableName(TABLE_NAME); |
| setNumCols(3); |
| String[] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)", }; |
| String[] vals = { "'key'", "42", "'I am a row in a partition'", }; |
| |
| String partitionKey = getColNames()[0]; |
| |
| // Specify 1st column as partition key and import every column of the |
| // table by default (i.e. no --columns option). |
| String[] moreArgs1 = { |
| "--" + BaseSqoopTool.HIVE_PARTITION_KEY_ARG, |
| partitionKey, |
| }; |
| |
| // Specify 1st column as both partition key and importing column. |
| String[] moreArgs2 = { |
| "--" + BaseSqoopTool.HIVE_PARTITION_KEY_ARG, |
| partitionKey, |
| "--" + BaseSqoopTool.COLUMNS_ARG, |
| partitionKey, |
| }; |
| |
| // Test hive-import with the 1st args. |
| try { |
| runImportTest(TABLE_NAME, types, vals, "partitionImport.q", |
| getArgv(false, moreArgs1), new ImportTool()); |
| fail(TABLE_NAME + " test should have thrown IOException"); |
| } catch (IOException ioe) { |
| // expected; ok. |
| } |
| |
| // Test hive-import with the 2nd args. |
| try { |
| runImportTest(TABLE_NAME, types, vals, "partitionImport.q", |
| getArgv(false, moreArgs2), new ImportTool()); |
| fail(TABLE_NAME + " test should have thrown IOException"); |
| } catch (IOException ioe) { |
| // expected; ok. |
| } |
| |
| // Test create-hive-table with the 1st args. |
| try { |
| runImportTest(TABLE_NAME, types, vals, "partitionImport.q", |
| getCreateTableArgv(false, moreArgs1), new CreateHiveTableTool()); |
| fail(TABLE_NAME + " test should have thrown IOException"); |
| } catch (IOException ioe) { |
| // expected; ok. |
| } |
| } |
| } |