SQOOP-1904: Add support for DB2 XML data type (import use case)
(Ying Cao via Attila Szabo)
diff --git a/src/java/org/apache/sqoop/manager/Db2Manager.java b/src/java/org/apache/sqoop/manager/Db2Manager.java
index e39aa4c..52ab05e 100644
--- a/src/java/org/apache/sqoop/manager/Db2Manager.java
+++ b/src/java/org/apache/sqoop/manager/Db2Manager.java
@@ -23,7 +23,9 @@
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
+import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sqoop.mapreduce.db.Db2DataDrivenDBInputFormat;
@@ -48,6 +50,11 @@
private static final String DRIVER_CLASS =
"com.ibm.db2.jcc.DB2Driver";
+ private static final String XML_TO_JAVA_DATA_TYPE = "String";
+
+ private Map<String, String> columnTypeNames;
+
+
public Db2Manager(final SqoopOptions opts) {
super(DRIVER_CLASS, opts);
}
@@ -123,4 +130,98 @@
return databases.toArray(new String[databases.size()]);
}
+
+ /**
+ * Return hive type for SQL type.
+ *
+ * @param tableName
+ * table name
+ * @param columnName
+ * column name
+ * @param sqlType
+ * sql data type
+ * @return hive type
+ */
+ @Override
+ public String toHiveType(String tableName, String columnName, int sqlType) {
+ String hiveType = super.toHiveType(tableName, columnName, sqlType);
+ if (hiveType == null) {
+ hiveType = toDbSpecificHiveType(tableName, columnName);
+ }
+ return hiveType;
+ }
+
+ /**
+ * Resolve a database-specific type to the Hive type that should contain it.
+ *
+ * @param tableName
+ * table name
+ * @param colName
+ * column name
+ * @return the name of a Hive type to hold the sql datatype, or null if
+ * none.
+ */
+ private String toDbSpecificHiveType(String tableName, String colName) {
+ if (columnTypeNames == null) {
+ columnTypeNames = getColumnTypeNames(tableName, options.getCall(),
+ options.getSqlQuery());
+ }
+ LOG.debug("database-specific Column Types and names returned = ("
+ + StringUtils.join(columnTypeNames.keySet(), ",") + ")=>("
+ + StringUtils.join(columnTypeNames.values(), ",") + ")");
+
+ String colTypeName = columnTypeNames.get(colName);
+
+ if (colTypeName != null) {
+ if (colTypeName.toUpperCase().startsWith("XML")) {
+ return XML_TO_JAVA_DATA_TYPE;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Return java type for SQL type.
+ *
+ * @param tableName
+ * table name
+ * @param columnName
+ * column name
+ * @param sqlType
+ * sql type
+ * @return java type
+ */
+ @Override
+ public String toJavaType(String tableName, String columnName, int sqlType) {
+ String javaType = super.toJavaType(tableName, columnName, sqlType);
+ if (javaType == null) {
+ javaType = toDbSpecificJavaType(tableName, columnName);
+ }
+ return javaType;
+ }
+
+ /**
+ * Resolve a database-specific type to the Java type that should contain it.
+ *
+ * @param tableName
+ * table name
+ * @param colName
+ * column name
+ * @return the name of a Java type to hold the sql datatype, or null if
+ * none.
+ */
+ private String toDbSpecificJavaType(String tableName, String colName) {
+ if (columnTypeNames == null) {
+ columnTypeNames = getColumnTypeNames(tableName, options.getCall(),
+ options.getSqlQuery());
+ }
+ String colTypeName = columnTypeNames.get(colName);
+ if (colTypeName != null) {
+ if (colTypeName.equalsIgnoreCase("XML")) {
+ return XML_TO_JAVA_DATA_TYPE;
+ }
+ }
+ return null;
+ }
+
}
diff --git a/src/test/org/apache/sqoop/manager/db2/DB2XmlTypeImportManualTest.java b/src/test/org/apache/sqoop/manager/db2/DB2XmlTypeImportManualTest.java
new file mode 100644
index 0000000..2ae3af8
--- /dev/null
+++ b/src/test/org/apache/sqoop/manager/db2/DB2XmlTypeImportManualTest.java
@@ -0,0 +1,249 @@
+ /**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.manager.db2;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.sqoop.manager.Db2Manager;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.cloudera.sqoop.SqoopOptions;
+import com.cloudera.sqoop.testutil.CommonArgs;
+import com.cloudera.sqoop.testutil.ImportJobTestCase;
+import com.cloudera.sqoop.util.FileListing;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Test the DB2 XML data type.
+ *
+ * This uses JDBC to import data from an DB2 database into HDFS.
+ *
+ * Since this requires an DB2 Server installation,
+ * this class is named in such a way that Sqoop's default QA process does
+ * not run it. You need to run this manually with
+ * -Dtestcase=DB2XmlTypeImportManualTest
+
+ * You need to put DB2 JDBC driver library (db2jcc.jar) in a location
+ * where Sqoop will be able to access it (since this library cannot be checked
+ * into Apache's tree for licensing reasons).
+ *
+ * To set up your test environment:
+ * Install DB2 Express 9.7 C server.
+ * Create a database SQOOP
+ * Create a login SQOOP with password PASSWORD and grant all
+ * access for database SQOOP to user SQOOP.
+ */
+public class DB2XmlTypeImportManualTest extends ImportJobTestCase {
+
+ public static final Log LOG = LogFactory.getLog(
+ DB2XmlTypeImportManualTest.class.getName());
+
+ static final String HOST_URL = System.getProperty(
+ "sqoop.test.db2.connectstring.host_url",
+ "jdbc:db2://db2host:60000");
+
+ static final String DATABASE_NAME = System.getProperty(
+ "sqoop.test.db2.connectstring.database",
+ "SQOOP");
+ static final String DATABASE_USER = System.getProperty(
+ "sqoop.test.db2.connectstring.username",
+ "SQOOP");
+ static final String DATABASE_PASSWORD = System.getProperty(
+ "sqoop.test.db2.connectstring.password",
+ "SQOOP");
+
+ static final String TABLE_NAME = "COMPANY";
+ static final String CONNECT_STRING = HOST_URL
+ + "/" + DATABASE_NAME;
+ static final String HIVE_TABLE_NAME = "HCOMPANY";
+ static String ExpectedResults =
+ "1,doc1,<company name=\"Company1\"><emp id=\"31201\" salary=\"60000\" gender=\"Female\"><name><first>Laura </first><last>Brown</last></name><dept id=\"M25\">Finance</dept></emp></company>";
+
+
+ static {
+ LOG.info("Using DB2 CONNECT_STRING HOST_URL is : "+HOST_URL);
+ LOG.info("Using DB2 CONNECT_STRING: " + CONNECT_STRING);
+ }
+
+ // instance variables populated during setUp, used during tests
+ private Db2Manager manager;
+
+ protected String getTableName() {
+ return TABLE_NAME;
+ }
+
+
+ @Before
+ public void setUp() {
+ super.setUp();
+
+ SqoopOptions options = new SqoopOptions(CONNECT_STRING, getTableName());
+ options.setUsername(DATABASE_USER);
+ options.setPassword(DATABASE_PASSWORD);
+
+ manager = new Db2Manager(options);
+
+ // Drop the existing table, if there is one.
+ Connection conn = null;
+ Statement stmt = null;
+ try {
+ conn = manager.getConnection();
+ stmt = conn.createStatement();
+ stmt.execute("DROP TABLE " + getTableName());
+ } catch (SQLException sqlE) {
+ LOG.info("Table was not dropped: " + sqlE.getMessage());
+ } finally {
+ try {
+ if (null != stmt) {
+ stmt.close();
+ }
+ } catch (Exception ex) {
+ LOG.warn("Exception while closing stmt", ex);
+ }
+ }
+
+ // Create and populate table
+ try {
+ conn = manager.getConnection();
+ conn.setAutoCommit(false);
+ stmt = conn.createStatement();
+ String xml ="xmlparse(document '<company name=\"Company1\">\n"+
+ "<emp id=\"31201\" salary=\"60000\" gender=\"Female\">"+
+ "<name><first>Laura </first><last>Brown</last></name>"+
+ "<dept id=\"M25\">Finance</dept></emp></company>')";
+
+
+ // create the database table and populate it with data.
+ stmt.executeUpdate("CREATE TABLE " + getTableName() + " ("
+ + "ID int, "
+ + "DOCNAME VARCHAR(20), "
+ + "DOC XML)");
+
+ stmt.executeUpdate("INSERT INTO " + getTableName() + " VALUES("
+ + "1,'doc1', "
+ + xml
+ +" )");
+ conn.commit();
+ } catch (SQLException sqlE) {
+ LOG.error("Encountered SQL Exception: ", sqlE);
+ fail("SQLException when running test setUp(): " + sqlE);
+ } finally {
+ try {
+ if (null != stmt) {
+ stmt.close();
+ }
+ } catch (Exception ex) {
+ LOG.warn("Exception while closing connection/stmt", ex);
+ }
+ }
+ }
+
+ @After
+ public void tearDown() {
+ super.tearDown();
+ try {
+ manager.close();
+ } catch (SQLException sqlE) {
+ LOG.error("Got SQLException: " + sqlE);
+ }
+ }
+
+ @Test
+ public void testDb2Import() throws IOException {
+
+ runDb2Test(ExpectedResults);
+
+ }
+
+ private String [] getArgv() {
+ ArrayList<String> args = new ArrayList<String>();
+
+ CommonArgs.addHadoopFlags(args);
+ args.add("--connect");
+ args.add(CONNECT_STRING);
+ args.add("--username");
+ args.add(DATABASE_USER);
+ args.add("--password");
+ args.add(DATABASE_PASSWORD);
+
+ args.add("--table");
+ args.add(TABLE_NAME);
+ args.add("--warehouse-dir");
+ args.add(getWarehouseDir());
+ args.add("--hive-table");
+ args.add(HIVE_TABLE_NAME);
+ args.add("--num-mappers");
+ args.add("1");
+
+ return args.toArray(new String[0]);
+ }
+
+ private void runDb2Test(String expectedResults) throws IOException {
+
+ Path warehousePath = new Path(this.getWarehouseDir());
+ Path tablePath = new Path(warehousePath, getTableName());
+ Path filePath = new Path(tablePath, "part-m-00000");
+
+ File tableFile = new File(tablePath.toString());
+ if (tableFile.exists() && tableFile.isDirectory()) {
+ // remove the directory before running the import.
+ FileListing.recursiveDeleteDir(tableFile);
+ }
+
+ String [] argv = getArgv();
+ try {
+ runImport(argv);
+ LOG.info("finish runImport with argv is : "+argv);
+ } catch (IOException ioe) {
+ LOG.error("Got IOException during import: " + ioe);
+ fail(ioe.toString());
+ }
+
+ File f = new File(filePath.toString());
+ assertTrue("Could not find imported data file", f.exists());
+ BufferedReader r = null;
+ try {
+ // Read through the file and make sure it's all there.
+ r = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
+ assertEquals(expectedResults, r.readLine());
+ } catch (IOException ioe) {
+ LOG.error("Got IOException verifying results: " + ioe);
+ fail(ioe.toString());
+ } finally {
+ IOUtils.closeStream(r);
+ }
+ }
+
+}