blob: 846228a1d0a2b636aea23d3bed6306042653c8b8 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.manager.postgresql;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.File;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.sqoop.manager.ConnManager;
import org.apache.sqoop.manager.PostgresqlManager;
import org.apache.sqoop.SqoopOptions;
import org.apache.sqoop.testutil.CommonArgs;
import org.apache.sqoop.testutil.ImportJobTestCase;
import org.apache.sqoop.util.FileListing;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
* Test the PostgresqlManager and DirectPostgresqlManager implementations.
* The former uses the postgres JDBC driver to perform an import;
* the latter uses pg_dump to facilitate it.
*
* Since this requires a Postgresql installation on your local machine to use,
* this class is named in such a way that Hadoop's default QA process does not
* run it. You need to run this manually with -Dtestcase=PostgresqlImportTest or
* -Dthirdparty=true.
*
* You need to put Postgresql's JDBC driver library into a location where
* Hadoop can access it (e.g., $HADOOP_HOME/lib).
*
* To configure a postgresql database to allow local connections, put the
* following in /etc/postgresql/8.3/main/pg_hba.conf:
* local all all trust
* host all all 127.0.0.1/32 trust
* host all all ::1/128 trust
*
* ... and comment out any other lines referencing 127.0.0.1 or ::1.
*
* Also, in the file /etc/postgresql/8.3/main/postgresql.conf, uncomment
* the line that starts with listen_addresses and set its value to '*' as
* follows:
* listen_addresses = '*'
*
* For postgresql 8.1, these files may be in /var/lib/pgsql/data instead. You
* may need to restart the postgresql service after modifying them.
*
* You should also create a sqooptest user and database:
*
* $ sudo -u postgres psql -U postgres template1
* template1=> CREATE USER sqooptest;
* template1=> CREATE DATABASE sqooptest;
* template1=> GRANT ALL ON DATABASE sqooptest TO sqooptest;
* template1=> \q
*
*/
public class PostgresqlImportTest extends ImportJobTestCase {
/** Logger for this test class. */
public static final Log LOG = LogFactory.getLog(
PostgresqlImportTest.class.getName());
/**
 * JDBC URL prefix of the server under test. Read from the system property
 * sqoop.test.postgresql.connectstring.host_url; falls back to a local server.
 */
static final String HOST_URL = System.getProperty(
"sqoop.test.postgresql.connectstring.host_url",
"jdbc:postgresql://localhost/");
/** Database user; system property sqoop.test.postgresql.username, default "sqooptest". */
static final String DATABASE_USER = System.getProperty(
"sqoop.test.postgresql.username",
"sqooptest");
/** Database name; system property sqoop.test.postgresql.database, default "sqooptest". */
static final String DATABASE_NAME = System.getProperty(
"sqoop.test.postgresql.database",
"sqooptest");
/**
 * Database password; system property sqoop.test.postgresql.password.
 * No default is supplied, so this is null when the property is not set.
 */
static final String PASSWORD = System.getProperty(
"sqoop.test.postgresql.password");
/** Main fixture table, created in the public schema by setUp(). */
static final String TABLE_NAME = "EMPLOYEES_PG";
/** Fixture table that additionally receives a row whose nullable columns are all NULL. */
static final String NULL_TABLE_NAME = "NULL_EMPLOYEES_PG";
/** Fixture table whose name contains a single quote. */
static final String SPECIAL_TABLE_NAME = "EMPLOYEES_PG's";
/** Fixture table created in SCHEMA_SPECIAL rather than the public schema. */
static final String DIFFERENT_TABLE_NAME = "DIFFERENT_TABLE";
/** Name of the schema holding most fixture tables. */
static final String SCHEMA_PUBLIC = "public";
/** Name of the extra schema that setUpData() tries to create. */
static final String SCHEMA_SPECIAL = "special";
/** Full JDBC connect string: HOST_URL followed by DATABASE_NAME. */
static final String CONNECT_STRING = HOST_URL + DATABASE_NAME;
/**
 * Connection obtained by the most recent setUpData() call; tearDown() uses it
 * to drop the fixture tables and then closes it.
 */
protected Connection connection;
/**
 * Always returns {@code false}.
 * NOTE(review): presumably this tells the base test case not to start its
 * HSQLDB test server because these tests talk to PostgreSQL instead —
 * confirm against ImportJobTestCase.
 */
@Override
protected boolean useHsqldbTestServer() {
return false;
}
/**
 * Wraps a table or schema name in double quotes so it can be used as a
 * delimited identifier in a SQL statement.
 *
 * <p>A double quote contained in the name is doubled, which is how SQL
 * spells a literal double quote inside a delimited identifier; without this
 * the quote would terminate the identifier early.
 *
 * @param tableName the raw table or schema name; must not be null
 * @return the name surrounded by double quotes, with embedded double quotes
 *     doubled
 * @throws NullPointerException if {@code tableName} is null
 */
public String quoteTableOrSchemaName(String tableName) {
  String escaped = tableName.replace("\"", "\"\"");
  return "\"" + escaped + "\"";
}
/**
 * Builds the SQL text that drops {@code schema.tableName} if it exists.
 * Both parts are quoted with {@link #quoteTableOrSchemaName(String)}.
 *
 * @param tableName unquoted table name
 * @param schema unquoted schema name
 * @return the complete DROP TABLE IF EXISTS statement
 */
private String getDropTableStatement(String tableName, String schema) {
  StringBuilder sql = new StringBuilder("DROP TABLE IF EXISTS ");
  sql.append(quoteTableOrSchemaName(schema));
  sql.append('.');
  sql.append(quoteTableOrSchemaName(tableName));
  return sql.toString();
}
/**
 * Runs the base-class setUp and then (re)creates the four fixture tables:
 * three in the public schema and one in the special schema. Only
 * NULL_TABLE_NAME receives the extra row containing NULL values.
 */
@Before
public void setUp() {
  super.setUp();
  LOG.debug("Setting up another postgresql test: " + CONNECT_STRING);

  String[] publicTables = { TABLE_NAME, NULL_TABLE_NAME, SPECIAL_TABLE_NAME };
  for (String publicTable : publicTables) {
    boolean withNullRow = NULL_TABLE_NAME.equals(publicTable);
    setUpData(publicTable, SCHEMA_PUBLIC, withNullRow);
  }
  setUpData(DIFFERENT_TABLE_NAME, SCHEMA_SPECIAL, false);

  LOG.debug("setUp complete.");
}
/**
 * Drops the fixture tables, runs the base-class tearDown and closes the
 * connection held in {@code connection}.
 *
 * <p>Cleanup is best effort: SQL failures are logged and never fail the test.
 * The method tolerates a null connection (for example when setUp failed
 * before a connection was obtained) so that the base-class tearDown still
 * runs.
 *
 * <p>NOTE(review): setUpData() closes the manager that handed out this
 * connection. If that also closes the connection itself, the drops below
 * fail and are only logged — confirm against the ConnManager implementation.
 */
@After
public void tearDown() {
  if (connection != null) {
    Statement stmt = null;
    try {
      stmt = connection.createStatement();
      stmt.executeUpdate(getDropTableStatement(TABLE_NAME, SCHEMA_PUBLIC));
      stmt.executeUpdate(getDropTableStatement(NULL_TABLE_NAME, SCHEMA_PUBLIC));
      stmt.executeUpdate(getDropTableStatement(SPECIAL_TABLE_NAME, SCHEMA_PUBLIC));
      stmt.executeUpdate(getDropTableStatement(DIFFERENT_TABLE_NAME, SCHEMA_SPECIAL));
      // setUpData() disabled auto-commit on this connection, so the drops
      // must be committed explicitly or they are discarded on close.
      connection.commit();
    } catch (SQLException e) {
      LOG.error("Can't clean up the database:", e);
    } finally {
      if (stmt != null) {
        try {
          stmt.close();
        } catch (SQLException e) {
          LOG.warn("Ignoring exception while closing statement in tearDown", e);
        }
      }
    }
  }

  try {
    super.tearDown();
  } finally {
    // Close the connection even if the base-class tearDown throws.
    if (connection != null) {
      try {
        connection.close();
      } catch (SQLException e) {
        LOG.error("Ignoring exception in tearDown", e);
      }
    }
  }
}
/**
 * (Re)creates one fixture table and fills it with the employee rows.
 *
 * <p>Steps, all on a fresh connection with auto-commit disabled:
 * <ol>
 *   <li>try to create {@code schema}; a failure is logged and rolled back;</li>
 *   <li>try to drop the table; a failure is logged and rolled back;</li>
 *   <li>create the table and insert rows 1-3, plus row 4 with NULL values
 *       when {@code nullEntry} is set;</li>
 *   <li>commit.</li>
 * </ol>
 * Any other SQLException fails the running test. The connection is stored in
 * the {@code connection} field, replacing the one from a previous call.
 *
 * @param tableName unescaped name of the table to create
 * @param schema unescaped name of the schema that holds the table
 * @param nullEntry whether to add the extra row whose nullable columns are
 *     all NULL
 */
public void setUpData(String tableName, String schema, boolean nullEntry) {
  SqoopOptions options = new SqoopOptions(CONNECT_STRING, tableName);
  options.setUsername(DATABASE_USER);
  options.setPassword(PASSWORD);

  ConnManager manager = null;
  Statement st = null;

  try {
    manager = new PostgresqlManager(options);
    connection = manager.getConnection();
    connection.setAutoCommit(false);
    st = connection.createStatement();

    // Create the schema the naive way: always attempt it and ignore the
    // error raised when it already exists.
    try {
      st.executeUpdate("CREATE SCHEMA " + manager.escapeTableName(schema));
      connection.commit();
    } catch (SQLException e) {
      LOG.info("Couldn't create schema " + schema + " (is o.k. as long as "
          + "the schema already exists).");
      // The failed statement aborted the transaction; reset it.
      connection.rollback();
    }

    String fullTableName = manager.escapeTableName(schema)
        + "." + manager.escapeTableName(tableName);
    LOG.info("Creating table: " + fullTableName);

    try {
      // Try to remove the table first. DROP TABLE IF EXISTS didn't
      // get added until pg 8.3, so we just use "DROP TABLE" and ignore
      // any exception here if one occurs.
      st.executeUpdate("DROP TABLE " + fullTableName);
    } catch (SQLException e) {
      LOG.info("Couldn't drop table " + schema + "." + tableName + " (ok)");
      // Now we need to reset the transaction.
      connection.rollback();
    }

    st.executeUpdate("CREATE TABLE " + fullTableName + " ("
        + manager.escapeColName("id") + " INT NOT NULL PRIMARY KEY, "
        + manager.escapeColName("name") + " VARCHAR(24) NOT NULL, "
        + manager.escapeColName("start_date") + " DATE, "
        + manager.escapeColName("Salary") + " FLOAT, "
        + manager.escapeColName("Fired") + " BOOL, "
        + manager.escapeColName("dept") + " VARCHAR(32))");
    st.executeUpdate("INSERT INTO " + fullTableName
        + " VALUES(1,'Aaron','2009-05-14',1000000.00,TRUE,'engineering')");
    st.executeUpdate("INSERT INTO " + fullTableName
        + " VALUES(2,'Bob','2009-04-20',400.00,TRUE,'sales')");
    st.executeUpdate("INSERT INTO " + fullTableName
        + " VALUES(3,'Fred','2009-01-23',15.00,FALSE,'marketing')");
    if (nullEntry) {
      st.executeUpdate("INSERT INTO " + fullTableName
          + " VALUES(4,'Mike',NULL,NULL,NULL,NULL)");
    }
    connection.commit();
  } catch (SQLException sqlE) {
    // Hand the exception to the logger so the stack trace ends up in the
    // test log instead of on stderr.
    LOG.error("Encountered SQL Exception: " + sqlE, sqlE);
    fail("SQLException when running test setUp(): " + sqlE);
  } finally {
    // Close each resource in its own try block so a failure to close the
    // statement cannot prevent the manager from being closed.
    if (null != st) {
      try {
        st.close();
      } catch (SQLException sqlE) {
        LOG.warn("Got SQLException when closing statement: " + sqlE);
      }
    }
    if (null != manager) {
      try {
        manager.close();
      } catch (SQLException sqlE) {
        LOG.warn("Got SQLException when closing connection: " + sqlE);
      }
    }
  }
  LOG.debug("setUp complete.");
}
/**
 * Builds the command line for one import run.
 *
 * <p>The arguments are, in order: the common Hadoop flags, the table, the
 * warehouse directory, the connect string, the user name, the password
 * (only when one is configured), a {@code id > 1} WHERE clause, a single
 * mapper, optionally {@code --direct}, and finally {@code extraArgs}.
 *
 * @param isDirect whether to append {@code --direct}
 * @param tableName value passed to {@code --table}
 * @param extraArgs additional arguments appended verbatim at the end
 * @return the argument vector
 */
private String [] getArgv(boolean isDirect, String tableName,
    String... extraArgs) {
  ArrayList<String> args = new ArrayList<String>();
  CommonArgs.addHadoopFlags(args);

  args.add("--table");
  args.add(tableName);
  args.add("--warehouse-dir");
  args.add(getWarehouseDir());
  args.add("--connect");
  args.add(CONNECT_STRING);
  args.add("--username");
  args.add(DATABASE_USER);
  // PASSWORD is null when sqoop.test.postgresql.password is not set; a null
  // element must not end up in the argument vector.
  if (PASSWORD != null) {
    args.add("--password");
    args.add(PASSWORD);
  }
  args.add("--where");
  args.add("id > 1");
  args.add("-m");
  args.add("1");

  if (isDirect) {
    args.add("--direct");
  }

  args.addAll(Arrays.asList(extraArgs));

  return args.toArray(new String[0]);
}
/**
 * Runs one import and compares the beginning of the produced file with the
 * expected lines.
 *
 * <p>The output file is {@code part-r-00000} when {@code extraArgs} contains
 * {@code --merge-key} and {@code part-m-00000} otherwise. Unless a merge is
 * requested, an existing table directory is deleted before the import. Only
 * the first {@code expectedResults.length} lines are compared; further lines
 * in the file are not checked, so an empty array asserts only that the file
 * exists.
 *
 * @param isDirect whether to run the import with {@code --direct}
 * @param expectedResults the lines expected at the start of the output file
 * @param tableName the table to import; also the name of the output directory
 * @param extraArgs additional command-line arguments for the import
 * @throws IOException if deleting the old table directory fails
 */
private void doImportAndVerify(boolean isDirect, String[] expectedResults,
    String tableName, String... extraArgs) throws IOException {
  Path warehousePath = new Path(this.getWarehouseDir());
  Path tablePath = new Path(warehousePath, tableName);

  // if importing with merge step, directory should exist and output should be from a reducer
  boolean isMerge = Arrays.asList(extraArgs).contains("--merge-key");
  Path filePath = new Path(tablePath, isMerge ? "part-r-00000" : "part-m-00000");

  File tableFile = new File(tablePath.toString());
  if (tableFile.exists() && tableFile.isDirectory() && !isMerge) {
    // remove the directory before running the import.
    FileListing.recursiveDeleteDir(tableFile);
  }

  String [] argv = getArgv(isDirect, tableName, extraArgs);
  try {
    runImport(argv);
  } catch (IOException ioe) {
    LOG.error("Got IOException during import: " + ioe.toString(), ioe);
    fail(ioe.toString());
  }

  File f = new File(filePath.toString());
  assertTrue("Could not find imported data file, " + f, f.exists());

  BufferedReader r = null;
  try {
    // Read through the file and make sure it's all there. The charset is
    // named explicitly so the comparison does not depend on the platform
    // default encoding.
    r = new BufferedReader(
        new InputStreamReader(new FileInputStream(f), "UTF-8"));
    for (int i = 0; i < expectedResults.length; i++) {
      assertEquals("Mismatch at line " + (i + 1) + " of " + f,
          expectedResults[i], r.readLine());
    }
  } catch (IOException ioe) {
    LOG.error("Got IOException verifying results: " + ioe.toString(), ioe);
    fail(ioe.toString());
  } finally {
    IOUtils.closeStream(r);
  }
}
/**
 * Imports TABLE_NAME without {@code --direct} and checks the two rows that
 * pass the {@code id > 1} filter added by getArgv().
 */
@Test
public void testJdbcBasedImport() throws IOException {
  final boolean direct = false;
  doImportAndVerify(direct, new String[] {
      "2,Bob,2009-04-20,400.0,true,sales",
      "3,Fred,2009-01-23,15.0,false,marketing",
  }, TABLE_NAME);
}
/**
 * Imports TABLE_NAME with {@code --direct} and checks the two rows that pass
 * the {@code id > 1} filter added by getArgv().
 */
@Test
public void testDirectImport() throws IOException {
  final boolean direct = true;
  doImportAndVerify(direct, new String[] {
      "2,Bob,2009-04-20,400,TRUE,sales",
      "3,Fred,2009-01-23,15,FALSE,marketing",
  }, TABLE_NAME);
}
/**
 * Checks that the table list reported by a PostgresqlManager contains
 * TABLE_NAME. The manager is closed afterwards so the test does not leave
 * its connection open.
 */
@Test
public void testListTables() throws IOException {
  SqoopOptions options = new SqoopOptions(new Configuration());
  options.setConnectString(CONNECT_STRING);
  options.setUsername(DATABASE_USER);
  options.setPassword(PASSWORD);

  ConnManager mgr = new PostgresqlManager(options);
  try {
    String[] tables = mgr.listTables();
    // binarySearch requires a sorted array.
    Arrays.sort(tables);
    assertTrue(TABLE_NAME + " is not found!",
        Arrays.binarySearch(tables, TABLE_NAME) >= 0);
  } finally {
    try {
      mgr.close();
    } catch (SQLException sqlE) {
      LOG.warn("Got SQLException when closing connection: " + sqlE);
    }
  }
}
/**
 * Imports SPECIAL_TABLE_NAME, whose name contains a single quote, without
 * {@code --direct} and checks the two rows with {@code id > 1}.
 */
@Test
public void testTableNameWithSpecialCharacter() throws IOException {
  final boolean direct = false;
  doImportAndVerify(direct, new String[] {
      "2,Bob,2009-04-20,400.0,true,sales",
      "3,Fred,2009-01-23,15.0,false,marketing",
  }, SPECIAL_TABLE_NAME);
}
/**
 * Runs a lastmodified incremental import of TABLE_NAME on the start_date
 * column without {@code --direct}. No expected lines are listed, so only the
 * existence of the output file is asserted.
 */
@Test
public void testIncrementalImport() throws IOException {
  String[] noExpectedLines = new String[0];
  doImportAndVerify(false, noExpectedLines, TABLE_NAME,
      "--incremental", "lastmodified",
      "--check-column", "start_date");
}
/**
 * Runs a lastmodified incremental import of TABLE_NAME on the start_date
 * column with {@code --direct}. No expected lines are listed, so only the
 * existence of the output file is asserted.
 */
@Test
public void testDirectIncrementalImport() throws IOException {
  String[] noExpectedLines = new String[0];
  doImportAndVerify(true, noExpectedLines, TABLE_NAME,
      "--incremental", "lastmodified",
      "--check-column", "start_date");
}
/**
 * Runs two direct lastmodified incremental imports of TABLE_NAME in a row:
 * a plain one, then one with {@code --merge-key id} and
 * {@code --last-value 2009-04-20}. No expected lines are listed, so each
 * step asserts only that its output file exists.
 */
@Test
public void testDirectIncrementalImportMerge() throws IOException {
  String[] noExpectedLines = new String[0];

  // First pass: plain incremental import.
  doImportAndVerify(true, noExpectedLines, TABLE_NAME,
      "--incremental", "lastmodified",
      "--check-column", "start_date");

  // Second pass: incremental import with a merge step.
  doImportAndVerify(true, noExpectedLines, TABLE_NAME,
      "--incremental", "lastmodified",
      "--check-column", "start_date",
      "--merge-key", "id",
      "--last-value", "2009-04-20");
}
/**
 * Imports DIFFERENT_TABLE_NAME without {@code --direct}, passing
 * {@code --schema SCHEMA_SPECIAL} after the {@code --} separator, and checks
 * the two rows with {@code id > 1}.
 */
@Test
public void testDifferentSchemaImport() throws IOException {
  String[] wanted = new String[] {
      "2,Bob,2009-04-20,400.0,true,sales",
      "3,Fred,2009-01-23,15.0,false,marketing",
  };
  doImportAndVerify(false, wanted, DIFFERENT_TABLE_NAME,
      "--", "--schema", SCHEMA_SPECIAL);
}
/**
 * Imports DIFFERENT_TABLE_NAME with {@code --direct}, passing
 * {@code --schema SCHEMA_SPECIAL} after the {@code --} separator, and checks
 * the two rows with {@code id > 1}.
 */
@Test
public void testDifferentSchemaImportDirect() throws IOException {
  String[] wanted = new String[] {
      "2,Bob,2009-04-20,400,TRUE,sales",
      "3,Fred,2009-01-23,15,FALSE,marketing",
  };
  doImportAndVerify(true, wanted, DIFFERENT_TABLE_NAME,
      "--", "--schema", SCHEMA_SPECIAL);
}
/**
 * Imports NULL_TABLE_NAME with {@code --direct} and custom
 * {@code --null-string} / {@code --null-non-string} values, and checks that
 * the NULL columns of row 4 appear as {@code \N} in the output.
 */
@Test
public void testNullEscapeCharacters() throws Exception {
  String[] wanted = new String[] {
      "2,Bob,2009-04-20,400,TRUE,sales",
      "3,Fred,2009-01-23,15,FALSE,marketing",
      "4,Mike,\\N,\\N,\\N,\\N",
  };
  doImportAndVerify(true, wanted, NULL_TABLE_NAME,
      "--null-string", "\\\\\\\\N",
      "--null-non-string", "\\\\\\\\N");
}
/**
 * Imports TABLE_NAME with {@code --direct}, passing custom
 * {@code --boolean-true-string} / {@code --boolean-false-string} values after
 * the {@code --} separator, and checks that they replace the boolean column
 * values in the output.
 */
@Test
public void testDifferentBooleanValues() throws Exception {
  String[] wanted = new String[] {
      "2,Bob,2009-04-20,400,REAL_TRUE,sales",
      "3,Fred,2009-01-23,15,REAL_FALSE,marketing",
  };
  doImportAndVerify(true, wanted, TABLE_NAME,
      "--",
      "--boolean-true-string", "REAL_TRUE",
      "--boolean-false-string", "REAL_FALSE");
}
}