Revert "SQOOP-2920: sqoop performance deteriorates significantly on wide datasets; sqoop 100% on cpu"
I mistakenly included the changes for both SQOOP-2920 and SQOOP-2906 in the original commit,
so I am reverting that commit and will commit the two changes separately.
diff --git a/src/java/org/apache/sqoop/avro/AvroUtil.java b/src/java/org/apache/sqoop/avro/AvroUtil.java
index ee29f14..319be0f 100644
--- a/src/java/org/apache/sqoop/avro/AvroUtil.java
+++ b/src/java/org/apache/sqoop/avro/AvroUtil.java
@@ -114,25 +114,11 @@
* Format candidate to avro specifics
*/
public static String toAvroIdentifier(String candidate) {
- char[] data = candidate.toCharArray();
- boolean skip = false;
- int stringIndex = 0;
-
- for (char c:data) {
- if (Character.isLetterOrDigit(c) || c == '_') {
- data[stringIndex++] = c;
- skip = false;
- } else if(!skip) {
- data[stringIndex++] = '_';
- skip = true;
- }
- }
-
- char initial = data[0];
- if (Character.isLetter(initial) || initial == '_') {
- return new String(data, 0, stringIndex);
+ String formattedCandidate = candidate.replaceAll("\\W+", "_");
+ if (formattedCandidate.substring(0,1).matches("[a-zA-Z_]")) {
+ return formattedCandidate;
} else {
- return "AVRO_".concat(new String(data, 0, stringIndex));
+ return "AVRO_" + formattedCandidate;
}
}
diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java
index 9d91887..23a9c41 100644
--- a/src/java/org/apache/sqoop/orm/ClassWriter.java
+++ b/src/java/org/apache/sqoop/orm/ClassWriter.java
@@ -1064,49 +1064,57 @@
* @param colNames - ordered list of column names for table.
* @param sb - StringBuilder to append code to
*/
- private void generateSetField(Map<String, Integer> columnTypes, String[] colNames, String[] rawColNames,
- StringBuilder sb) {
- String sep = System.getProperty("line.separator");
- sb.append(" public void setField(String __fieldName, Object __fieldVal) " + "{" + sep);
- sb.append(" if (!setters.containsKey(__fieldName)) {" + sep);
- sb.append(" throw new RuntimeException(\"No such field:\"+__fieldName);" + sep);
- sb.append(" }" + sep);
- sb.append(" setters.get(__fieldName).setField(__fieldVal);" + sep);
- sb.append(" }\n" + sep);
- }
+ private void generateSetField(Map<String, Integer> columnTypes,
+ String [] colNames, String [] rawColNames, StringBuilder sb) {
- private void generateConstructorAndInitMethods(Map<String, Integer> colTypes, String[] colNames, String[] rawColNames,
- String typeName, StringBuilder sb) {
- String sep = System.getProperty("line.separator");
- int numberOfMethods = getNumberOfMethods(colNames, maxColumnsPerMethod);
- for (int methodNumber = 0; methodNumber < numberOfMethods; ++methodNumber) {
- sb.append(" private void init" + methodNumber + "() {" + sep);
- for (int i = methodNumber * maxColumnsPerMethod; i < topBoundary(colNames, methodNumber,
- maxColumnsPerMethod); ++i) {
- String colName = colNames[i];
- String rawColName = rawColNames[i];
- int sqlType = colTypes.get(colName);
- String javaType = toJavaType(colName, sqlType);
+ int numberOfMethods =
+ this.getNumberOfMethods(colNames, maxColumnsPerMethod);
+
+ sb.append(" public void setField(String __fieldName, Object __fieldVal) "
+ + "{\n");
+ if (numberOfMethods > 1) {
+ boolean first = true;
+ for (int i = 0; i < numberOfMethods; ++i) {
+ if (!first) {
+ sb.append(" else");
+ }
+ sb.append(" if (this.setField" + i
+ + "(__fieldName, __fieldVal)) {\n");
+ sb.append(" return;\n");
+ sb.append(" }\n");
+ first = false;
+ }
+ } else {
+ boolean first = true;
+ for (int i = 0; i < colNames.length; i++) {
+ int sqlType = columnTypes.get(colNames[i]);
+ String javaType = toJavaType(colNames[i], sqlType);
if (null == javaType) {
- LOG.error("Cannot resolve SQL type " + sqlType);
continue;
} else {
- sb.append(" setters.put(\"" + serializeRawColName(rawColName) + "\", new FieldSetterCommand() {" + sep);
- sb.append(" @Override" + sep);
- sb.append(" public void setField(Object value) {" + sep);
- sb.append(" " + colName + " = (" + javaType + ")value;" + sep);
- sb.append(" }" + sep);
- sb.append(" });" + sep);
+ if (!first) {
+ sb.append(" else");
+ }
+
+ sb.append(" if (\"" + serializeRawColName(rawColNames[i]) + "\".equals(__fieldName)) {\n");
+ sb.append(" this." + colNames[i] + " = (" + javaType
+ + ") __fieldVal;\n");
+ sb.append(" }\n");
+ first = false;
}
}
- sb.append(" }" + sep);
}
- sb.append(" public " + typeName + "() {" + sep);
+ sb.append(" else {\n");
+ sb.append(" throw new RuntimeException(");
+ sb.append("\"No such field: \" + __fieldName);\n");
+ sb.append(" }\n");
+ sb.append(" }\n");
+
for (int i = 0; i < numberOfMethods; ++i) {
- sb.append(" init" + i + "();" + sep);
+ myGenerateSetField(columnTypes, colNames, rawColNames, sb, i, maxColumnsPerMethod);
}
- sb.append(" }" + sep);
}
+
/**
* Raw column name is a column name as it was created on database and we need to serialize it between
* double quotes into java class that will be further complied with javac. Various databases supports
@@ -1176,7 +1184,7 @@
sb.append(" public Map<String, Object> getFieldMap() {\n");
sb.append(" Map<String, Object> __sqoop$field_map = "
- + "new HashMap<String, Object>();\n");
+ + "new TreeMap<String, Object>();\n");
if (numberOfMethods > 1) {
for (int i = 0; i < numberOfMethods; ++i) {
sb.append(" this.getFieldMap" + i + "(__sqoop$field_map);\n");
@@ -1926,7 +1934,7 @@
sb.append("import java.util.Iterator;\n");
sb.append("import java.util.List;\n");
sb.append("import java.util.Map;\n");
- sb.append("import java.util.HashMap;\n");
+ sb.append("import java.util.TreeMap;\n");
sb.append("\n");
String className = tableNameInfo.getShortClassForTable(tableName);
@@ -1936,12 +1944,7 @@
+ CLASS_WRITER_VERSION + ";\n");
sb.append(
" public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n");
- sb.append(" public static interface FieldSetterCommand {");
- sb.append(" void setField(Object value);");
- sb.append(" }");
sb.append(" protected ResultSet __cur_result_set;\n");
- sb.append(" private Map<String, FieldSetterCommand> setters = new HashMap<String, FieldSetterCommand>();\n");
- generateConstructorAndInitMethods(columnTypes, colNames, rawColNames, className, sb);
generateFields(columnTypes, colNames, className, sb);
generateEquals(columnTypes, colNames, className, sb);
generateDbRead(columnTypes, colNames, sb);
diff --git a/src/java/org/apache/sqoop/orm/CompilationManager.java b/src/java/org/apache/sqoop/orm/CompilationManager.java
index 0a2a87f..ce165e8 100644
--- a/src/java/org/apache/sqoop/orm/CompilationManager.java
+++ b/src/java/org/apache/sqoop/orm/CompilationManager.java
@@ -296,15 +296,11 @@
// we only record the subdir parts in the zip entry.
String fullPath = entry.getAbsolutePath();
String chompedPath = fullPath.substring(baseDirName.length());
- int indexOfDollarSign = chompedPath.indexOf("$");
- String innerTypesChompedPath = chompedPath
- .substring(0, indexOfDollarSign == -1 ? chompedPath.length() : indexOfDollarSign);
boolean include = chompedPath.endsWith(".class")
- && (sources.contains(
+ && sources.contains(
chompedPath.substring(0, chompedPath.length() - ".class".length())
- + ".java")
- || sources.contains(innerTypesChompedPath + ".java"));
+ + ".java");
if (include) {
// include this file.
diff --git a/src/test/com/cloudera/sqoop/orm/TestClassWriter.java b/src/test/com/cloudera/sqoop/orm/TestClassWriter.java
index 10a0969..498db73 100644
--- a/src/test/com/cloudera/sqoop/orm/TestClassWriter.java
+++ b/src/test/com/cloudera/sqoop/orm/TestClassWriter.java
@@ -26,7 +26,6 @@
import java.sql.Connection;
import java.sql.Statement;
import java.sql.SQLException;
-import java.util.Random;
import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
@@ -58,9 +57,6 @@
public static final Log LOG =
LogFactory.getLog(TestClassWriter.class.getName());
- private static final String WIDE_TABLE_NAME = "WIDETABLE";
- private static final int WIDE_TABLE_COLUMN_COUNT = 800;
- private static final int WIDE_TABLE_ROW_COUNT = 20_000;
// instance variables populated during setUp, used during tests
private HsqldbTestServer testServer;
@@ -126,16 +122,12 @@
static final String JAR_GEN_DIR = ImportJobTestCase.TEMP_BASE_DIR
+ "sqoop/test/jargen";
- private File runGenerationTest(String[] argv, String classNameToCheck) {
- return runGenerationTest(argv, classNameToCheck, HsqldbTestServer.getTableName());
- }
-
/**
* Run a test to verify that we can generate code and it emits the output
* files where we expect them.
* @return
*/
- private File runGenerationTest(String[] argv, String classNameToCheck, String tableName) {
+ private File runGenerationTest(String [] argv, String classNameToCheck) {
File codeGenDirFile = new File(CODE_GEN_DIR);
File classGenDirFile = new File(JAR_GEN_DIR);
@@ -148,7 +140,7 @@
CompilationManager compileMgr = new CompilationManager(options);
ClassWriter writer = new ClassWriter(options, manager,
- tableName, compileMgr);
+ HsqldbTestServer.getTableName(), compileMgr);
try {
writer.generate();
@@ -683,55 +675,4 @@
};
runFailedGenerationTest(argv, HsqldbTestServer.getTableName());
}
-
- @Test(timeout = 10000)
- public void testWideTableClassGeneration() throws Exception {
- createWideTable();
- options = new SqoopOptions(HsqldbTestServer.getDbUrl(), WIDE_TABLE_NAME);
-
- // Set the option strings in an "argv" to redirect our srcdir and bindir.
- String [] argv = {
- "--bindir",
- JAR_GEN_DIR,
- "--outdir",
- CODE_GEN_DIR,
- };
-
- File ormJarFile = runGenerationTest(argv, WIDE_TABLE_NAME, WIDE_TABLE_NAME);
-
- ClassLoader prevClassLoader = ClassLoaderStack.addJarFile(ormJarFile.getCanonicalPath(),
- WIDE_TABLE_NAME);
- Class tableClass = Class.forName(WIDE_TABLE_NAME, true,
- Thread.currentThread().getContextClassLoader());
-
- Object instance = tableClass.newInstance();
- Method setterMethod = tableClass.getMethod("setField", String.class, Object.class);
- Random random = new Random(0);
- for (int j = 0; j < WIDE_TABLE_ROW_COUNT; ++j) {
- for (int i = 0; i < WIDE_TABLE_COLUMN_COUNT; ++i) {
- setterMethod.invoke(instance, "INTFIELD" + i, random.nextInt());
- }
- }
-
- if (null != prevClassLoader) {
- ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
- }
- }
-
- private void createWideTable() throws Exception {
- try (Connection conn = testServer.getConnection(); Statement stmt = conn.createStatement();) {
- stmt.executeUpdate("DROP TABLE \"" + WIDE_TABLE_NAME + "\" IF EXISTS");
- StringBuilder sb = new StringBuilder("CREATE TABLE \"" + WIDE_TABLE_NAME + "\" (");
- for (int i = 0; i < WIDE_TABLE_COLUMN_COUNT; ++i) {
- sb.append("intField" + i + " INT");
- if (i < WIDE_TABLE_COLUMN_COUNT - 1) {
- sb.append(",");
- } else {
- sb.append(")");
- }
- }
- stmt.executeUpdate(sb.toString());
- conn.commit();
- }
- }
}