blob: e98fdfe9ffe94681438874471584a537ebc98fbe [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.manager.oracle;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Assert;
import org.junit.Test;
import org.apache.sqoop.lib.SqoopRecord;
import org.apache.sqoop.manager.oracle.OraOopConstants.
OraOopOracleBlockToSplitAllocationMethod;
/**
* Unit tests for OraOopDataDrivenDBInputFormat.
*/
public class TestOraOopDataDrivenDBInputFormat extends OraOopTestCase {
/**
* We're just exposing a protected method so that it can be called by this
* unit test...
*/
public class Exposer<T extends SqoopRecord> extends
OraOopDataDrivenDBInputFormat<T> {
@Override
public
List<InputSplit>
groupTableDataChunksIntoSplits(
List<? extends OraOopOracleDataChunk> dataChunks,
int desiredNumberOfSplits,
OraOopConstants.OraOopOracleBlockToSplitAllocationMethod
blockAllocationMethod) {
return super.groupTableDataChunksIntoSplits(dataChunks,
desiredNumberOfSplits, blockAllocationMethod);
}
}
@SuppressWarnings("unchecked")
@Test
public void testgroupTableDataChunksIntoSplits() {
List<OraOopOracleDataChunk> dataChunks =
new ArrayList<OraOopOracleDataChunk>();
int startBlockNumber = 1;
for (int idx = 0; idx < 241; idx++) {
OraOopOracleDataChunk dataChunk =
new OraOopOracleDataChunkExtent("23480", 666, 1, startBlockNumber,
startBlockNumber + 8);
startBlockNumber += 8;
dataChunks.add(dataChunk);
}
@SuppressWarnings("rawtypes")
Exposer e = new Exposer();
// Prevent setJdbcFetchSize() from logging information about the fetch-size
// changing. Otherwise, the junit output will be polluted with messages
// about
// things that aren't actually a problem...
boolean logIsBeingCached = Exposer.LOG.getCacheLogEntries();
Exposer.LOG.setCacheLogEntries(true);
List<InputSplit> splits =
e.groupTableDataChunksIntoSplits(dataChunks, 32,
OraOopOracleBlockToSplitAllocationMethod.SEQUENTIAL);
Exposer.LOG.setCacheLogEntries(logIsBeingCached);
int highestNumberOfDataChunksAllocatedToASplit = 0;
int lowestNumberOfDataChunksAllocatedToASplit = Integer.MAX_VALUE;
// Check that all splits have data-chunks assigned to them...
for (InputSplit split : splits) {
int dataChunksAllocatedToThisSplit =
((OraOopDBInputSplit) split).getNumberOfDataChunks();
highestNumberOfDataChunksAllocatedToASplit =
Math.max(highestNumberOfDataChunksAllocatedToASplit,
dataChunksAllocatedToThisSplit);
lowestNumberOfDataChunksAllocatedToASplit =
Math.min(lowestNumberOfDataChunksAllocatedToASplit,
dataChunksAllocatedToThisSplit);
}
if (lowestNumberOfDataChunksAllocatedToASplit == 0) {
Assert
.fail("There is a split that has not had any "
+ "data-chunks allocated to it.");
}
// Check that the split with the least data-chunks has at least
// 75% of the number of data-chunks of the split with the most
// data-chunks...
double minExpectedWorkloadRatio = 0.75;
double actualWorkloadRatio =
(double) lowestNumberOfDataChunksAllocatedToASplit
/ highestNumberOfDataChunksAllocatedToASplit;
if (actualWorkloadRatio < minExpectedWorkloadRatio) {
Assert.fail(String.format(
"There is too much difference in the amount of work assigned "
+ "to the 'smallest' split and the 'largest' split. "
+ "The split with the least work should have at least %s "
+ "of the workload of the 'largest' split, but it actually "
+ "only has %s of the workload of the 'largest' split.",
minExpectedWorkloadRatio, actualWorkloadRatio));
}
}
}