Merge from trunk
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/MR-2454@1418161 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 95e4450..970d351 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -289,6 +289,9 @@
HADOOP-9037. Bug in test-patch.sh and precommit build process (Kihwal Lee
via jlowe)
+ HADOOP-9121. InodeTree.java has redundant check for vName while
+ throwing exception. (Arup Malakar via suresh)
+
OPTIMIZATIONS
HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -306,6 +309,10 @@
HADOOP-9020. Add a SASL PLAIN server (daryn via bobby)
+ HADOOP-9090. Support on-demand publish of metrics. (Mostafa Elhemali via
+ suresh)
+
+
IMPROVEMENTS
HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR.
@@ -453,6 +460,14 @@
HADOOP-9064. Augment DelegationTokenRenewer API to cancel the tokens on
calls to removeRenewAction. (kkambatl via tucu)
+ HADOOP-8958. ViewFs:Non absolute mount name failures when running
+ multiple tests on Windows. (Chris Nauroth via suresh)
+
+ HADOOP-9103. UTF8 class does not properly decode Unicode characters
+ outside the basic multilingual plane. (todd)
+
+ HADOOP-9070. Kerberos SASL server cannot find kerberos key. (daryn via atm)
+
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java
index ef64831..3047851 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java
@@ -118,8 +118,7 @@
return result;
}
- INode<T> resolveInternal(final String pathComponent)
- throws FileNotFoundException {
+ INode<T> resolveInternal(final String pathComponent) {
return children.get(pathComponent);
}
@@ -336,8 +335,8 @@
}
if (!gotMountTableEntry) {
throw new IOException(
- "ViewFs: Cannot initialize: Empty Mount table in config for " +
- vName == null ? "viewfs:///" : ("viewfs://" + vName + "/"));
+ "ViewFs: Cannot initialize: Empty Mount table in config for " +
+ "viewfs://" + vName + "/");
}
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java
index ef75129..4124949 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java
@@ -22,6 +22,7 @@
import java.io.DataInput;
import java.io.DataOutput;
+import org.apache.hadoop.util.StringUtils;
import org.apache.commons.logging.*;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -31,6 +32,9 @@
*
* <p>Also includes utilities for efficiently reading and writing UTF-8.
*
+ * Note that this decodes UTF-8 but actually encodes CESU-8, a variant of
+ * UTF-8: see http://en.wikipedia.org/wiki/CESU-8
+ *
* @deprecated replaced by Text
*/
@Deprecated
@@ -209,6 +213,19 @@
return result;
}
+ /**
+ * Convert a UTF-8 encoded byte array back into a string.
+ *
+ * @throws IOException if the byte array is invalid UTF8
+ */
+ public static String fromBytes(byte[] bytes) throws IOException {
+ DataInputBuffer dbuf = new DataInputBuffer();
+ dbuf.reset(bytes, 0, bytes.length);
+ StringBuilder buf = new StringBuilder(bytes.length);
+ readChars(dbuf, buf, bytes.length);
+ return buf.toString();
+ }
+
/** Read a UTF-8 encoded string.
*
* @see DataInput#readUTF()
@@ -230,18 +247,48 @@
while (i < nBytes) {
byte b = bytes[i++];
if ((b & 0x80) == 0) {
+ // 0b0xxxxxxx: 1-byte sequence
buffer.append((char)(b & 0x7F));
- } else if ((b & 0xE0) != 0xE0) {
+ } else if ((b & 0xE0) == 0xC0) {
+ // 0b110xxxxx: 2-byte sequence
buffer.append((char)(((b & 0x1F) << 6)
| (bytes[i++] & 0x3F)));
- } else {
+ } else if ((b & 0xF0) == 0xE0) {
+ // 0b1110xxxx: 3-byte sequence
buffer.append((char)(((b & 0x0F) << 12)
| ((bytes[i++] & 0x3F) << 6)
| (bytes[i++] & 0x3F)));
+ } else if ((b & 0xF8) == 0xF0) {
+ // 0b11110xxx: 4-byte sequence
+ int codepoint =
+ ((b & 0x07) << 18)
+ | ((bytes[i++] & 0x3F) << 12)
+ | ((bytes[i++] & 0x3F) << 6)
+ | ((bytes[i++] & 0x3F));
+ buffer.append(highSurrogate(codepoint))
+ .append(lowSurrogate(codepoint));
+ } else {
+ // The UTF8 standard describes 5-byte and 6-byte sequences, but
+ // these are no longer allowed as of 2003 (see RFC 3629)
+
+ // Only show the next 6 bytes max in the error code - in case the
+ // buffer is large, this will prevent an exceedingly large message.
+ int endForError = Math.min(i + 5, nBytes);
+ throw new IOException("Invalid UTF8 at " +
+ StringUtils.byteToHexString(bytes, i - 1, endForError));
}
}
}
+ private static char highSurrogate(int codePoint) {
+ return (char) ((codePoint >>> 10)
+ + (Character.MIN_HIGH_SURROGATE - (Character.MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
+ }
+
+ private static char lowSurrogate(int codePoint) {
+ return (char) ((codePoint & 0x3ff) + Character.MIN_LOW_SURROGATE);
+ }
+
/** Write a UTF-8 encoded string.
*
* @see DataOutput#writeUTF(String)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
index eb735ff..093aada 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
@@ -199,7 +199,8 @@
// in ObjectWritable to efficiently transmit arrays of primitives
// 6 : Made RPC payload header explicit
// 7 : Changed Ipc Connection Header to use Protocol buffers
- public static final byte CURRENT_VERSION = 7;
+ // 8 : SASL server always sends a final response
+ public static final byte CURRENT_VERSION = 8;
/**
* Initial and max size of response buffer
@@ -1220,8 +1221,8 @@
AUDITLOG.warn(AUTH_FAILED_FOR + clientIP + ":" + attemptingUser);
throw e;
}
- if (replyToken == null && authMethod == AuthMethod.PLAIN) {
- // client needs at least response to know if it should use SIMPLE
+ if (saslServer.isComplete() && replyToken == null) {
+ // send final response for success
replyToken = new byte[0];
}
if (replyToken != null) {
@@ -1392,7 +1393,7 @@
}
private AuthMethod initializeAuthContext(AuthMethod authMethod)
- throws IOException {
+ throws IOException, InterruptedException {
try {
if (enabledAuthMethods.contains(authMethod)) {
saslServer = createSaslServer(authMethod);
@@ -1425,8 +1426,7 @@
}
private SaslServer createSaslServer(AuthMethod authMethod)
- throws IOException {
- SaslServer saslServer = null;
+ throws IOException, InterruptedException {
String hostname = null;
String saslProtocol = null;
CallbackHandler saslCallback = null;
@@ -1462,10 +1462,23 @@
"Server does not support SASL " + authMethod);
}
- String mechanism = authMethod.getMechanismName();
- saslServer = Sasl.createSaslServer(
- mechanism, saslProtocol, hostname,
- SaslRpcServer.SASL_PROPS, saslCallback);
+ return createSaslServer(authMethod.getMechanismName(), saslProtocol,
+ hostname, saslCallback);
+ }
+
+ private SaslServer createSaslServer(final String mechanism,
+ final String protocol,
+ final String hostname,
+ final CallbackHandler callback
+ ) throws IOException, InterruptedException {
+ SaslServer saslServer = UserGroupInformation.getCurrentUser().doAs(
+ new PrivilegedExceptionAction<SaslServer>() {
+ @Override
+ public SaslServer run() throws SaslException {
+ return Sasl.createSaslServer(mechanism, protocol, hostname,
+ SaslRpcServer.SASL_PROPS, callback);
+ }
+ });
if (saslServer == null) {
throw new AccessControlException(
"Unable to find SASL server implementation for " + mechanism);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java
index ab403a2..e853319 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java
@@ -91,6 +91,17 @@
public abstract void register(Callback callback);
/**
+ * Requests an immediate publish of all metrics from sources to sinks.
+ *
+ * This is a "soft" request: the expectation is that a best effort will be
+ * done to synchronously snapshot the metrics from all the sources and put
+ * them in all the sinks (including flushing the sinks) before returning to
+ * the caller. If this can't be accomplished in reasonable time it's OK to
+ * return to the caller before everything is done.
+ */
+ public abstract void publishMetricsNow();
+
+ /**
* Shutdown the metrics system completely (usually during server shutdown.)
* The MetricsSystemMXBean will be unregistered.
* @return true if shutdown completed
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java
index 688eca4..56868c1 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.metrics2.impl;
import java.util.Random;
+import java.util.concurrent.*;
import static com.google.common.base.Preconditions.*;
@@ -48,6 +49,7 @@
private volatile boolean stopping = false;
private volatile boolean inError = false;
private final int period, firstRetryDelay, retryCount;
+ private final long oobPutTimeout;
private final float retryBackoff;
private final MetricsRegistry registry = new MetricsRegistry("sinkadapter");
private final MutableStat latency;
@@ -69,6 +71,8 @@
this.period = checkArg(period, period > 0, "period");
firstRetryDelay = checkArg(retryDelay, retryDelay > 0, "retry delay");
this.retryBackoff = checkArg(retryBackoff, retryBackoff>1, "retry backoff");
+ oobPutTimeout = (long)
+ (firstRetryDelay * Math.pow(retryBackoff, retryCount) * 1000);
this.retryCount = retryCount;
this.queue = new SinkQueue<MetricsBuffer>(checkArg(queueCapacity,
queueCapacity > 0, "queue capacity"));
@@ -95,6 +99,23 @@
}
return true; // OK
}
+
+ public boolean putMetricsImmediate(MetricsBuffer buffer) {
+ WaitableMetricsBuffer waitableBuffer =
+ new WaitableMetricsBuffer(buffer);
+ if (!queue.enqueue(waitableBuffer)) {
+ LOG.warn(name + " has a full queue and can't consume the given metrics.");
+ dropped.incr();
+ return false;
+ }
+ if (!waitableBuffer.waitTillNotified(oobPutTimeout)) {
+ LOG.warn(name +
+ " couldn't fulfill an immediate putMetrics request in time." +
+ " Abandoning.");
+ return false;
+ }
+ return true;
+ }
void publishMetricsFromQueue() {
int retryDelay = firstRetryDelay;
@@ -158,6 +179,9 @@
sink.flush();
latency.add(Time.now() - ts);
}
+ if (buffer instanceof WaitableMetricsBuffer) {
+ ((WaitableMetricsBuffer)buffer).notifyAnyWaiters();
+ }
LOG.debug("Done");
}
@@ -191,4 +215,26 @@
MetricsSink sink() {
return sink;
}
+
+ static class WaitableMetricsBuffer extends MetricsBuffer {
+ private final Semaphore notificationSemaphore =
+ new Semaphore(0);
+
+ public WaitableMetricsBuffer(MetricsBuffer metricsBuffer) {
+ super(metricsBuffer);
+ }
+
+ public boolean waitTillNotified(long millisecondsToWait) {
+ try {
+ return notificationSemaphore.tryAcquire(millisecondsToWait,
+ TimeUnit.MILLISECONDS);
+ } catch (InterruptedException e) {
+ return false;
+ }
+ }
+
+ public void notifyAnyWaiters() {
+ notificationSemaphore.release();
+ }
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java
index 0bf320b..2224d81 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java
@@ -344,9 +344,19 @@
synchronized void onTimerEvent() {
logicalTime += period;
if (sinks.size() > 0) {
- publishMetrics(sampleMetrics());
+ publishMetrics(sampleMetrics(), false);
}
}
+
+ /**
+ * Requests an immediate publish of all metrics from sources to sinks.
+ */
+ @Override
+ public void publishMetricsNow() {
+ if (sinks.size() > 0) {
+ publishMetrics(sampleMetrics(), true);
+ }
+ }
/**
* Sample all the sources for a snapshot of metrics/tags
@@ -380,12 +390,20 @@
/**
* Publish a metrics snapshot to all the sinks
* @param buffer the metrics snapshot to publish
+ * @param immediate indicates that we should publish metrics immediately
+ * instead of using a separate thread.
*/
- synchronized void publishMetrics(MetricsBuffer buffer) {
+ synchronized void publishMetrics(MetricsBuffer buffer, boolean immediate) {
int dropped = 0;
for (MetricsSinkAdapter sa : sinks.values()) {
long startTime = Time.now();
- dropped += sa.putMetrics(buffer, logicalTime) ? 0 : 1;
+ boolean result;
+ if (immediate) {
+ result = sa.putMetricsImmediate(buffer);
+ } else {
+ result = sa.putMetrics(buffer, logicalTime);
+ }
+ dropped += result ? 0 : 1;
publishStat.add(Time.now() - startTime);
}
droppedPubAll.incr(dropped);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
index 046f0e7..3f3e8ef 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
@@ -28,6 +28,9 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.util.ReflectionUtils;
/** The class represents a cluster of computer with a tree hierarchical
* network topology.
@@ -52,6 +55,19 @@
super(msg);
}
}
+
+ /**
+ * Get an instance of NetworkTopology based on the value of the configuration
+ * parameter net.topology.impl.
+ *
+ * @param conf the configuration to be used
+ * @return an instance of NetworkTopology
+ */
+ public static NetworkTopology getInstance(Configuration conf){
+ return ReflectionUtils.newInstance(
+ conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
+ NetworkTopology.class, NetworkTopology.class), conf);
+ }
/** InnerNode represents a switch/router of a data center or rack.
* Different from a leaf node, it has non-null children.
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemTestSetup.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemTestSetup.java
index 446b38e..c502fbd 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemTestSetup.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemTestSetup.java
@@ -26,6 +26,7 @@
import org.apache.hadoop.fs.FsConstants;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.viewfs.ConfigUtil;
+import org.apache.hadoop.util.Shell;
import org.mortbay.log.Log;
@@ -123,8 +124,11 @@
* in the target file system.
*/
static void linkUpFirstComponents(Configuration conf, String path, FileSystem fsTarget, String info) {
- int indexOf2ndSlash = path.indexOf('/', 1);
- String firstComponent = path.substring(0, indexOf2ndSlash);
+ int indexOfEnd = path.indexOf('/', 1);
+ if (Shell.WINDOWS) {
+ indexOfEnd = path.indexOf('/', indexOfEnd + 1);
+ }
+ String firstComponent = path.substring(0, indexOfEnd);
URI linkTarget = fsTarget.makeQualified(new Path(firstComponent)).toUri();
ConfigUtil.addLink(conf, firstComponent, linkTarget);
Log.info("Added link for " + info + " "
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java
index ac63217..fd519ff 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java
@@ -25,6 +25,7 @@
import org.apache.hadoop.fs.FsConstants;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.viewfs.ConfigUtil;
+import org.apache.hadoop.util.Shell;
import org.mortbay.log.Log;
@@ -120,8 +121,11 @@
*/
static void linkUpFirstComponents(Configuration conf, String path,
FileContext fsTarget, String info) {
- int indexOf2ndSlash = path.indexOf('/', 1);
- String firstComponent = path.substring(0, indexOf2ndSlash);
+ int indexOfEnd = path.indexOf('/', 1);
+ if (Shell.WINDOWS) {
+ indexOfEnd = path.indexOf('/', indexOfEnd + 1);
+ }
+ String firstComponent = path.substring(0, indexOfEnd);
URI linkTarget = fsTarget.makeQualified(new Path(firstComponent)).toUri();
ConfigUtil.addLink(conf, firstComponent, linkTarget);
Log.info("Added link for " + info + " "
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java
index 5c068a1..902f215 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java
@@ -19,8 +19,12 @@
package org.apache.hadoop.io;
import junit.framework.TestCase;
+import java.io.IOException;
import java.util.Random;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.StringUtils;
+
/** Unit tests for UTF8. */
@SuppressWarnings("deprecation")
public class TestUTF8 extends TestCase {
@@ -92,5 +96,55 @@
assertEquals(s, new String(dob.getData(), 2, dob.getLength()-2, "UTF-8"));
}
-
+
+ /**
+ * Test encoding and decoding of UTF8 outside the basic multilingual plane.
+ *
+ * This is a regression test for HADOOP-9103.
+ */
+ public void testNonBasicMultilingualPlane() throws Exception {
+ // Test using the "CAT FACE" character (U+1F431)
+ // See http://www.fileformat.info/info/unicode/char/1f431/index.htm
+ String catFace = "\uD83D\uDC31";
+
+ // This encodes to 4 bytes in UTF-8:
+ byte[] encoded = catFace.getBytes("UTF-8");
+ assertEquals(4, encoded.length);
+ assertEquals("f09f90b1", StringUtils.byteToHexString(encoded));
+
+ // Decode back to String using our own decoder
+ String roundTrip = UTF8.fromBytes(encoded);
+ assertEquals(catFace, roundTrip);
+ }
+
+ /**
+ * Test that decoding invalid UTF8 throws an appropriate error message.
+ */
+ public void testInvalidUTF8() throws Exception {
+ byte[] invalid = new byte[] {
+ 0x01, 0x02, (byte)0xff, (byte)0xff, 0x01, 0x02, 0x03, 0x04, 0x05 };
+ try {
+ UTF8.fromBytes(invalid);
+ fail("did not throw an exception");
+ } catch (IOException ioe) {
+ GenericTestUtils.assertExceptionContains(
+ "Invalid UTF8 at ffff01020304", ioe);
+ }
+ }
+
+ /**
+ * Test for a 5-byte UTF8 sequence, which is now considered illegal.
+ */
+ public void test5ByteUtf8Sequence() throws Exception {
+ byte[] invalid = new byte[] {
+ 0x01, 0x02, (byte)0xf8, (byte)0x88, (byte)0x80,
+ (byte)0x80, (byte)0x80, 0x04, 0x05 };
+ try {
+ UTF8.fromBytes(invalid);
+ fail("did not throw an exception");
+ } catch (IOException ioe) {
+ GenericTestUtils.assertExceptionContains(
+ "Invalid UTF8 at f88880808004", ioe);
+ }
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java
index 1001234..7294ee9 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java
@@ -29,8 +29,6 @@
import java.util.HashSet;
import java.util.List;
import java.util.Set;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -115,31 +113,23 @@
final int expectedCountFromGanglia30 = expectedMetrics.length;
final int expectedCountFromGanglia31 = 2 * expectedMetrics.length;
- // use latch to make sure we received required records before shutting
- // down the MetricSystem
- CountDownLatch latch = new CountDownLatch(
- expectedCountFromGanglia30 + expectedCountFromGanglia31);
-
// Setup test for GangliaSink30
AbstractGangliaSink gsink30 = new GangliaSink30();
gsink30.init(cb.subset("test"));
- MockDatagramSocket mockds30 = new MockDatagramSocket(latch);
+ MockDatagramSocket mockds30 = new MockDatagramSocket();
GangliaMetricsTestHelper.setDatagramSocket(gsink30, mockds30);
// Setup test for GangliaSink31
AbstractGangliaSink gsink31 = new GangliaSink31();
gsink31.init(cb.subset("test"));
- MockDatagramSocket mockds31 = new MockDatagramSocket(latch);
+ MockDatagramSocket mockds31 = new MockDatagramSocket();
GangliaMetricsTestHelper.setDatagramSocket(gsink31, mockds31);
// register the sinks
ms.register("gsink30", "gsink30 desc", gsink30);
ms.register("gsink31", "gsink31 desc", gsink31);
- ms.onTimerEvent(); // trigger something interesting
+ ms.publishMetricsNow(); // publish the metrics
- // wait for all records and the stop MetricSystem. Without this
- // sometime the ms gets shutdown before all the sinks have consumed
- latch.await(200, TimeUnit.MILLISECONDS);
ms.stop();
// check GanfliaSink30 data
@@ -198,7 +188,6 @@
*/
private class MockDatagramSocket extends DatagramSocket {
private ArrayList<byte[]> capture;
- private CountDownLatch latch;
/**
* @throws SocketException
@@ -207,15 +196,6 @@
capture = new ArrayList<byte[]>();
}
- /**
- * @param latch
- * @throws SocketException
- */
- public MockDatagramSocket(CountDownLatch latch) throws SocketException {
- this();
- this.latch = latch;
- }
-
/* (non-Javadoc)
* @see java.net.DatagramSocket#send(java.net.DatagramPacket)
*/
@@ -225,9 +205,6 @@
byte[] bytes = new byte[p.getLength()];
System.arraycopy(p.getData(), p.getOffset(), bytes, 0, p.getLength());
capture.add(bytes);
-
- // decrement the latch
- latch.countDown();
}
/**
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java
index 3d3f070..0aa1903 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java
@@ -18,7 +18,11 @@
package org.apache.hadoop.metrics2.impl;
-import java.util.List;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.atomic.*;
+
+import javax.annotation.Nullable;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -26,9 +30,11 @@
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.runners.MockitoJUnitRunner;
+
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
+import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import org.apache.commons.configuration.SubsetConfiguration;
@@ -36,6 +42,8 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.metrics2.MetricsException;
import static org.apache.hadoop.test.MoreAsserts.*;
+
+import org.apache.hadoop.metrics2.AbstractMetric;
import org.apache.hadoop.metrics2.MetricsRecord;
import org.apache.hadoop.metrics2.MetricsSink;
import org.apache.hadoop.metrics2.MetricsSource;
@@ -47,6 +55,7 @@
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
+import org.apache.hadoop.util.StringUtils;
/**
* Test the MetricsSystemImpl class
@@ -72,7 +81,7 @@
}
@Test public void testInitFirst() throws Exception {
- ConfigBuilder cb = new ConfigBuilder().add("*.period", 8)
+ new ConfigBuilder().add("*.period", 8)
//.add("test.sink.plugin.urls", getPluginUrlsAsString())
.add("test.sink.test.class", TestSink.class.getName())
.add("test.*.source.filter.exclude", "s0")
@@ -93,8 +102,9 @@
MetricsSink sink2 = mock(MetricsSink.class);
ms.registerSink("sink1", "sink1 desc", sink1);
ms.registerSink("sink2", "sink2 desc", sink2);
- ms.onTimerEvent(); // trigger something interesting
+ ms.publishMetricsNow(); // publish the metrics
ms.stop();
+ ms.shutdown();
verify(sink1, times(2)).putMetrics(r1.capture());
List<MetricsRecord> mr1 = r1.getAllValues();
@@ -104,6 +114,177 @@
assertEquals("output", mr1, mr2);
}
+ @Test public void testMultiThreadedPublish() throws Exception {
+ new ConfigBuilder().add("*.period", 80)
+ .add("test.sink.Collector.queue.capacity", "20")
+ .save(TestMetricsConfig.getTestFilename("hadoop-metrics2-test"));
+ final MetricsSystemImpl ms = new MetricsSystemImpl("Test");
+ ms.start();
+ final int numThreads = 10;
+ final CollectingSink sink = new CollectingSink(numThreads);
+ ms.registerSink("Collector",
+ "Collector of values from all threads.", sink);
+ final TestSource[] sources = new TestSource[numThreads];
+ final Thread[] threads = new Thread[numThreads];
+ final String[] results = new String[numThreads];
+ final CyclicBarrier barrier1 = new CyclicBarrier(numThreads),
+ barrier2 = new CyclicBarrier(numThreads);
+ for (int i = 0; i < numThreads; i++) {
+ sources[i] = ms.register("threadSource" + i,
+ "A source of my threaded goodness.",
+ new TestSource("threadSourceRec" + i));
+ threads[i] = new Thread(new Runnable() {
+ private boolean safeAwait(int mySource, CyclicBarrier barrier) {
+ try {
+                  barrier.await(2, TimeUnit.SECONDS);
+ } catch (InterruptedException e) {
+ results[mySource] = "Interrupted";
+ return false;
+ } catch (BrokenBarrierException e) {
+ results[mySource] = "Broken Barrier";
+ return false;
+ } catch (TimeoutException e) {
+ results[mySource] = "Timed out on barrier";
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public void run() {
+ int mySource = Integer.parseInt(Thread.currentThread().getName());
+ if (sink.collected[mySource].get() != 0L) {
+ results[mySource] = "Someone else collected my metric!";
+ return;
+ }
+ // Wait for all the threads to come here so we can hammer
+ // the system at the same time
+ if (!safeAwait(mySource, barrier1)) return;
+ sources[mySource].g1.set(230);
+ ms.publishMetricsNow();
+ // Since some other thread may have snatched my metric,
+ // I need to wait for the threads to finish before checking.
+ if (!safeAwait(mySource, barrier2)) return;
+ if (sink.collected[mySource].get() != 230L) {
+ results[mySource] = "Metric not collected!";
+ return;
+ }
+ results[mySource] = "Passed";
+ }
+ }, "" + i);
+ }
+ for (Thread t : threads)
+ t.start();
+ for (Thread t : threads)
+ t.join();
+ assertEquals(0L, ms.droppedPubAll.value());
+ assertTrue(StringUtils.join("\n", Arrays.asList(results)),
+ Iterables.all(Arrays.asList(results), new Predicate<String>() {
+ @Override
+ public boolean apply(@Nullable String input) {
+ return input.equalsIgnoreCase("Passed");
+ }
+ }));
+ ms.stop();
+ ms.shutdown();
+ }
+
+ private static class CollectingSink implements MetricsSink {
+ private final AtomicLong[] collected;
+
+ public CollectingSink(int capacity) {
+ collected = new AtomicLong[capacity];
+ for (int i = 0; i < capacity; i++) {
+ collected[i] = new AtomicLong();
+ }
+ }
+
+ @Override
+ public void init(SubsetConfiguration conf) {
+ }
+
+ @Override
+ public void putMetrics(MetricsRecord record) {
+ final String prefix = "threadSourceRec";
+ if (record.name().startsWith(prefix)) {
+ final int recordNumber = Integer.parseInt(
+ record.name().substring(prefix.length()));
+ ArrayList<String> names = new ArrayList<String>();
+ for (AbstractMetric m : record.metrics()) {
+ if (m.name().equalsIgnoreCase("g1")) {
+ collected[recordNumber].set(m.value().longValue());
+ return;
+ }
+ names.add(m.name());
+ }
+ }
+ }
+
+ @Override
+ public void flush() {
+ }
+ }
+
+ @Test public void testHangingSink() {
+ new ConfigBuilder().add("*.period", 8)
+ .add("test.sink.test.class", TestSink.class.getName())
+ .add("test.sink.hanging.retry.delay", "1")
+ .add("test.sink.hanging.retry.backoff", "1.01")
+ .add("test.sink.hanging.retry.count", "0")
+ .save(TestMetricsConfig.getTestFilename("hadoop-metrics2-test"));
+ MetricsSystemImpl ms = new MetricsSystemImpl("Test");
+ ms.start();
+ TestSource s = ms.register("s3", "s3 desc", new TestSource("s3rec"));
+ s.c1.incr();
+ HangingSink hanging = new HangingSink();
+ ms.registerSink("hanging", "Hang the sink!", hanging);
+ ms.publishMetricsNow();
+ assertEquals(1L, ms.droppedPubAll.value());
+ assertFalse(hanging.getInterrupted());
+ ms.stop();
+ ms.shutdown();
+ assertTrue(hanging.getInterrupted());
+ assertTrue("The sink didn't get called after its first hang " +
+ "for subsequent records.", hanging.getGotCalledSecondTime());
+ }
+
+ private static class HangingSink implements MetricsSink {
+ private volatile boolean interrupted;
+ private boolean gotCalledSecondTime;
+ private boolean firstTime = true;
+
+ public boolean getGotCalledSecondTime() {
+ return gotCalledSecondTime;
+ }
+
+ public boolean getInterrupted() {
+ return interrupted;
+ }
+
+ @Override
+ public void init(SubsetConfiguration conf) {
+ }
+
+ @Override
+ public void putMetrics(MetricsRecord record) {
+ // No need to hang every time, just the first record.
+ if (!firstTime) {
+ gotCalledSecondTime = true;
+ return;
+ }
+ firstTime = false;
+ try {
+ Thread.sleep(10 * 1000);
+ } catch (InterruptedException ex) {
+ interrupted = true;
+ }
+ }
+
+ @Override
+ public void flush() {
+ }
+ }
+
@Test public void testRegisterDups() {
MetricsSystem ms = new MetricsSystemImpl();
TestSource ts1 = new TestSource("ts1");
@@ -116,6 +297,7 @@
MetricsSource s2 = ms.getSource("ts1");
assertNotNull(s2);
assertNotSame(s1, s2);
+ ms.shutdown();
}
@Test(expected=MetricsException.class) public void testRegisterDupError() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 4dbdd64..0c346c7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -17,6 +17,9 @@
reliably storing HDFS edit logs. See dedicated section below for breakdown
of subtasks.
+ HDFS-3495. Update Balancer to support new NetworkTopology with NodeGroup.
+ (Junping Du via szetszwo)
+
IMPROVEMENTS
HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants ->
@@ -89,17 +92,12 @@
HDFS-3040. TestMulitipleNNDataBlockScanner is misspelled. (Madhukara Phatak
via atm)
- HDFS-3049. During the normal NN startup process, fall back on a different
- edit log if we see one that is corrupt (Colin Patrick McCabe via todd)
-
HDFS-3478. Test quotas with Long.Max_Value. (Sujay Rau via eli)
HDFS-3498. Support replica removal in BlockPlacementPolicy and make
BlockPlacementPolicyDefault extensible for reusing code in subclasses.
(Junping Du via szetszwo)
- HDFS-3571. Allow EditLogFileInputStream to read from a remote URL (todd)
-
HDFS-3510. Editlog pre-allocation is performed prior to writing edits
to avoid partial edits case disk out of space.(Colin McCabe via suresh)
@@ -143,8 +141,6 @@
HDFS-4052. BlockManager#invalidateWork should print log outside the lock.
(Jing Zhao via suresh)
- HDFS-4110. Refine a log printed in JNStorage. (Liang Xie via suresh)
-
HDFS-4124. Refactor INodeDirectory#getExistingPathINodes() to enable
returning more than INode array. (Jing Zhao via suresh)
@@ -157,10 +153,6 @@
HDFS-4152. Add a new class BlocksMapUpdateInfo for the parameter in
INode.collectSubtreeBlocksAndClear(..). (Jing Zhao via szetszwo)
- HDFS-4153. Add START_MSG/SHUTDOWN_MSG for JournalNode. (liang xie via atm)
-
- HDFS-3935. Add JournalNode to the start/stop scripts (Andy Isaacson via todd)
-
HDFS-4206. Change the fields in INode and its subclasses to private.
(szetszwo)
@@ -173,6 +165,11 @@
HDFS-4209. Clean up the addNode/addChild/addChildNoQuotaCheck methods in
FSDirectory and INodeDirectory. (szetszwo)
+ HDFS-3358. Specify explicitly that the NN UI status total is talking
+ of persistent objects on heap. (harsh)
+
+ HDFS-4234. Use generic code for choosing datanode in Balancer. (szetszwo)
+
OPTIMIZATIONS
BUG FIXES
@@ -265,107 +262,9 @@
HDFS-4105. The SPNEGO user for secondary namenode should use the web
keytab. (Arpit Gupta via jitendra)
- BREAKDOWN OF HDFS-3077 SUBTASKS
-
- HDFS-3077. Quorum-based protocol for reading and writing edit logs.
- (todd, Brandon Li, and Hari Mankude via todd)
-
- HDFS-3694. Fix getEditLogManifest to fetch httpPort if necessary (todd)
-
- HDFS-3692. Support purgeEditLogs() call to remotely purge logs on JNs
- (todd)
-
- HDFS-3693. JNStorage should read its storage info even before a writer
- becomes active (todd)
-
- HDFS-3725. Fix QJM startup when individual JNs have gaps (todd)
-
- HDFS-3741. Exhaustive failure injection test for skipped RPCs (todd)
-
- HDFS-3773. TestNNWithQJM fails after HDFS-3741. (atm)
-
- HDFS-3793. Implement genericized format() in QJM (todd)
-
- HDFS-3795. QJM: validate journal dir at startup (todd)
-
- HDFS-3798. Avoid throwing NPE when finalizeSegment() is called on invalid
- segment (todd)
-
- HDFS-3799. QJM: handle empty log segments during recovery (todd)
-
- HDFS-3797. QJM: add segment txid as a parameter to journal() RPC (todd)
-
- HDFS-3800. improvements to QJM fault testing (todd)
-
- HDFS-3823. QJM: TestQJMWithFaults fails occasionally because of missed
- setting of HTTP port. (todd and atm)
-
- HDFS-3826. QJM: Some trivial logging / exception text improvements. (todd
- and atm)
-
- HDFS-3839. QJM: hadoop-daemon.sh should be updated to accept "journalnode"
- (eli)
-
- HDFS-3845. Fixes for edge cases in QJM recovery protocol (todd)
-
- HDFS-3877. QJM: Provide defaults for dfs.journalnode.*address (eli)
-
- HDFS-3863. Track last "committed" txid in QJM (todd)
-
- HDFS-3869. Expose non-file journal manager details in web UI (todd)
-
- HDFS-3884. Journal format() should reset cached values (todd)
-
- HDFS-3870. Add metrics to JournalNode (todd)
-
- HDFS-3891. Make selectInputStreams throw IOE instead of RTE (todd)
-
- HDFS-3726. If a logger misses an RPC, don't retry that logger until next
- segment (todd)
-
- HDFS-3893. QJM: Make QJM work with security enabled. (atm)
-
- HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm)
-
- HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
-
- HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
-
- HDFS-3900. QJM: avoid validating log segments on log rolls (todd)
-
- HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are
- out-of-sync (todd)
-
- HDFS-3899. QJM: Add client-side metrics (todd)
-
- HDFS-3914. QJM: acceptRecovery should abort current segment (todd)
-
- HDFS-3915. QJM: Failover fails with auth error in secure cluster (todd)
-
- HDFS-3906. QJM: quorum timeout on failover with large log segment (todd)
-
- HDFS-3840. JournalNodes log JournalNotFormattedException backtrace error
- before being formatted (todd)
-
- HDFS-3894. QJM: testRecoverAfterDoubleFailures can be flaky due to IPC
- client caching (todd)
-
- HDFS-3926. QJM: Add user documentation for QJM. (atm)
-
- HDFS-3943. QJM: remove currently-unused md5sum field (todd)
-
- HDFS-3950. QJM: misc TODO cleanup, improved log messages, etc. (todd)
-
- HDFS-3955. QJM: Make acceptRecovery() atomic. (todd)
-
- HDFS-3956. QJM: purge temporary files when no longer within retention
- period (todd)
-
- HDFS-4004. TestJournalNode#testJournal fails because of test case execution
- order (Chao Shi via todd)
-
- HDFS-4017. Unclosed FileInputStream in GetJournalEditServlet
- (Chao Shi via todd)
+ HDFS-4240. For nodegroup-aware block placement, when a node is excluded,
+ the nodes in the same nodegroup should also be excluded. (Junping Du
+ via szetszwo)
Release 2.0.3-alpha - Unreleased
@@ -386,6 +285,9 @@
HDFS-4155. libhdfs implementation of hsync API (Liang Xie via todd)
+ HDFS-4213. Add an API to hsync for updating the last block length at the
+ namenode. (Jing Zhao via szetszwo)
+
IMPROVEMENTS
HDFS-3925. Prettify PipelineAck#toString() for printing to a log
@@ -487,6 +389,24 @@
HDFS-4214. OfflineEditsViewer should print out the offset at which it
encountered an error. (Colin Patrick McCabe via atm)
+ HDFS-4199. Provide test for HdfsVolumeId. (Ivan A. Veselovsky via atm)
+
+ HDFS-3049. During the normal NN startup process, fall back on a different
+ edit log if we see one that is corrupt (Colin Patrick McCabe via todd)
+
+ HDFS-3571. Allow EditLogFileInputStream to read from a remote URL (todd)
+
+ HDFS-4110. Refine a log printed in JNStorage. (Liang Xie via suresh)
+
+ HDFS-4153. Add START_MSG/SHUTDOWN_MSG for JournalNode. (liang xie via atm)
+
+ HDFS-3935. Add JournalNode to the start/stop scripts (Andy Isaacson via todd)
+
+ HDFS-4268. Remove redundant enum NNHAStatusHeartbeat.State. (shv)
+
+ HDFS-3680. Allow customized audit logging in HDFS FSNamesystem. (Marcelo
+ Vanzin via atm)
+
OPTIMIZATIONS
BUG FIXES
@@ -636,6 +556,122 @@
HDFS-4216. Do not ignore QuotaExceededException when adding symlinks.
(szetszwo)
+ HDFS-4242. Map.Entry is incorrectly used in LeaseManager since the behavior
+ of it is undefined after the iteration or modifications of the map.
+ (szetszwo)
+
+ HDFS-4231. BackupNode: Introduce BackupState. (shv)
+
+ HDFS-4243. When replacing an INodeDirectory, the parent pointers of the
+ children of the child have to be updated to the new child. (Jing Zhao
+ via szetszwo)
+
+ HDFS-4238. Standby namenode should not do purging of shared
+ storage edits. (todd)
+
+ BREAKDOWN OF HDFS-3077 SUBTASKS
+
+ HDFS-3077. Quorum-based protocol for reading and writing edit logs.
+ (todd, Brandon Li, and Hari Mankude via todd)
+
+ HDFS-3694. Fix getEditLogManifest to fetch httpPort if necessary (todd)
+
+ HDFS-3692. Support purgeEditLogs() call to remotely purge logs on JNs
+ (todd)
+
+ HDFS-3693. JNStorage should read its storage info even before a writer
+ becomes active (todd)
+
+ HDFS-3725. Fix QJM startup when individual JNs have gaps (todd)
+
+ HDFS-3741. Exhaustive failure injection test for skipped RPCs (todd)
+
+ HDFS-3773. TestNNWithQJM fails after HDFS-3741. (atm)
+
+ HDFS-3793. Implement genericized format() in QJM (todd)
+
+ HDFS-3795. QJM: validate journal dir at startup (todd)
+
+ HDFS-3798. Avoid throwing NPE when finalizeSegment() is called on invalid
+ segment (todd)
+
+ HDFS-3799. QJM: handle empty log segments during recovery (todd)
+
+ HDFS-3797. QJM: add segment txid as a parameter to journal() RPC (todd)
+
+ HDFS-3800. improvements to QJM fault testing (todd)
+
+ HDFS-3823. QJM: TestQJMWithFaults fails occasionally because of missed
+ setting of HTTP port. (todd and atm)
+
+ HDFS-3826. QJM: Some trivial logging / exception text improvements. (todd
+ and atm)
+
+ HDFS-3839. QJM: hadoop-daemon.sh should be updated to accept "journalnode"
+ (eli)
+
+ HDFS-3845. Fixes for edge cases in QJM recovery protocol (todd)
+
+ HDFS-3877. QJM: Provide defaults for dfs.journalnode.*address (eli)
+
+ HDFS-3863. Track last "committed" txid in QJM (todd)
+
+ HDFS-3869. Expose non-file journal manager details in web UI (todd)
+
+ HDFS-3884. Journal format() should reset cached values (todd)
+
+ HDFS-3870. Add metrics to JournalNode (todd)
+
+ HDFS-3891. Make selectInputStreams throw IOE instead of RTE (todd)
+
+ HDFS-3726. If a logger misses an RPC, don't retry that logger until next
+ segment (todd)
+
+ HDFS-3893. QJM: Make QJM work with security enabled. (atm)
+
+ HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm)
+
+ HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
+
+ HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
+
+ HDFS-3900. QJM: avoid validating log segments on log rolls (todd)
+
+ HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are
+ out-of-sync (todd)
+
+ HDFS-3899. QJM: Add client-side metrics (todd)
+
+ HDFS-3914. QJM: acceptRecovery should abort current segment (todd)
+
+ HDFS-3915. QJM: Failover fails with auth error in secure cluster (todd)
+
+ HDFS-3906. QJM: quorum timeout on failover with large log segment (todd)
+
+ HDFS-3840. JournalNodes log JournalNotFormattedException backtrace error
+ before being formatted (todd)
+
+ HDFS-3894. QJM: testRecoverAfterDoubleFailures can be flaky due to IPC
+ client caching (todd)
+
+ HDFS-3926. QJM: Add user documentation for QJM. (atm)
+
+ HDFS-3943. QJM: remove currently-unused md5sum field (todd)
+
+ HDFS-3950. QJM: misc TODO cleanup, improved log messages, etc. (todd)
+
+ HDFS-3955. QJM: Make acceptRecovery() atomic. (todd)
+
+ HDFS-3956. QJM: purge temporary files when no longer within retention
+ period (todd)
+
+ HDFS-4004. TestJournalNode#testJournal fails because of test case execution
+ order (Chao Shi via todd)
+
+ HDFS-4017. Unclosed FileInputStream in GetJournalEditServlet
+ (Chao Shi via todd)
+
+
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES
@@ -2025,6 +2061,11 @@
BUG FIXES
+ HDFS-4247. saveNamespace should be tolerant of dangling lease (daryn)
+
+ HDFS-4248. Renaming directories may incorrectly remove the paths in leases
+ under the tree. (daryn via szetszwo)
+
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsVolumeId.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsVolumeId.java
index 8e32805..aa67850 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsVolumeId.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsVolumeId.java
@@ -27,26 +27,38 @@
* HDFS-specific volume identifier which implements {@link VolumeId}. Can be
* used to differentiate between the data directories on a single datanode. This
* identifier is only unique on a per-datanode basis.
+ *
+ * Note that invalid IDs are represented by {@link VolumeId#INVALID_VOLUME_ID}.
*/
@InterfaceStability.Unstable
@InterfaceAudience.Public
public class HdfsVolumeId implements VolumeId {
-
+
private final byte[] id;
- private final boolean isValid;
- public HdfsVolumeId(byte[] id, boolean isValid) {
+ public HdfsVolumeId(byte[] id) {
+ if (id == null) {
+ throw new NullPointerException("A valid Id can only be constructed " +
+ "with a non-null byte array.");
+ }
this.id = id;
- this.isValid = isValid;
}
@Override
- public boolean isValid() {
- return isValid;
+ public final boolean isValid() {
+ return true;
}
@Override
public int compareTo(VolumeId arg0) {
+ if (arg0 == null) {
+ return 1;
+ }
+ if (!arg0.isValid()) {
+ // any valid ID is greater
+ // than any invalid ID:
+ return 1;
+ }
return hashCode() - arg0.hashCode();
}
@@ -63,8 +75,10 @@
if (obj == this) {
return true;
}
-
HdfsVolumeId that = (HdfsVolumeId) obj;
+ // NB: if (!obj.isValid()) { return false; } check is not necessary
+ // because we have class identity checking above, and for this class
+ // isValid() is always true.
return new EqualsBuilder().append(this.id, that.id).isEquals();
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/VolumeId.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/VolumeId.java
index f24ed66..b756241 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/VolumeId.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/VolumeId.java
@@ -29,6 +29,48 @@
public interface VolumeId extends Comparable<VolumeId> {
/**
+ * Represents an invalid Volume ID (ID for unknown content).
+ */
+ public static final VolumeId INVALID_VOLUME_ID = new VolumeId() {
+
+ @Override
+ public int compareTo(VolumeId arg0) {
+ // This object is equal only to itself;
+ // It is greater than null, and
+ // is always less than any other VolumeId:
+ if (arg0 == null) {
+ return 1;
+ }
+ if (arg0 == this) {
+ return 0;
+ } else {
+ return -1;
+ }
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ // this object is equal only to itself:
+ return (obj == this);
+ }
+
+ @Override
+ public int hashCode() {
+ return Integer.MIN_VALUE;
+ }
+
+ @Override
+ public boolean isValid() {
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ return "Invalid VolumeId";
+ }
+ };
+
+ /**
* Indicates if the disk identifier is valid. Invalid identifiers indicate
* that the block was not present, or the location could otherwise not be
* determined.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java
index de74e02..934f8df 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java
@@ -202,7 +202,7 @@
ArrayList<VolumeId> l = new ArrayList<VolumeId>(b.getLocations().length);
// Start off all IDs as invalid, fill it in later with results from RPCs
for (int i = 0; i < b.getLocations().length; i++) {
- l.add(new HdfsVolumeId(null, false));
+ l.add(VolumeId.INVALID_VOLUME_ID);
}
blockVolumeIds.put(b, l);
}
@@ -236,7 +236,7 @@
// Get the VolumeId by indexing into the list of VolumeIds
// provided by the datanode
byte[] volumeId = metaVolumeIds.get(volumeIndex);
- HdfsVolumeId id = new HdfsVolumeId(volumeId, true);
+ HdfsVolumeId id = new HdfsVolumeId(volumeId);
// Find out which index we are in the LocatedBlock's replicas
LocatedBlock locBlock = extBlockToLocBlock.get(extBlock);
DatanodeInfo[] dnInfos = locBlock.getLocations();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 9ea1ec5..994390c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -246,6 +246,8 @@
public static final String DFS_HOSTS = "dfs.hosts";
public static final String DFS_HOSTS_EXCLUDE = "dfs.hosts.exclude";
public static final String DFS_CLIENT_LOCAL_INTERFACES = "dfs.client.local.interfaces";
+ public static final String DFS_NAMENODE_AUDIT_LOGGERS_KEY = "dfs.namenode.audit.loggers";
+ public static final String DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME = "default";
// Much code in hdfs is not yet updated to use these keys.
public static final String DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY = "dfs.client.block.write.locateFollowingBlock.retries";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
index 9a8769c..ec8d118 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
@@ -46,6 +46,7 @@
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@@ -1487,9 +1488,14 @@
*/
@Override
public void hflush() throws IOException {
- flushOrSync(false);
+ flushOrSync(false, EnumSet.noneOf(SyncFlag.class));
}
+ @Override
+ public void hsync() throws IOException {
+ hsync(EnumSet.noneOf(SyncFlag.class));
+ }
+
/**
* The expected semantics is all data have flushed out to all replicas
* and all replicas have done posix fsync equivalent - ie the OS has
@@ -1498,17 +1504,35 @@
* Note that only the current block is flushed to the disk device.
* To guarantee durable sync across block boundaries the stream should
* be created with {@link CreateFlag#SYNC_BLOCK}.
+ *
+ * @param syncFlags
+ * Indicate the semantic of the sync. Currently used to specify
+ * whether or not to update the block length in NameNode.
*/
- @Override
- public void hsync() throws IOException {
- flushOrSync(true);
+ public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException {
+ flushOrSync(true, syncFlags);
}
- private void flushOrSync(boolean isSync) throws IOException {
+ /**
+ * Flush/Sync buffered data to DataNodes.
+ *
+ * @param isSync
+ * Whether or not to require all replicas to flush data to the disk
+ * device
+ * @param syncFlags
+ * Indicate extra detailed semantic of the flush/sync. Currently
+ * mainly used to specify whether or not to update the file length in
+ * the NameNode
+ * @throws IOException
+ */
+ private void flushOrSync(boolean isSync, EnumSet<SyncFlag> syncFlags)
+ throws IOException {
dfsClient.checkOpen();
isClosed();
try {
long toWaitFor;
+ long lastBlockLength = -1L;
+ boolean updateLength = syncFlags.contains(SyncFlag.UPDATE_LENGTH);
synchronized (this) {
/* Record current blockOffset. This might be changed inside
* flushBuffer() where a partial checksum chunk might be flushed.
@@ -1572,13 +1596,20 @@
} // end synchronized
waitForAckedSeqno(toWaitFor);
-
- // If any new blocks were allocated since the last flush,
- // then persist block locations on namenode.
- //
- if (persistBlocks.getAndSet(false)) {
+
+ if (updateLength) {
+ synchronized (this) {
+ if (streamer != null && streamer.block != null) {
+ lastBlockLength = streamer.block.getNumBytes();
+ }
+ }
+ }
+ // If 1) any new blocks were allocated since the last flush, or 2) to
+      // update length in NN is required, then persist block locations on
+ // namenode.
+ if (persistBlocks.getAndSet(false) || updateLength) {
try {
- dfsClient.namenode.fsync(src, dfsClient.clientName);
+ dfsClient.namenode.fsync(src, dfsClient.clientName, lastBlockLength);
} catch (IOException ioe) {
DFSClient.LOG.warn("Unable to persist blocks in hflush for " + src, ioe);
// If we got an error here, it might be because some other thread called
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java
index 23256e6..adc8764 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.client;
import java.io.IOException;
+import java.util.EnumSet;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -56,4 +57,24 @@
public synchronized int getCurrentBlockReplication() throws IOException {
return ((DFSOutputStream)getWrappedStream()).getCurrentBlockReplication();
}
+
+ /**
+ * Sync buffered data to DataNodes (flush to disk devices).
+ *
+ * @param syncFlags
+ * Indicate the detailed semantic and actions of the hsync.
+ * @throws IOException
+ * @see FSDataOutputStream#hsync()
+ */
+ public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException {
+ ((DFSOutputStream) getWrappedStream()).hsync(syncFlags);
+ }
+
+ public static enum SyncFlag {
+ /**
+ * When doing sync to DataNodes, also update the metadata (block
+ * length) in the NameNode
+ */
+ UPDATE_LENGTH;
+ }
}
\ No newline at end of file
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
index ddb58fc..d7ce8e5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
@@ -815,14 +815,15 @@
* The file must be currently open for writing.
* @param src The string representation of the path
* @param client The string representation of the client
- *
+ * @param lastBlockLength The length of the last block (under construction)
+ * to be reported to NameNode
* @throws AccessControlException permission denied
* @throws FileNotFoundException file <code>src</code> is not found
* @throws UnresolvedLinkException if <code>src</code> contains a symlink.
* @throws IOException If an I/O error occurred
*/
@Idempotent
- public void fsync(String src, String client)
+ public void fsync(String src, String client, long lastBlockLength)
throws AccessControlException, FileNotFoundException,
UnresolvedLinkException, IOException;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
index 01d72e5..668fa00 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
@@ -688,7 +688,7 @@
public FsyncResponseProto fsync(RpcController controller,
FsyncRequestProto req) throws ServiceException {
try {
- server.fsync(req.getSrc(), req.getClient());
+ server.fsync(req.getSrc(), req.getClient(), req.getLastBlockLength());
return VOID_FSYNC_RESPONSE;
} catch (IOException e) {
throw new ServiceException(e);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
index 78250b7..bcec403 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
@@ -659,12 +659,11 @@
}
@Override
- public void fsync(String src, String client) throws AccessControlException,
- FileNotFoundException, UnresolvedLinkException, IOException {
- FsyncRequestProto req = FsyncRequestProto.newBuilder()
- .setSrc(src)
- .setClient(client)
- .build();
+ public void fsync(String src, String client, long lastBlockLength)
+ throws AccessControlException, FileNotFoundException,
+ UnresolvedLinkException, IOException {
+ FsyncRequestProto req = FsyncRequestProto.newBuilder().setSrc(src)
+ .setClient(client).setLastBlockLength(lastBlockLength).build();
try {
rpcProxy.fsync(null, req);
} catch (ServiceException e) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
index 0603d15..e7833d1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
@@ -26,6 +26,7 @@
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
@@ -1232,9 +1233,9 @@
if (s == null) return null;
switch (s.getState()) {
case ACTIVE:
- return new NNHAStatusHeartbeat(NNHAStatusHeartbeat.State.ACTIVE, s.getTxid());
+ return new NNHAStatusHeartbeat(HAServiceState.ACTIVE, s.getTxid());
case STANDBY:
- return new NNHAStatusHeartbeat(NNHAStatusHeartbeat.State.STANDBY, s.getTxid());
+ return new NNHAStatusHeartbeat(HAServiceState.STANDBY, s.getTxid());
default:
throw new IllegalArgumentException("Unexpected NNHAStatusHeartbeat.State:" + s.getState());
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
index 577d73b..473f259 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
@@ -75,6 +75,7 @@
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.NetworkTopology;
+import org.apache.hadoop.net.Node;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
@@ -168,7 +169,7 @@
* <ol>
* <li>The cluster is balanced. Exiting
* <li>No block can be moved. Exiting...
- * <li>No block has been moved for 3 iterations. Exiting...
+ * <li>No block has been moved for 5 iterations. Exiting...
* <li>Received an IO exception: failure reason. Exiting...
* <li>Another balancer is running. Exiting...
* </ol>
@@ -222,7 +223,7 @@
private Map<String, BalancerDatanode> datanodes
= new HashMap<String, BalancerDatanode>();
- private NetworkTopology cluster = new NetworkTopology();
+ private NetworkTopology cluster;
final static private int MOVER_THREAD_POOL_SIZE = 1000;
final private ExecutorService moverExecutor =
@@ -249,7 +250,7 @@
* Return true if a block and its proxy are chosen; false otherwise
*/
private boolean chooseBlockAndProxy() {
- // iterate all source's blocks until find a good one
+ // iterate all source's blocks until find a good one
for (Iterator<BalancerBlock> blocks=
source.getBlockIterator(); blocks.hasNext();) {
if (markMovedIfGoodBlock(blocks.next())) {
@@ -293,22 +294,35 @@
* @return true if a proxy is found; otherwise false
*/
private boolean chooseProxySource() {
- // check if there is replica which is on the same rack with the target
+ final DatanodeInfo targetDN = target.getDatanode();
+ boolean find = false;
for (BalancerDatanode loc : block.getLocations()) {
- if (cluster.isOnSameRack(loc.getDatanode(), target.getDatanode())) {
- if (loc.addPendingBlock(this)) {
- proxySource = loc;
+ // check if there is replica which is on the same rack with the target
+ if (cluster.isOnSameRack(loc.getDatanode(), targetDN) && addTo(loc)) {
+ find = true;
+ // if cluster is not nodegroup aware or the proxy is on the same
+ // nodegroup with target, then we already find the nearest proxy
+ if (!cluster.isNodeGroupAware()
+ || cluster.isOnSameNodeGroup(loc.getDatanode(), targetDN)) {
return true;
}
}
- }
- // find out a non-busy replica
- for (BalancerDatanode loc : block.getLocations()) {
- if (loc.addPendingBlock(this)) {
- proxySource = loc;
- return true;
+
+ if (!find) {
+ // find out a non-busy replica out of rack of target
+ find = addTo(loc);
}
}
+
+ return find;
+ }
+
+ // add a BalancerDatanode as proxy source for specific block movement
+ private boolean addTo(BalancerDatanode bdn) {
+ if (bdn.addPendingBlock(this)) {
+ proxySource = bdn;
+ return true;
+ }
return false;
}
@@ -544,7 +558,7 @@
}
/** Decide if still need to move more bytes */
- protected boolean isMoveQuotaFull() {
+ protected boolean hasSpaceForScheduling() {
return scheduledSize<maxSize2Move;
}
@@ -686,7 +700,7 @@
NodeTask task = tasks.next();
BalancerDatanode target = task.getDatanode();
PendingBlockMove pendingBlock = new PendingBlockMove();
- if ( target.addPendingBlock(pendingBlock) ) {
+ if (target.addPendingBlock(pendingBlock)) {
// target is not busy, so do a tentative block allocation
pendingBlock.source = this;
pendingBlock.target = target;
@@ -787,9 +801,10 @@
*/
private static void checkReplicationPolicyCompatibility(Configuration conf
) throws UnsupportedActionException {
- if (BlockPlacementPolicy.getInstance(conf, null, null).getClass() !=
- BlockPlacementPolicyDefault.class) {
- throw new UnsupportedActionException("Balancer without BlockPlacementPolicyDefault");
+ if (BlockPlacementPolicy.getInstance(conf, null, null) instanceof
+ BlockPlacementPolicyDefault) {
+ throw new UnsupportedActionException(
+ "Balancer without BlockPlacementPolicyDefault");
}
}
@@ -804,6 +819,7 @@
this.threshold = p.threshold;
this.policy = p.policy;
this.nnc = theblockpool;
+ cluster = NetworkTopology.getInstance(conf);
}
/* Shuffle datanode array */
@@ -907,17 +923,53 @@
LOG.info(nodes.size() + " " + name + ": " + nodes);
}
- /* Decide all <source, target> pairs and
+ /** A matcher interface for matching nodes. */
+ private interface Matcher {
+ /** Given the cluster topology, does the left node match the right node? */
+ boolean match(NetworkTopology cluster, Node left, Node right);
+ }
+
+ /** Match datanodes in the same node group. */
+ static final Matcher SAME_NODE_GROUP = new Matcher() {
+ @Override
+ public boolean match(NetworkTopology cluster, Node left, Node right) {
+ return cluster.isOnSameNodeGroup(left, right);
+ }
+ };
+
+ /** Match datanodes in the same rack. */
+ static final Matcher SAME_RACK = new Matcher() {
+ @Override
+ public boolean match(NetworkTopology cluster, Node left, Node right) {
+ return cluster.isOnSameRack(left, right);
+ }
+ };
+
+ /** Match any datanode with any other datanode. */
+ static final Matcher ANY_OTHER = new Matcher() {
+ @Override
+ public boolean match(NetworkTopology cluster, Node left, Node right) {
+ return left != right;
+ }
+ };
+
+ /**
+ * Decide all <source, target> pairs and
* the number of bytes to move from a source to a target
* Maximum bytes to be moved per node is
* Min(1 Band worth of bytes, MAX_SIZE_TO_MOVE).
* Return total number of bytes to move in this iteration
*/
private long chooseNodes() {
- // Match nodes on the same rack first
- chooseNodes(true);
- // Then match nodes on different racks
- chooseNodes(false);
+ // First, match nodes on the same node group if cluster is node group aware
+ if (cluster.isNodeGroupAware()) {
+ chooseNodes(SAME_NODE_GROUP);
+ }
+
+ // Then, match nodes on the same rack
+ chooseNodes(SAME_RACK);
+ // At last, match all remaining nodes
+ chooseNodes(ANY_OTHER);
assert (datanodes.size() >= sources.size()+targets.size())
: "Mismatched number of datanodes (" +
@@ -932,162 +984,94 @@
return bytesToMove;
}
- /* if onRack is true, decide all <source, target> pairs
- * where source and target are on the same rack; Otherwise
- * decide all <source, target> pairs where source and target are
- * on different racks
- */
- private void chooseNodes(boolean onRack) {
+ /** Decide all <source, target> pairs according to the matcher. */
+ private void chooseNodes(final Matcher matcher) {
/* first step: match each overUtilized datanode (source) to
* one or more underUtilized datanodes (targets).
*/
- chooseTargets(underUtilizedDatanodes.iterator(), onRack);
+ chooseDatanodes(overUtilizedDatanodes, underUtilizedDatanodes, matcher);
/* match each remaining overutilized datanode (source) to
* below average utilized datanodes (targets).
* Note only overutilized datanodes that haven't had that max bytes to move
* satisfied in step 1 are selected
*/
- chooseTargets(belowAvgUtilizedDatanodes.iterator(), onRack);
+ chooseDatanodes(overUtilizedDatanodes, belowAvgUtilizedDatanodes, matcher);
- /* match each remaining underutilized datanode to
- * above average utilized datanodes.
+ /* match each remaining underutilized datanode (target) to
+ * above average utilized datanodes (source).
* Note only underutilized datanodes that have not had that max bytes to
* move satisfied in step 1 are selected.
*/
- chooseSources(aboveAvgUtilizedDatanodes.iterator(), onRack);
- }
-
- /* choose targets from the target candidate list for each over utilized
- * source datanode. OnRackTarget determines if the chosen target
- * should be on the same rack as the source
- */
- private void chooseTargets(
- Iterator<BalancerDatanode> targetCandidates, boolean onRackTarget ) {
- for (Iterator<Source> srcIterator = overUtilizedDatanodes.iterator();
- srcIterator.hasNext();) {
- Source source = srcIterator.next();
- while (chooseTarget(source, targetCandidates, onRackTarget)) {
- }
- if (!source.isMoveQuotaFull()) {
- srcIterator.remove();
- }
- }
- return;
- }
-
- /* choose sources from the source candidate list for each under utilized
- * target datanode. onRackSource determines if the chosen source
- * should be on the same rack as the target
- */
- private void chooseSources(
- Iterator<Source> sourceCandidates, boolean onRackSource) {
- for (Iterator<BalancerDatanode> targetIterator =
- underUtilizedDatanodes.iterator(); targetIterator.hasNext();) {
- BalancerDatanode target = targetIterator.next();
- while (chooseSource(target, sourceCandidates, onRackSource)) {
- }
- if (!target.isMoveQuotaFull()) {
- targetIterator.remove();
- }
- }
- return;
+ chooseDatanodes(underUtilizedDatanodes, aboveAvgUtilizedDatanodes, matcher);
}
- /* For the given source, choose targets from the target candidate list.
- * OnRackTarget determines if the chosen target
- * should be on the same rack as the source
+ /**
+ * For each datanode, choose matching nodes from the candidates. Either the
+ * datanodes or the candidates are source nodes with (utilization > Avg), and
+ * the others are target nodes with (utilization < Avg).
*/
- private boolean chooseTarget(Source source,
- Iterator<BalancerDatanode> targetCandidates, boolean onRackTarget) {
- if (!source.isMoveQuotaFull()) {
+ private <D extends BalancerDatanode, C extends BalancerDatanode> void
+ chooseDatanodes(Collection<D> datanodes, Collection<C> candidates,
+ Matcher matcher) {
+ for (Iterator<D> i = datanodes.iterator(); i.hasNext();) {
+ final D datanode = i.next();
+ for(; chooseForOneDatanode(datanode, candidates, matcher); );
+ if (!datanode.hasSpaceForScheduling()) {
+ i.remove();
+ }
+ }
+ }
+
+ /**
+ * For the given datanode, choose a candidate and then schedule it.
+ * @return true if a candidate is chosen; false if no candidates is chosen.
+ */
+ private <C extends BalancerDatanode> boolean chooseForOneDatanode(
+ BalancerDatanode dn, Collection<C> candidates, Matcher matcher) {
+ final Iterator<C> i = candidates.iterator();
+ final C chosen = chooseCandidate(dn, i, matcher);
+
+ if (chosen == null) {
return false;
}
- boolean foundTarget = false;
- BalancerDatanode target = null;
- while (!foundTarget && targetCandidates.hasNext()) {
- target = targetCandidates.next();
- if (!target.isMoveQuotaFull()) {
- targetCandidates.remove();
- continue;
- }
- if (onRackTarget) {
- // choose from on-rack nodes
- if (cluster.isOnSameRack(source.datanode, target.datanode)) {
- foundTarget = true;
- }
- } else {
- // choose from off-rack nodes
- if (!cluster.isOnSameRack(source.datanode, target.datanode)) {
- foundTarget = true;
- }
- }
+ if (dn instanceof Source) {
+ matchSourceWithTargetToMove((Source)dn, chosen);
+ } else {
+ matchSourceWithTargetToMove((Source)chosen, dn);
}
- if (foundTarget) {
- assert(target != null):"Choose a null target";
- long size = Math.min(source.availableSizeToMove(),
- target.availableSizeToMove());
- NodeTask nodeTask = new NodeTask(target, size);
- source.addNodeTask(nodeTask);
- target.incScheduledSize(nodeTask.getSize());
- sources.add(source);
- targets.add(target);
- if (!target.isMoveQuotaFull()) {
- targetCandidates.remove();
- }
- LOG.info("Decided to move "+StringUtils.byteDesc(size)+" bytes from "
- +source.datanode + " to " + target.datanode);
- return true;
+ if (!chosen.hasSpaceForScheduling()) {
+ i.remove();
}
- return false;
+ return true;
}
- /* For the given target, choose sources from the source candidate list.
- * OnRackSource determines if the chosen source
- * should be on the same rack as the target
- */
- private boolean chooseSource(BalancerDatanode target,
- Iterator<Source> sourceCandidates, boolean onRackSource) {
- if (!target.isMoveQuotaFull()) {
- return false;
- }
- boolean foundSource = false;
- Source source = null;
- while (!foundSource && sourceCandidates.hasNext()) {
- source = sourceCandidates.next();
- if (!source.isMoveQuotaFull()) {
- sourceCandidates.remove();
- continue;
- }
- if (onRackSource) {
- // choose from on-rack nodes
- if ( cluster.isOnSameRack(source.getDatanode(), target.getDatanode())) {
- foundSource = true;
- }
- } else {
- // choose from off-rack nodes
- if (!cluster.isOnSameRack(source.datanode, target.datanode)) {
- foundSource = true;
+ private void matchSourceWithTargetToMove(
+ Source source, BalancerDatanode target) {
+ long size = Math.min(source.availableSizeToMove(), target.availableSizeToMove());
+ NodeTask nodeTask = new NodeTask(target, size);
+ source.addNodeTask(nodeTask);
+ target.incScheduledSize(nodeTask.getSize());
+ sources.add(source);
+ targets.add(target);
+ LOG.info("Decided to move "+StringUtils.byteDesc(size)+" bytes from "
+ +source.datanode.getName() + " to " + target.datanode.getName());
+ }
+
+ /** Choose a candidate for the given datanode. */
+ private <D extends BalancerDatanode, C extends BalancerDatanode>
+ C chooseCandidate(D dn, Iterator<C> candidates, Matcher matcher) {
+ if (dn.hasSpaceForScheduling()) {
+ for(; candidates.hasNext(); ) {
+ final C c = candidates.next();
+ if (!c.hasSpaceForScheduling()) {
+ candidates.remove();
+ } else if (matcher.match(cluster, dn.getDatanode(), c.getDatanode())) {
+ return c;
}
}
}
- if (foundSource) {
- assert(source != null):"Choose a null source";
- long size = Math.min(source.availableSizeToMove(),
- target.availableSizeToMove());
- NodeTask nodeTask = new NodeTask(target, size);
- source.addNodeTask(nodeTask);
- target.incScheduledSize(nodeTask.getSize());
- sources.add(source);
- targets.add(target);
- if ( !source.isMoveQuotaFull()) {
- sourceCandidates.remove();
- }
- LOG.info("Decided to move "+StringUtils.byteDesc(size)+" bytes from "
- +source.datanode + " to " + target.datanode);
- return true;
- }
- return false;
+ return null;
}
private static class BytesMoved {
@@ -1226,6 +1210,10 @@
if (block.isLocatedOnDatanode(target)) {
return false;
}
+ if (cluster.isNodeGroupAware() &&
+ isOnSameNodeGroupWithReplicas(target, block, source)) {
+ return false;
+ }
boolean goodBlock = false;
if (cluster.isOnSameRack(source.getDatanode(), target.getDatanode())) {
@@ -1257,10 +1245,32 @@
}
return goodBlock;
}
-
+
+ /**
+ * Check if there are any replica (other than source) on the same node group
+ * with target. If true, then target is not a good candidate for placing
+ * specific block replica as we don't want 2 replicas under the same nodegroup
+ * after balance.
+ * @param target targetDataNode
+ * @param block dataBlock
+ * @param source sourceDataNode
+ * @return true if there are any replica (other than source) on the same node
+ * group with target
+ */
+ private boolean isOnSameNodeGroupWithReplicas(BalancerDatanode target,
+ BalancerBlock block, Source source) {
+ for (BalancerDatanode loc : block.locations) {
+ if (loc != source &&
+ cluster.isOnSameNodeGroup(loc.getDatanode(), target.getDatanode())) {
+ return true;
+ }
+ }
+ return false;
+ }
+
/* reset all fields in a balancer preparing for the next iteration */
- private void resetData() {
- this.cluster = new NetworkTopology();
+ private void resetData(Configuration conf) {
+ this.cluster = NetworkTopology.getInstance(conf);
this.overUtilizedDatanodes.clear();
this.aboveAvgUtilizedDatanodes.clear();
this.belowAvgUtilizedDatanodes.clear();
@@ -1331,7 +1341,8 @@
}
/** Run an iteration for all datanodes. */
- private ReturnStatus run(int iteration, Formatter formatter) {
+ private ReturnStatus run(int iteration, Formatter formatter,
+ Configuration conf) {
try {
/* get all live datanodes of a cluster and their disk usage
* decide the number of bytes need to be moved
@@ -1385,7 +1396,7 @@
}
// clean all lists
- resetData();
+ resetData(conf);
return ReturnStatus.IN_PROGRESS;
} catch (IllegalArgumentException e) {
System.out.println(e + ". Exiting ...");
@@ -1433,7 +1444,7 @@
Collections.shuffle(connectors);
for(NameNodeConnector nnc : connectors) {
final Balancer b = new Balancer(nnc, p, conf);
- final ReturnStatus r = b.run(iteration, formatter);
+ final ReturnStatus r = b.run(iteration, formatter, conf);
if (r == ReturnStatus.IN_PROGRESS) {
done = false;
} else if (r != ReturnStatus.SUCCESS) {
@@ -1527,7 +1538,7 @@
if (args != null) {
try {
for(int i = 0; i < args.length; i++) {
- checkArgument(args.length >= 2, "args = " + Arrays.toString(args));
+ checkArgument(args.length >= 2, "args = " + Arrays.toString(args));
if ("-threshold".equalsIgnoreCase(args[i])) {
i++;
try {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
index f976c99..8383dc2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
@@ -152,8 +152,9 @@
List<DatanodeDescriptor> results =
new ArrayList<DatanodeDescriptor>(chosenNodes);
- for (Node node:chosenNodes) {
- excludedNodes.put(node, node);
+ for (DatanodeDescriptor node:chosenNodes) {
+ // add localMachine and related nodes to excludedNodes
+ addToExcludedNodes(node, excludedNodes);
adjustExcludedNodes(excludedNodes, node);
}
@@ -235,7 +236,7 @@
+ totalReplicasExpected + "\n"
+ e.getMessage());
if (avoidStaleNodes) {
- // ecxludedNodes now has - initial excludedNodes, any nodes that were
+ // excludedNodes now has - initial excludedNodes, any nodes that were
// chosen and nodes that were tried but were not chosen because they
// were stale, decommissioned or for any other reason a node is not
// chosen for write. Retry again now not avoiding stale node
@@ -273,6 +274,8 @@
if (isGoodTarget(localMachine, blocksize, maxNodesPerRack, false,
results, avoidStaleNodes)) {
results.add(localMachine);
+ // add localMachine and related nodes to excludedNode
+ addToExcludedNodes(localMachine, excludedNodes);
return localMachine;
}
}
@@ -281,7 +284,19 @@
return chooseLocalRack(localMachine, excludedNodes, blocksize,
maxNodesPerRack, results, avoidStaleNodes);
}
-
+
+ /**
+ * Add <i>localMachine</i> and related nodes to <i>excludedNodes</i>
+ * for next replica choosing. In sub class, we can add more nodes within
+ * the same failure domain of localMachine
+ * @return number of new excluded nodes
+ */
+ protected int addToExcludedNodes(DatanodeDescriptor localMachine,
+ HashMap<Node, Node> excludedNodes) {
+ Node node = excludedNodes.put(localMachine, localMachine);
+ return node == null?1:0;
+ }
+
/* choose one node from the rack that <i>localMachine</i> is on.
* if no such node is available, choose one node from the rack where
* a second replica is on.
@@ -392,6 +407,8 @@
if (isGoodTarget(chosenNode, blocksize,
maxNodesPerRack, results, avoidStaleNodes)) {
results.add(chosenNode);
+ // add chosenNode and related nodes to excludedNode
+ addToExcludedNodes(chosenNode, excludedNodes);
adjustExcludedNodes(excludedNodes, chosenNode);
return chosenNode;
} else {
@@ -441,6 +458,9 @@
maxNodesPerRack, results, avoidStaleNodes)) {
numOfReplicas--;
results.add(chosenNode);
+ // add chosenNode and related nodes to excludedNode
+ int newExcludedNodes = addToExcludedNodes(chosenNode, excludedNodes);
+ numOfAvailableNodes -= newExcludedNodes;
adjustExcludedNodes(excludedNodes, chosenNode);
} else {
badTarget = true;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java
index c575fa8..643d2b4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java
@@ -240,6 +240,27 @@
String nodeGroupString = cur.getNetworkLocation();
return NetworkTopology.getFirstHalf(nodeGroupString);
}
+
+ /**
+ * Find other nodes in the same nodegroup of <i>localMachine</i> and add them
+ * into <i>excludeNodes</i> as replica should not be duplicated for nodes
+ * within the same nodegroup
+ * @return number of new excluded nodes
+ */
+ protected int addToExcludedNodes(DatanodeDescriptor localMachine,
+ HashMap<Node, Node> excludedNodes) {
+ int countOfExcludedNodes = 0;
+ String nodeGroupScope = localMachine.getNetworkLocation();
+ List<Node> leafNodes = clusterMap.getLeaves(nodeGroupScope);
+ for (Node leafNode : leafNodes) {
+ Node node = excludedNodes.put(leafNode, leafNode);
+ if (node == null) {
+ // not a existing node in excludedNodes
+ countOfExcludedNodes++;
+ }
+ }
+ return countOfExcludedNodes;
+ }
/**
* Pick up replica node set for deleting replica as over-replicated.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index 804fdf2..c8544f7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -39,7 +39,6 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -151,10 +150,7 @@
this.namesystem = namesystem;
this.blockManager = blockManager;
- Class<? extends NetworkTopology> networkTopologyClass =
- conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
- NetworkTopology.class, NetworkTopology.class);
- networktopology = ReflectionUtils.newInstance(networkTopologyClass, conf);
+ networktopology = NetworkTopology.getInstance(conf);
this.heartbeatManager = new HeartbeatManager(namesystem, blockManager, conf);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
index acd8e9c..c170cf9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
@@ -26,6 +26,7 @@
import org.apache.commons.logging.Log;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@@ -411,7 +412,7 @@
final long txid = nnHaState.getTxId();
final boolean nnClaimsActive =
- nnHaState.getState() == NNHAStatusHeartbeat.State.ACTIVE;
+ nnHaState.getState() == HAServiceState.ACTIVE;
final boolean bposThinksActive = bpServiceToActive == actor;
final boolean isMoreRecentClaim = txid > lastActiveClaimTxId;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java
new file mode 100644
index 0000000..614eb63
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.net.InetAddress;
+import java.security.Principal;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+
+/**
+ * Interface defining an audit logger.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public interface AuditLogger {
+
+ /**
+ * Called during initialization of the logger.
+ *
+ * @param conf The configuration object.
+ */
+ void initialize(Configuration conf);
+
+ /**
+ * Called to log an audit event.
+ * <p>
+ * This method must return as quickly as possible, since it's called
+ * in a critical section of the NameNode's operation.
+ *
+ * @param succeeded Whether authorization succeeded.
+ * @param userName Name of the user executing the request.
+ * @param addr Remote address of the request.
+ * @param cmd The requested command.
+ * @param src Path of affected source file.
+ * @param dst Path of affected destination file (if any).
+ * @param stat File information for operations that change the file's
+ * metadata (permissions, owner, times, etc).
+ */
+ void logAuditEvent(boolean succeeded, String userName,
+ InetAddress addr, String cmd, String src, String dst,
+ FileStatus stat);
+
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
index dd273a2..077a953 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
@@ -24,6 +24,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.NameNodeProxies;
@@ -35,6 +36,7 @@
import org.apache.hadoop.hdfs.protocolPB.JournalProtocolServerSideTranslatorPB;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.protocol.FenceResponse;
import org.apache.hadoop.hdfs.server.protocol.JournalInfo;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
@@ -414,14 +416,23 @@
+ HdfsConstants.LAYOUT_VERSION + " actual "+ nsInfo.getLayoutVersion();
return nsInfo;
}
-
+
@Override
+ protected String getNameServiceId(Configuration conf) {
+ return DFSUtil.getBackupNameServiceId(conf);
+ }
+
+ protected HAState createHAState() {
+ return new BackupState();
+ }
+
+ @Override // NameNode
protected NameNodeHAContext createHAContext() {
return new BNHAContext();
}
-
+
private class BNHAContext extends NameNodeHAContext {
- @Override // NameNode
+ @Override // NameNodeHAContext
public void checkOperation(OperationCategory op)
throws StandbyException {
if (op == OperationCategory.UNCHECKED ||
@@ -435,10 +446,42 @@
throw new StandbyException(msg);
}
}
- }
-
- @Override
- protected String getNameServiceId(Configuration conf) {
- return DFSUtil.getBackupNameServiceId(conf);
+
+ @Override // NameNodeHAContext
+ public void prepareToStopStandbyServices() throws ServiceFailedException {
+ }
+
+ /**
+ * Start services for BackupNode.
+ * <p>
+ * The following services should be muted
+ * (not run or not pass any control commands to DataNodes)
+ * on BackupNode:
+ * {@link LeaseManager.Monitor} protected by SafeMode.
+ * {@link BlockManager.ReplicationMonitor} protected by SafeMode.
+ * {@link HeartbeatManager.Monitor} protected by SafeMode.
+ * {@link DecommissionManager.Monitor} need to prohibit refreshNodes().
+ * {@link PendingReplicationBlocks.PendingReplicationMonitor} harmless,
+ * because ReplicationMonitor is muted.
+ */
+ @Override
+ public void startActiveServices() throws IOException {
+ try {
+ namesystem.startActiveServices();
+ } catch (Throwable t) {
+ doImmediateShutdown(t);
+ }
+ }
+
+ @Override
+ public void stopActiveServices() throws IOException {
+ try {
+ if (namesystem != null) {
+ namesystem.stopActiveServices();
+ }
+ } catch (Throwable t) {
+ doImmediateShutdown(t);
+ }
+ }
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupState.java
new file mode 100644
index 0000000..ce11fc9e6
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupState.java
@@ -0,0 +1,53 @@
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+import org.apache.hadoop.ha.ServiceFailedException;
+import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
+import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
+import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
+import org.apache.hadoop.ipc.StandbyException;
+
+@InterfaceAudience.Private
+public class BackupState extends HAState {
+
+ public BackupState() {
+ super(HAServiceState.STANDBY);
+ }
+
+ @Override // HAState
+ public void checkOperation(HAContext context, OperationCategory op)
+ throws StandbyException {
+ context.checkOperation(op);
+ }
+
+ @Override // HAState
+ public boolean shouldPopulateReplQueues() {
+ return false;
+ }
+
+ @Override // HAState
+ public void enterState(HAContext context) throws ServiceFailedException {
+ try {
+ context.startActiveServices();
+ } catch (IOException e) {
+ throw new ServiceFailedException("Failed to start backup services", e);
+ }
+ }
+
+ @Override // HAState
+ public void exitState(HAContext context) throws ServiceFailedException {
+ try {
+ context.stopActiveServices();
+ } catch (IOException e) {
+ throw new ServiceFailedException("Failed to stop backup services", e);
+ }
+ }
+
+ @Override // HAState
+ public void prepareToExitState(HAContext context) throws ServiceFailedException {
+ context.prepareToStopStandbyServices();
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index b19963d..05d3b22 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -575,6 +575,8 @@
// update modification time of dst and the parent of src
srcInodes[srcInodes.length-2].setModificationTime(timestamp);
dstInodes[dstInodes.length-2].setModificationTime(timestamp);
+ // update moved leases with new filename
+ getFSNamesystem().unprotectedChangeLease(src, dst);
return true;
}
} finally {
@@ -729,6 +731,8 @@
}
srcInodes[srcInodes.length - 2].setModificationTime(timestamp);
dstInodes[dstInodes.length - 2].setModificationTime(timestamp);
+ // update moved lease with new filename
+ getFSNamesystem().unprotectedChangeLease(src, dst);
// Collect the blocks and remove the lease for previous dst
int filesDeleted = 0;
@@ -1071,31 +1075,39 @@
throws IOException, UnresolvedLinkException {
writeLock();
try {
- //
- // Remove the node from the namespace
- //
- if (!oldnode.removeNode()) {
- NameNode.stateChangeLog.warn("DIR* FSDirectory.replaceNode: " +
- "failed to remove " + path);
- throw new IOException("FSDirectory.replaceNode: " +
- "failed to remove " + path);
- }
-
- /* Currently oldnode and newnode are assumed to contain the same
- * blocks. Otherwise, blocks need to be removed from the blocksMap.
- */
- rootDir.addINode(path, newnode);
-
- int index = 0;
- for (BlockInfo b : newnode.getBlocks()) {
- BlockInfo info = getBlockManager().addBlockCollection(b, newnode);
- newnode.setBlock(index, info); // inode refers to the block in BlocksMap
- index++;
- }
+ unprotectedReplaceNode(path, oldnode, newnode);
} finally {
writeUnlock();
}
}
+
+ void unprotectedReplaceNode(String path, INodeFile oldnode, INodeFile newnode)
+ throws IOException, UnresolvedLinkException {
+ assert hasWriteLock();
+ INodeDirectory parent = oldnode.parent;
+ // Remove the node from the namespace
+ if (!oldnode.removeNode()) {
+ NameNode.stateChangeLog.warn("DIR* FSDirectory.replaceNode: " +
+ "failed to remove " + path);
+ throw new IOException("FSDirectory.replaceNode: " +
+ "failed to remove " + path);
+ }
+
+ // Parent should be non-null, otherwise oldnode.removeNode() will return
+ // false
+ newnode.setLocalName(oldnode.getLocalNameBytes());
+ parent.addChild(newnode, true);
+
+ /* Currently oldnode and newnode are assumed to contain the same
+ * blocks. Otherwise, blocks need to be removed from the blocksMap.
+ */
+ int index = 0;
+ for (BlockInfo b : newnode.getBlocks()) {
+ BlockInfo info = getBlockManager().addBlockCollection(b, newnode);
+ newnode.setBlock(index, info); // inode refers to the block in BlocksMap
+ index++;
+ }
+ }
/**
* Get a partial listing of the indicated directory
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
index 129ae15..8c3fb70 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
@@ -878,6 +878,11 @@
return journalSet;
}
+ @VisibleForTesting
+ synchronized void setJournalSetForTesting(JournalSet js) {
+ this.journalSet = js;
+ }
+
/**
* Used only by tests.
*/
@@ -1031,9 +1036,18 @@
/**
* Archive any log files that are older than the given txid.
+ *
+ * If the edit log is not open for write, then this call returns with no
+ * effect.
*/
@Override
public synchronized void purgeLogsOlderThan(final long minTxIdToKeep) {
+ // Should not purge logs unless they are open for write.
+ // This prevents the SBN from purging logs on shared storage, for example.
+ if (!isOpenForWrite()) {
+ return;
+ }
+
assert curSegmentTxId == HdfsConstants.INVALID_TXID || // on format this is no-op
minTxIdToKeep <= curSegmentTxId :
"cannot purge logs older than txid " + minTxIdToKeep +
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index 1916348..5b5d761 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -31,7 +31,6 @@
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
@@ -322,7 +321,7 @@
INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile;
fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path);
INodeFile newFile = ucFile.convertToInodeFile();
- fsDir.replaceNode(addCloseOp.path, ucFile, newFile);
+ fsDir.unprotectedReplaceNode(addCloseOp.path, ucFile, newFile);
}
break;
}
@@ -360,10 +359,8 @@
}
case OP_RENAME_OLD: {
RenameOldOp renameOp = (RenameOldOp)op;
- HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false);
fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
renameOp.timestamp);
- fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo);
break;
}
case OP_DELETE: {
@@ -433,11 +430,8 @@
}
case OP_RENAME: {
RenameOp renameOp = (RenameOp)op;
-
- HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false);
fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
renameOp.timestamp, renameOp.options);
- fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo);
break;
}
case OP_GET_DELEGATION_TOKEN: {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 5fe7bc0..6426003 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -34,6 +34,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
@@ -111,6 +113,7 @@
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.Options;
@@ -121,6 +124,7 @@
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
@@ -163,12 +167,10 @@
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory.INodesInPath;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
-import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
-import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
@@ -246,32 +248,32 @@
}
};
- private static final void logAuditEvent(UserGroupInformation ugi,
+ private boolean isAuditEnabled() {
+ return !isDefaultAuditLogger || auditLog.isInfoEnabled();
+ }
+
+ private void logAuditEvent(UserGroupInformation ugi,
InetAddress addr, String cmd, String src, String dst,
HdfsFileStatus stat) {
logAuditEvent(true, ugi, addr, cmd, src, dst, stat);
}
- private static final void logAuditEvent(boolean succeeded,
+ private void logAuditEvent(boolean succeeded,
UserGroupInformation ugi, InetAddress addr, String cmd, String src,
String dst, HdfsFileStatus stat) {
- final StringBuilder sb = auditBuffer.get();
- sb.setLength(0);
- sb.append("allowed=").append(succeeded).append("\t");
- sb.append("ugi=").append(ugi).append("\t");
- sb.append("ip=").append(addr).append("\t");
- sb.append("cmd=").append(cmd).append("\t");
- sb.append("src=").append(src).append("\t");
- sb.append("dst=").append(dst).append("\t");
- if (null == stat) {
- sb.append("perm=null");
- } else {
- sb.append("perm=");
- sb.append(stat.getOwner()).append(":");
- sb.append(stat.getGroup()).append(":");
- sb.append(stat.getPermission());
+ FileStatus status = null;
+ if (stat != null) {
+ Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
+ Path path = dst != null ? new Path(dst) : new Path(src);
+ status = new FileStatus(stat.getLen(), stat.isDir(),
+ stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
+ stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
+ stat.getGroup(), symlink, path);
}
- auditLog.info(sb);
+ for (AuditLogger logger : auditLoggers) {
+ logger.logAuditEvent(succeeded, ugi.toString(), addr,
+ cmd, src, dst, status);
+ }
}
/**
@@ -304,6 +306,11 @@
final DelegationTokenSecretManager dtSecretManager;
private final boolean alwaysUseDelegationTokensForTests;
+ // Tracks whether the default audit logger is the only configured audit
+ // logger; this allows isAuditEnabled() to return false in case the
+ // underlying logger is disabled, and avoid some unnecessary work.
+ private final boolean isDefaultAuditLogger;
+ private final List<AuditLogger> auditLoggers;
/** The namespace tree. */
FSDirectory dir;
@@ -536,14 +543,50 @@
this.dtSecretManager = createDelegationTokenSecretManager(conf);
this.dir = new FSDirectory(fsImage, this, conf);
this.safeMode = new SafeModeInfo(conf);
-
+ this.auditLoggers = initAuditLoggers(conf);
+ this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
+ auditLoggers.get(0) instanceof DefaultAuditLogger;
} catch(IOException e) {
LOG.error(getClass().getSimpleName() + " initialization failed.", e);
close();
throw e;
+ } catch (RuntimeException re) {
+ LOG.error(getClass().getSimpleName() + " initialization failed.", re);
+ close();
+ throw re;
}
}
+ private List<AuditLogger> initAuditLoggers(Configuration conf) {
+ // Initialize the custom access loggers if configured.
+ Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
+ List<AuditLogger> auditLoggers = Lists.newArrayList();
+ if (alClasses != null && !alClasses.isEmpty()) {
+ for (String className : alClasses) {
+ try {
+ AuditLogger logger;
+ if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
+ logger = new DefaultAuditLogger();
+ } else {
+ logger = (AuditLogger) Class.forName(className).newInstance();
+ }
+ logger.initialize(conf);
+ auditLoggers.add(logger);
+ } catch (RuntimeException re) {
+ throw re;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ // Make sure there is at least one logger installed.
+ if (auditLoggers.isEmpty()) {
+ auditLoggers.add(new DefaultAuditLogger());
+ }
+ return auditLoggers;
+ }
+
void loadFSImage(StartupOption startOpt, FSImage fsImage, boolean haEnabled)
throws IOException {
// format before starting up if requested
@@ -1003,8 +1046,8 @@
// start in active.
return haEnabled;
}
-
- return haContext.getState() instanceof StandbyState;
+
+ return HAServiceState.STANDBY == haContext.getState().getServiceState();
}
/**
@@ -1030,7 +1073,8 @@
long totalInodes = this.dir.totalInodes();
long totalBlocks = this.getBlocksTotal();
out.println(totalInodes + " files and directories, " + totalBlocks
- + " blocks = " + (totalInodes + totalBlocks) + " total");
+ + " blocks = " + (totalInodes + totalBlocks)
+ + " total filesystem objects");
blockManager.metaSave(out);
}
@@ -1076,7 +1120,7 @@
try {
setPermissionInt(src, permission);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setPermission", src, null, null);
@@ -1098,14 +1142,14 @@
}
checkOwner(src);
dir.setPermission(src, permission);
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(src, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setPermission", src, null, resultingStat);
@@ -1122,7 +1166,7 @@
try {
setOwnerInt(src, username, group);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setOwner", src, null, null);
@@ -1153,14 +1197,14 @@
}
}
dir.setOwner(src, username, group);
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(src, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setOwner", src, null, resultingStat);
@@ -1203,7 +1247,7 @@
return getBlockLocationsInt(src, offset, length, doAccessTime,
needBlockToken, checkSafeMode);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"open", src, null, null);
@@ -1229,7 +1273,7 @@
}
final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
offset, length, doAccessTime, needBlockToken);
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"open", src, null, null);
@@ -1310,7 +1354,7 @@
try {
concatInt(target, srcs);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getLoginUser(),
getRemoteIp(),
"concat", Arrays.toString(srcs), target, null);
@@ -1353,14 +1397,14 @@
throw new SafeModeException("Cannot concat " + target, safeMode);
}
concatInternal(target, srcs);
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(target, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getLoginUser(),
getRemoteIp(),
"concat", Arrays.toString(srcs), target, resultingStat);
@@ -1481,7 +1525,7 @@
try {
setTimesInt(src, mtime, atime);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setTimes", src, null, null);
@@ -1507,7 +1551,7 @@
INode inode = dir.getINode(src);
if (inode != null) {
dir.setTimes(src, inode, mtime, atime, true);
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
final HdfsFileStatus stat = dir.getFileInfo(src, false);
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
@@ -1530,7 +1574,7 @@
try {
createSymlinkInt(target, link, dirPerms, createParent);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"createSymlink", link, target, null);
@@ -1551,14 +1595,14 @@
verifyParentDir(link);
}
createSymlinkInternal(target, link, dirPerms, createParent);
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(link, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"createSymlink", link, target, resultingStat);
@@ -1614,7 +1658,7 @@
try {
return setReplicationInt(src, replication);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setReplication", src, null, null);
@@ -1650,7 +1694,7 @@
}
getEditLog().logSync();
- if (isFile && auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isFile && isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setReplication", src, null, null);
@@ -1706,7 +1750,7 @@
startFileInt(src, permissions, holder, clientMachine, flag, createParent,
replication, blockSize);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"create", src, null, null);
@@ -1739,7 +1783,7 @@
}
}
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
final HdfsFileStatus stat = dir.getFileInfo(src, false);
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
@@ -2040,7 +2084,7 @@
try {
return appendFileInt(src, holder, clientMachine);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"append", src, null, null);
@@ -2086,7 +2130,7 @@
+" block size " + lb.getBlock().getNumBytes());
}
}
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"append", src, null, null);
@@ -2532,7 +2576,7 @@
try {
return renameToInt(src, dst);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"rename", src, dst, null);
@@ -2554,14 +2598,14 @@
checkOperation(OperationCategory.WRITE);
status = renameToInternal(src, dst);
- if (status && auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (status && isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(dst, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
- if (status && auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (status && isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"rename", src, dst, resultingStat);
@@ -2583,15 +2627,15 @@
if (isPermissionEnabled) {
//We should not be doing this. This is move() not renameTo().
//but for now,
+ //NOTE: yes, this is bad! it's assuming much lower level behavior
+ // of rewriting the dst
String actualdst = dir.isDir(dst)?
dst + Path.SEPARATOR + new Path(src).getName(): dst;
checkParentAccess(src, FsAction.WRITE);
checkAncestorAccess(actualdst, FsAction.WRITE);
}
- HdfsFileStatus dinfo = dir.getFileInfo(dst, false);
if (dir.renameTo(src, dst)) {
- unprotectedChangeLease(src, dst, dinfo); // update lease with new filename
return true;
}
return false;
@@ -2611,14 +2655,14 @@
checkOperation(OperationCategory.WRITE);
renameToInternal(src, dst, options);
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(dst, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
StringBuilder cmd = new StringBuilder("rename options=");
for (Rename option : options) {
cmd.append(option.value()).append(" ");
@@ -2642,9 +2686,7 @@
checkAncestorAccess(dst, FsAction.WRITE);
}
- HdfsFileStatus dinfo = dir.getFileInfo(dst, false);
dir.renameTo(src, dst, options);
- unprotectedChangeLease(src, dst, dinfo); // update lease with new filename
}
/**
@@ -2659,7 +2701,7 @@
try {
return deleteInt(src, recursive);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"delete", src, null, null);
@@ -2675,7 +2717,7 @@
NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
}
boolean status = deleteInternal(src, recursive, true);
- if (status && auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (status && isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"delete", src, null, null);
@@ -2841,7 +2883,7 @@
}
stat = dir.getFileInfo(src, resolveLink);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"getfileinfo", src, null, null);
@@ -2850,7 +2892,7 @@
} finally {
readUnlock();
}
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"getfileinfo", src, null, null);
@@ -2866,7 +2908,7 @@
try {
return mkdirsInt(src, permissions, createParent);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"mkdirs", src, null, null);
@@ -2890,7 +2932,7 @@
writeUnlock();
}
getEditLog().logSync();
- if (status && auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (status && isAuditEnabled() && isExternalInvocation()) {
final HdfsFileStatus stat = dir.getFileInfo(src, false);
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
@@ -2979,9 +3021,11 @@
/** Persist all metadata about this file.
* @param src The string representation of the path
* @param clientName The string representation of the client
+ * @param lastBlockLength The length of the last block
+ * under construction reported from client.
* @throws IOException if path does not exist
*/
- void fsync(String src, String clientName)
+ void fsync(String src, String clientName, long lastBlockLength)
throws IOException, UnresolvedLinkException {
NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
writeLock();
@@ -2991,6 +3035,9 @@
throw new SafeModeException("Cannot fsync file " + src, safeMode);
}
INodeFileUnderConstruction pendingFile = checkLease(src, clientName);
+ if (lastBlockLength > 0) {
+ pendingFile.updateLengthOfLastBlock(lastBlockLength);
+ }
dir.persistBlocks(src, pendingFile);
} finally {
writeUnlock();
@@ -3319,7 +3366,7 @@
try {
return getListingInt(src, startAfter, needLocation);
} catch (AccessControlException e) {
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"listStatus", src, null, null);
@@ -3343,7 +3390,7 @@
checkTraverse(src);
}
}
- if (auditLog.isInfoEnabled() && isExternalInvocation()) {
+ if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"listStatus", src, null, null);
@@ -3433,15 +3480,7 @@
private NNHAStatusHeartbeat createHaStatusHeartbeat() {
HAState state = haContext.getState();
- NNHAStatusHeartbeat.State hbState;
- if (state instanceof ActiveState) {
- hbState = NNHAStatusHeartbeat.State.ACTIVE;
- } else if (state instanceof StandbyState) {
- hbState = NNHAStatusHeartbeat.State.STANDBY;
- } else {
- throw new AssertionError("Invalid state: " + state.getClass());
- }
- return new NNHAStatusHeartbeat(hbState,
+ return new NNHAStatusHeartbeat(state.getServiceState(),
getFSImage().getLastAppliedOrWrittenTxId());
}
@@ -3870,7 +3909,7 @@
private synchronized void leave() {
// if not done yet, initialize replication queues.
// In the standby, do not populate repl queues
- if (!isPopulatingReplQueues() && !isInStandbyState()) {
+ if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
initializeReplQueues();
}
long timeInSafemode = now() - startTime;
@@ -3913,7 +3952,8 @@
* initializing replication queues.
*/
private synchronized boolean canInitializeReplQueues() {
- return !isInStandbyState() && blockSafe >= blockReplQueueThreshold;
+ return shouldPopulateReplQueues()
+ && blockSafe >= blockReplQueueThreshold;
}
/**
@@ -4253,7 +4293,7 @@
@Override
public boolean isPopulatingReplQueues() {
- if (isInStandbyState()) {
+ if (!shouldPopulateReplQueues()) {
return false;
}
// safeMode is volatile, and may be set to null at any time
@@ -4262,7 +4302,13 @@
return true;
return safeMode.isPopulatingReplQueues();
}
-
+
+ private boolean shouldPopulateReplQueues() {
+ if(haContext == null || haContext.getState() == null)
+ return false;
+ return haContext.getState().shouldPopulateReplQueues();
+ }
+
@Override
public void incrementSafeBlockCount(int replication) {
// safeMode is volatile, and may be set to null at any time
@@ -4880,31 +4926,9 @@
// rename was successful. If any part of the renamed subtree had
// files that were being written to, update with new filename.
- void unprotectedChangeLease(String src, String dst, HdfsFileStatus dinfo) {
- String overwrite;
- String replaceBy;
+ void unprotectedChangeLease(String src, String dst) {
assert hasWriteLock();
-
- boolean destinationExisted = true;
- if (dinfo == null) {
- destinationExisted = false;
- }
-
- if (destinationExisted && dinfo.isDir()) {
- Path spath = new Path(src);
- Path parent = spath.getParent();
- if (parent.isRoot()) {
- overwrite = parent.toString();
- } else {
- overwrite = parent.toString() + Path.SEPARATOR;
- }
- replaceBy = dst + Path.SEPARATOR;
- } else {
- overwrite = src;
- replaceBy = dst;
- }
-
- leaseManager.changeLease(src, dst, overwrite, replaceBy);
+ leaseManager.changeLease(src, dst);
}
/**
@@ -4915,19 +4939,13 @@
// lock on our behalf. If we took the read lock here, we could block
// for fairness if a writer is waiting on the lock.
synchronized (leaseManager) {
- out.writeInt(leaseManager.countPath()); // write the size
-
- for (Lease lease : leaseManager.getSortedLeases()) {
- for(String path : lease.getPaths()) {
- // verify that path exists in namespace
- final INodeFileUnderConstruction cons;
- try {
- cons = INodeFileUnderConstruction.valueOf(dir.getINode(path), path);
- } catch (UnresolvedLinkException e) {
- throw new AssertionError("Lease files should reside on this FS");
- }
- FSImageSerialization.writeINodeUnderConstruction(out, cons, path);
- }
+ Map<String, INodeFileUnderConstruction> nodes =
+ leaseManager.getINodesUnderConstruction();
+ out.writeInt(nodes.size()); // write the size
+ for (Map.Entry<String, INodeFileUnderConstruction> entry
+ : nodes.entrySet()) {
+ FSImageSerialization.writeINodeUnderConstruction(
+ out, entry.getValue(), entry.getKey());
}
}
}
@@ -5286,7 +5304,7 @@
* Log fsck event in the audit log
*/
void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
- if (auditLog.isInfoEnabled()) {
+ if (isAuditEnabled()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
remoteAddress,
"fsck", src, null, null);
@@ -5541,4 +5559,44 @@
return this.blockManager.getDatanodeManager()
.isAvoidingStaleDataNodesForWrite();
}
+
+ /**
+ * Default AuditLogger implementation; used when no access logger is
+ * defined in the config file. It can also be explicitly listed in the
+ * config file.
+ */
+ private static class DefaultAuditLogger implements AuditLogger {
+
+ @Override
+ public void initialize(Configuration conf) {
+ // Nothing to do.
+ }
+
+ @Override
+ public void logAuditEvent(boolean succeeded, String userName,
+ InetAddress addr, String cmd, String src, String dst,
+ FileStatus status) {
+ if (auditLog.isInfoEnabled()) {
+ final StringBuilder sb = auditBuffer.get();
+ sb.setLength(0);
+ sb.append("allowed=").append(succeeded).append("\t");
+ sb.append("ugi=").append(userName).append("\t");
+ sb.append("ip=").append(addr).append("\t");
+ sb.append("cmd=").append(cmd).append("\t");
+ sb.append("src=").append(src).append("\t");
+ sb.append("dst=").append(dst).append("\t");
+ if (null == status) {
+ sb.append("perm=null");
+ } else {
+ sb.append("perm=");
+ sb.append(status.getOwner()).append(":");
+ sb.append(status.getGroup()).append(":");
+ sb.append(status.getPermission());
+ }
+ auditLog.info(sb);
+ }
+ }
+
+ }
+
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
index 1b193ed..832ca1a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
@@ -73,6 +73,11 @@
INodeDirectory(INodeDirectory other) {
super(other);
this.children = other.children;
+ if (this.children != null) {
+ for (INode child : children) {
+ child.parent = this;
+ }
+ }
}
/** @return true unconditionally. */
@@ -106,6 +111,7 @@
final int low = searchChildren(newChild);
if (low>=0) { // an old child exists so replace by the newChild
+ children.get(low).parent = null;
children.set(low, newChild);
} else {
throw new IllegalArgumentException("No child exists to be replaced");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
index 9e73bf0..15b4be5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
@@ -171,4 +171,22 @@
setBlock(numBlocks()-1, ucBlock);
return ucBlock;
}
+
+ /**
+ * Update the length for the last block
+ *
+ * @param lastBlockLength
+ * The length of the last block reported from client
+ * @throws IOException
+ */
+ void updateLengthOfLastBlock(long lastBlockLength) throws IOException {
+ BlockInfo lastBlock = this.getLastBlock();
+ assert (lastBlock != null) : "The last block for path "
+ + this.getFullPathName() + " is null when updating its length";
+ assert (lastBlock instanceof BlockInfoUnderConstruction) : "The last block for path "
+ + this.getFullPathName()
+ + " is not a BlockInfoUnderConstruction when updating its length";
+ lastBlock.setNumBytes(lastBlockLength);
+ }
+
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
index fd1cbfc..87d19e5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
@@ -17,9 +17,12 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
+import static org.apache.hadoop.util.Time.now;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
@@ -39,8 +42,6 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
-import static org.apache.hadoop.util.Time.now;
-
/**
* LeaseManager does the lease housekeeping for writing on files.
* This class also provides useful static methods for lease recovery.
@@ -330,21 +331,19 @@
}
}
- synchronized void changeLease(String src, String dst,
- String overwrite, String replaceBy) {
+ synchronized void changeLease(String src, String dst) {
if (LOG.isDebugEnabled()) {
LOG.debug(getClass().getSimpleName() + ".changelease: " +
- " src=" + src + ", dest=" + dst +
- ", overwrite=" + overwrite +
- ", replaceBy=" + replaceBy);
+ " src=" + src + ", dest=" + dst);
}
- final int len = overwrite.length();
- for(Map.Entry<String, Lease> entry : findLeaseWithPrefixPath(src, sortedLeasesByPath)) {
+ final int len = src.length();
+ for(Map.Entry<String, Lease> entry
+ : findLeaseWithPrefixPath(src, sortedLeasesByPath).entrySet()) {
final String oldpath = entry.getKey();
final Lease lease = entry.getValue();
- //overwrite must be a prefix of oldpath
- final String newpath = replaceBy + oldpath.substring(len);
+ // replace stem of src with new destination
+ final String newpath = dst + oldpath.substring(len);
if (LOG.isDebugEnabled()) {
LOG.debug("changeLease: replacing " + oldpath + " with " + newpath);
}
@@ -355,7 +354,8 @@
}
synchronized void removeLeaseWithPrefixPath(String prefix) {
- for(Map.Entry<String, Lease> entry : findLeaseWithPrefixPath(prefix, sortedLeasesByPath)) {
+ for(Map.Entry<String, Lease> entry
+ : findLeaseWithPrefixPath(prefix, sortedLeasesByPath).entrySet()) {
if (LOG.isDebugEnabled()) {
LOG.debug(LeaseManager.class.getSimpleName()
+ ".removeLeaseWithPrefixPath: entry=" + entry);
@@ -364,13 +364,13 @@
}
}
- static private List<Map.Entry<String, Lease>> findLeaseWithPrefixPath(
+ static private Map<String, Lease> findLeaseWithPrefixPath(
String prefix, SortedMap<String, Lease> path2lease) {
if (LOG.isDebugEnabled()) {
LOG.debug(LeaseManager.class.getSimpleName() + ".findLease: prefix=" + prefix);
}
- List<Map.Entry<String, Lease>> entries = new ArrayList<Map.Entry<String, Lease>>();
+ final Map<String, Lease> entries = new HashMap<String, Lease>();
final int srclen = prefix.length();
for(Map.Entry<String, Lease> entry : path2lease.tailMap(prefix).entrySet()) {
@@ -379,7 +379,7 @@
return entries;
}
if (p.length() == srclen || p.charAt(srclen) == Path.SEPARATOR_CHAR) {
- entries.add(entry);
+ entries.put(entry.getKey(), entry.getValue());
}
}
return entries;
@@ -426,6 +426,26 @@
}
}
+ /**
+ * Get the list of inodes corresponding to valid leases.
+ * @return list of inodes
+ * @throws UnresolvedLinkException
+ */
+ Map<String, INodeFileUnderConstruction> getINodesUnderConstruction() {
+ Map<String, INodeFileUnderConstruction> inodes =
+ new TreeMap<String, INodeFileUnderConstruction>();
+ for (String p : sortedLeasesByPath.keySet()) {
+ // verify that path exists in namespace
+ try {
+ INode node = fsnamesystem.dir.getINode(p);
+ inodes.put(p, INodeFileUnderConstruction.valueOf(node, p));
+ } catch (IOException ioe) {
+ LOG.error(ioe);
+ }
+ }
+ return inodes;
+ }
+
/** Check the leases beginning from the oldest.
* @return true is sync is needed.
*/
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
index 309811c..f77604a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
@@ -598,11 +598,7 @@
String nsId = getNameServiceId(conf);
String namenodeId = HAUtil.getNameNodeId(conf, nsId);
this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
- if (!haEnabled) {
- state = ACTIVE_STATE;
- } else {
- state = STANDBY_STATE;
- }
+ state = createHAState();
this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
this.haContext = createHAContext();
try {
@@ -619,6 +615,10 @@
}
}
+ protected HAState createHAState() {
+ return !haEnabled ? ACTIVE_STATE : STANDBY_STATE;
+ }
+
protected HAContext createHAContext() {
return new NameNodeHAContext();
}
@@ -1298,7 +1298,7 @@
* before exit.
* @throws ExitException thrown only for testing.
*/
- private synchronized void doImmediateShutdown(Throwable t)
+ protected synchronized void doImmediateShutdown(Throwable t)
throws ExitException {
String message = "Error encountered requiring NN shutdown. " +
"Shutting down immediately.";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
index a51cee4..779c09c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
@@ -825,8 +825,9 @@
}
@Override // ClientProtocol
- public void fsync(String src, String clientName) throws IOException {
- namesystem.fsync(src, clientName);
+ public void fsync(String src, String clientName, long lastBlockLength)
+ throws IOException {
+ namesystem.fsync(src, clientName, lastBlockLength);
}
@Override // ClientProtocol
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java
index 348f8da..b078afa 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java
@@ -102,7 +102,7 @@
long usedNonHeap = (totalNonHeap * 100) / commitedNonHeap;
String str = "<div>" + inodes + " files and directories, " + blocks + " blocks = "
- + (inodes + blocks) + " total";
+ + (inodes + blocks) + " total filesystem objects";
if (maxobjects != 0) {
long pct = ((inodes + blocks) * 100) / maxobjects;
str += " / " + maxobjects + " (" + pct + "%)";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java
index 337a83c..66ccb3b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java
@@ -19,31 +19,26 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class NNHAStatusHeartbeat {
- private State state;
+ private HAServiceState state;
private long txid = HdfsConstants.INVALID_TXID;
- public NNHAStatusHeartbeat(State state, long txid) {
+ public NNHAStatusHeartbeat(HAServiceState state, long txid) {
this.state = state;
this.txid = txid;
}
- public State getState() {
+ public HAServiceState getState() {
return state;
}
public long getTxId() {
return txid;
}
-
- @InterfaceAudience.Private
- public enum State {
- ACTIVE,
- STANDBY;
- }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto
index 21fb048..fe09ef7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto
@@ -357,6 +357,7 @@
message FsyncRequestProto {
required string src = 1;
required string client = 2;
+ optional sint64 lastBlockLength = 3 [default = -1];
}
message FsyncResponseProto { // void response
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 13dad67..34cd846 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -1184,4 +1184,17 @@
</description>
</property>
+<property>
+ <name>dfs.namenode.audit.loggers</name>
+ <value>default</value>
+ <description>
+ List of classes implementing audit loggers that will receive audit events.
+ These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger.
+ The special value "default" can be used to reference the default audit
+ logger, which uses the configured log system. Installing custom audit loggers
+ may affect the performance and stability of the NameNode. Refer to the custom
+ logger's documentation for more details.
+ </description>
+</property>
+
</configuration>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java
new file mode 100644
index 0000000..da6f192
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+public class TestVolumeId {
+
+ @Test
+ public void testEquality() {
+ final VolumeId id1 = new HdfsVolumeId(new byte[] { (byte)0, (byte)0 });
+ testEq(true, id1, id1);
+
+ final VolumeId id2 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 });
+ testEq(true, id2, id2);
+ testEq(false, id1, id2);
+
+ final VolumeId id3 = new HdfsVolumeId(new byte[] { (byte)1, (byte)0 });
+ testEq(true, id3, id3);
+ testEq(false, id1, id3);
+
+ // same as 2, but "invalid":
+ final VolumeId id2copy1 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 });
+
+ testEq(true, id2, id2copy1);
+
+ // same as 2copy1:
+ final VolumeId id2copy2 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 });
+
+ testEq(true, id2, id2copy2);
+
+ testEqMany(true, new VolumeId[] { id2, id2copy1, id2copy2 });
+
+ testEqMany(false, new VolumeId[] { id1, id2, id3 });
+ }
+
+ @SuppressWarnings("unchecked")
+ private <T> void testEq(final boolean eq, Comparable<? super T> id1, Comparable<? super T> id2) {
+ final int h1 = id1.hashCode();
+ final int h2 = id2.hashCode();
+
+ // eq reflectivity:
+ assertTrue(id1.equals(id1));
+ assertTrue(id2.equals(id2));
+ assertEquals(0, id1.compareTo((T)id1));
+ assertEquals(0, id2.compareTo((T)id2));
+
+ // eq symmetry:
+ assertEquals(eq, id1.equals(id2));
+ assertEquals(eq, id2.equals(id1));
+
+ // null comparison:
+ assertFalse(id1.equals(null));
+ assertFalse(id2.equals(null));
+
+ // compareTo:
+ assertEquals(eq, 0 == id1.compareTo((T)id2));
+ assertEquals(eq, 0 == id2.compareTo((T)id1));
+ // compareTo must be antisymmetric:
+ assertEquals(sign(id1.compareTo((T)id2)), -sign(id2.compareTo((T)id1)));
+
+ // compare with null should never return 0 to be consistent with #equals():
+ assertTrue(id1.compareTo(null) != 0);
+ assertTrue(id2.compareTo(null) != 0);
+
+ // check that hash codes did not change:
+ assertEquals(h1, id1.hashCode());
+ assertEquals(h2, id2.hashCode());
+ if (eq) {
+ // in this case the hash codes must be the same:
+ assertEquals(h1, h2);
+ }
+ }
+
+ private static int sign(int x) {
+ if (x == 0) {
+ return 0;
+ } else if (x > 0) {
+ return 1;
+ } else {
+ return -1;
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ private <T> void testEqMany(final boolean eq, Comparable<? super T>... volumeIds) {
+ Comparable<? super T> vidNext;
+ int sum = 0;
+ for (int i=0; i<volumeIds.length; i++) {
+ if (i == volumeIds.length - 1) {
+ vidNext = volumeIds[0];
+ } else {
+ vidNext = volumeIds[i + 1];
+ }
+ testEq(eq, volumeIds[i], vidNext);
+ sum += sign(volumeIds[i].compareTo((T)vidNext));
+ }
+ // the comparison relationship must always be acyclic:
+ assertTrue(sum < volumeIds.length);
+ }
+
+ /*
+ * Test HdfsVolumeId(new byte[0]) instances: show that we permit such
+ * objects, they are still valid, and obey the same equality
+ * rules other objects do.
+ */
+ @Test
+ public void testIdEmptyBytes() {
+ final VolumeId idEmpty1 = new HdfsVolumeId(new byte[0]);
+ assertTrue(idEmpty1.isValid());
+ final VolumeId idEmpty2 = new HdfsVolumeId(new byte[0]);
+ assertTrue(idEmpty2.isValid());
+ final VolumeId idNotEmpty = new HdfsVolumeId(new byte[] { (byte)1 });
+ assertTrue(idNotEmpty.isValid());
+
+ testEq(true, idEmpty1, idEmpty2);
+ testEq(false, idEmpty1, idNotEmpty);
+ testEq(false, idEmpty2, idNotEmpty);
+ }
+
+ /*
+ * Test the VolumeId.INVALID_VOLUME_ID singleton.
+ */
+ @Test
+ public void testInvalidId() {
+ try {
+ new HdfsVolumeId(null);
+ assertTrue("NPE expected.", false);
+ } catch (NullPointerException npe) {
+ // okay
+ }
+ final VolumeId idEmpty = new HdfsVolumeId(new byte[] {});
+ final VolumeId idNotEmpty = new HdfsVolumeId(new byte[] { (byte)1 });
+
+ testEq(false, VolumeId.INVALID_VOLUME_ID, idNotEmpty);
+ testEq(false, VolumeId.INVALID_VOLUME_ID, idEmpty);
+
+ testEqMany(true,
+ new VolumeId[] {
+ VolumeId.INVALID_VOLUME_ID,
+ VolumeId.INVALID_VOLUME_ID,
+ VolumeId.INVALID_VOLUME_ID } );
+ testEqMany(false,
+ new VolumeId[] {
+ VolumeId.INVALID_VOLUME_ID,
+ idEmpty,
+ idNotEmpty });
+ }
+
+ /*
+ * test #toString() for typical VolumeId equality classes
+ */
+ @Test
+ public void testToString() {
+ // The #toString() return value is only checked for != null.
+ // We cannot assert more.
+ String strInvalid = VolumeId.INVALID_VOLUME_ID.toString();
+ assertNotNull(strInvalid);
+
+ String strEmpty = new HdfsVolumeId(new byte[] {}).toString();
+ assertNotNull(strEmpty);
+
+ String strNotEmpty = new HdfsVolumeId(new byte[] { (byte)1 }).toString();
+ assertNotNull(strNotEmpty);
+ }
+
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
index 851b525..9500834 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
@@ -321,7 +321,7 @@
/**
* Used by builder to create and return an instance of MiniDFSCluster
*/
- private MiniDFSCluster(Builder builder) throws IOException {
+ protected MiniDFSCluster(Builder builder) throws IOException {
if (builder.nnTopology == null) {
// If no topology is specified, build a single NN.
builder.nnTopology = MiniDFSNNTopology.simpleSingleNN(
@@ -369,8 +369,8 @@
private Configuration conf;
private NameNodeInfo[] nameNodes;
- private int numDataNodes;
- private ArrayList<DataNodeProperties> dataNodes =
+ protected int numDataNodes;
+ protected List<DataNodeProperties> dataNodes =
new ArrayList<DataNodeProperties>();
private File base_dir;
private File data_dir;
@@ -2303,7 +2303,7 @@
return port;
}
- private void setupDatanodeAddress(Configuration conf, boolean setupHostsFile,
+ protected void setupDatanodeAddress(Configuration conf, boolean setupHostsFile,
boolean checkDataNodeAddrConfig) throws IOException {
if (setupHostsFile) {
String hostsFile = conf.get(DFS_HOSTS, "").trim();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java
new file mode 100644
index 0000000..ff8c92a
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY;
+import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter;
+import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
+import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.net.StaticMapping;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.ssl.SSLFactory;
+
+public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
+
+ private static String[] NODE_GROUPS = null;
+ private static final Log LOG = LogFactory.getLog(MiniDFSClusterWithNodeGroup.class);
+
+ public MiniDFSClusterWithNodeGroup(Builder builder) throws IOException {
+ super(builder);
+ }
+
+ public static void setNodeGroups (String[] nodeGroups) {
+ NODE_GROUPS = nodeGroups;
+ }
+
+ public synchronized void startDataNodes(Configuration conf, int numDataNodes,
+ boolean manageDfsDirs, StartupOption operation,
+ String[] racks, String[] nodeGroups, String[] hosts,
+ long[] simulatedCapacities,
+ boolean setupHostsFile,
+ boolean checkDataNodeAddrConfig,
+ boolean checkDataNodeHostConfig) throws IOException {
+ if (operation == StartupOption.RECOVER) {
+ return;
+ }
+ if (checkDataNodeHostConfig) {
+ conf.setIfUnset(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
+ } else {
+ conf.set(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
+ }
+ conf.set(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
+
+ int curDatanodesNum = dataNodes.size();
+ // for mincluster's the default initialDelay for BRs is 0
+ if (conf.get(DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY) == null) {
+ conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY, 0);
+ }
+ // If minicluster's name node is null assume that the conf has been
+ // set with the right address:port of the name node.
+ //
+ if (racks != null && numDataNodes > racks.length ) {
+ throw new IllegalArgumentException( "The length of racks [" + racks.length
+ + "] is less than the number of datanodes [" + numDataNodes + "].");
+ }
+
+ if (nodeGroups != null && numDataNodes > nodeGroups.length ) {
+ throw new IllegalArgumentException( "The length of nodeGroups [" + nodeGroups.length
+ + "] is less than the number of datanodes [" + numDataNodes + "].");
+ }
+
+ if (hosts != null && numDataNodes > hosts.length ) {
+ throw new IllegalArgumentException( "The length of hosts [" + hosts.length
+ + "] is less than the number of datanodes [" + numDataNodes + "].");
+ }
+ //Generate some hostnames if required
+ if (racks != null && hosts == null) {
+ hosts = new String[numDataNodes];
+ for (int i = curDatanodesNum; i < curDatanodesNum + numDataNodes; i++) {
+ hosts[i - curDatanodesNum] = "host" + i + ".foo.com";
+ }
+ }
+
+ if (simulatedCapacities != null
+ && numDataNodes > simulatedCapacities.length) {
+ throw new IllegalArgumentException( "The length of simulatedCapacities ["
+ + simulatedCapacities.length
+ + "] is less than the number of datanodes [" + numDataNodes + "].");
+ }
+
+ String [] dnArgs = (operation == null ||
+ operation != StartupOption.ROLLBACK) ?
+ null : new String[] {operation.getName()};
+
+ for (int i = curDatanodesNum; i < curDatanodesNum+numDataNodes; i++) {
+ Configuration dnConf = new HdfsConfiguration(conf);
+ // Set up datanode address
+ setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
+ if (manageDfsDirs) {
+ File dir1 = getInstanceStorageDir(i, 0);
+ File dir2 = getInstanceStorageDir(i, 1);
+ dir1.mkdirs();
+ dir2.mkdirs();
+ if (!dir1.isDirectory() || !dir2.isDirectory()) {
+ throw new IOException("Mkdirs failed to create directory for DataNode "
+ + i + ": " + dir1 + " or " + dir2);
+ }
+ String dirs = fileAsURI(dir1) + "," + fileAsURI(dir2);
+ dnConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
+ conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
+ }
+ if (simulatedCapacities != null) {
+ SimulatedFSDataset.setFactory(dnConf);
+ dnConf.setLong(SimulatedFSDataset.CONFIG_PROPERTY_CAPACITY,
+ simulatedCapacities[i-curDatanodesNum]);
+ }
+ LOG.info("Starting DataNode " + i + " with "
+ + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY + ": "
+ + dnConf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY));
+ if (hosts != null) {
+ dnConf.set(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY, hosts[i - curDatanodesNum]);
+ LOG.info("Starting DataNode " + i + " with hostname set to: "
+ + dnConf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY));
+ }
+ if (racks != null) {
+ String name = hosts[i - curDatanodesNum];
+ if (nodeGroups == null) {
+ LOG.info("Adding node with hostname : " + name + " to rack " +
+ racks[i-curDatanodesNum]);
+ StaticMapping.addNodeToRack(name,racks[i-curDatanodesNum]);
+ } else {
+ LOG.info("Adding node with hostname : " + name + " to serverGroup " +
+ nodeGroups[i-curDatanodesNum] + " and rack " +
+ racks[i-curDatanodesNum]);
+ StaticMapping.addNodeToRack(name,racks[i-curDatanodesNum] +
+ nodeGroups[i-curDatanodesNum]);
+ }
+ }
+ Configuration newconf = new HdfsConfiguration(dnConf); // save config
+ if (hosts != null) {
+ NetUtils.addStaticResolution(hosts[i - curDatanodesNum], "localhost");
+ }
+
+ SecureResources secureResources = null;
+ if (UserGroupInformation.isSecurityEnabled()) {
+ SSLFactory sslFactory = new SSLFactory(SSLFactory.Mode.SERVER, dnConf);
+ try {
+ secureResources = SecureDataNodeStarter.getSecureResources(sslFactory, dnConf);
+ } catch (Exception ex) {
+ ex.printStackTrace();
+ }
+ }
+ DataNode dn = DataNode.instantiateDataNode(dnArgs, dnConf, secureResources);
+ if(dn == null)
+ throw new IOException("Cannot start DataNode in "
+ + dnConf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY));
+ //since the HDFS does things based on IP:port, we need to add the mapping
+ //for IP:port to rackId
+ String ipAddr = dn.getXferAddress().getAddress().getHostAddress();
+ if (racks != null) {
+ int port = dn.getXferAddress().getPort();
+ if (nodeGroups == null) {
+ LOG.info("Adding node with IP:port : " + ipAddr + ":" + port +
+ " to rack " + racks[i-curDatanodesNum]);
+ StaticMapping.addNodeToRack(ipAddr + ":" + port,
+ racks[i-curDatanodesNum]);
+ } else {
+ LOG.info("Adding node with IP:port : " + ipAddr + ":" + port + " to nodeGroup " +
+ nodeGroups[i-curDatanodesNum] + " and rack " + racks[i-curDatanodesNum]);
+ StaticMapping.addNodeToRack(ipAddr + ":" + port, racks[i-curDatanodesNum] +
+ nodeGroups[i-curDatanodesNum]);
+ }
+ }
+ dn.runDatanodeDaemon();
+ dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs, secureResources));
+ }
+ curDatanodesNum += numDataNodes;
+ this.numDataNodes += numDataNodes;
+ waitActive();
+ }
+
+ public synchronized void startDataNodes(Configuration conf, int numDataNodes,
+ boolean manageDfsDirs, StartupOption operation,
+ String[] racks, String[] nodeGroups, String[] hosts,
+ long[] simulatedCapacities,
+ boolean setupHostsFile) throws IOException {
+ startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups,
+ hosts, simulatedCapacities, setupHostsFile, false, false);
+ }
+
+ public void startDataNodes(Configuration conf, int numDataNodes,
+ boolean manageDfsDirs, StartupOption operation,
+ String[] racks, long[] simulatedCapacities,
+ String[] nodeGroups) throws IOException {
+ startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups,
+ null, simulatedCapacities, false);
+ }
+
+ // This is for initialize from parent class.
+ @Override
+ public synchronized void startDataNodes(Configuration conf, int numDataNodes,
+ boolean manageDfsDirs, StartupOption operation,
+ String[] racks, String[] hosts,
+ long[] simulatedCapacities,
+ boolean setupHostsFile,
+ boolean checkDataNodeAddrConfig,
+ boolean checkDataNodeHostConfig) throws IOException {
+ startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks,
+ NODE_GROUPS, hosts, simulatedCapacities, setupHostsFile,
+ checkDataNodeAddrConfig, checkDataNodeHostConfig);
+ }
+
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java
index feaca8c..dfe45f8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java
@@ -23,12 +23,14 @@
import java.io.IOException;
import java.io.InterruptedIOException;
+import java.util.EnumSet;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.log4j.Level;
import org.junit.Test;
@@ -43,16 +45,21 @@
private final String fName = "hflushtest.dat";
- /** The test uses {@link #doTheJob(Configuration, String, long, short)
+ /**
+ * The test uses
+ * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
* to write a file with a standard block size
*/
@Test
public void hFlush_01() throws IOException {
- doTheJob(new HdfsConfiguration(), fName, AppendTestUtil.BLOCK_SIZE, (short)2);
+ doTheJob(new HdfsConfiguration(), fName, AppendTestUtil.BLOCK_SIZE,
+ (short) 2, false, EnumSet.noneOf(SyncFlag.class));
}
- /** The test uses {@link #doTheJob(Configuration, String, long, short)
- * to write a file with a custom block size so the writes will be
+ /**
+ * The test uses
+ * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
+ * to write a file with a custom block size so the writes will be
* happening across block' boundaries
*/
@Test
@@ -64,14 +71,17 @@
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
- doTheJob(conf, fName, customBlockSize, (short)2);
+ doTheJob(conf, fName, customBlockSize, (short) 2, false,
+ EnumSet.noneOf(SyncFlag.class));
}
- /** The test uses {@link #doTheJob(Configuration, String, long, short)
- * to write a file with a custom block size so the writes will be
+ /**
+ * The test uses
+ * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
+ * to write a file with a custom block size so the writes will be
* happening across block's and checksum' boundaries
*/
- @Test
+ @Test
public void hFlush_03() throws IOException {
Configuration conf = new HdfsConfiguration();
int customPerChecksumSize = 400;
@@ -80,22 +90,106 @@
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
- doTheJob(conf, fName, customBlockSize, (short)2);
+ doTheJob(conf, fName, customBlockSize, (short) 2, false,
+ EnumSet.noneOf(SyncFlag.class));
}
/**
- The method starts new cluster with defined Configuration;
- creates a file with specified block_size and writes 10 equal sections in it;
- it also calls hflush() after each write and throws an IOException in case of
- an error.
- @param conf cluster configuration
- @param fileName of the file to be created and processed as required
- @param block_size value to be used for the file's creation
- @param replicas is the number of replicas
- @throws IOException in case of any errors
+ * Test hsync (with updating block length in NameNode) while no data is
+ * actually written yet
+ */
+ @Test
+ public void hSyncUpdateLength_00() throws IOException {
+ Configuration conf = new HdfsConfiguration();
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(
+ 2).build();
+ DistributedFileSystem fileSystem =
+ (DistributedFileSystem)cluster.getFileSystem();
+
+ try {
+ Path path = new Path(fName);
+ FSDataOutputStream stm = fileSystem.create(path, true, 4096, (short) 2,
+ AppendTestUtil.BLOCK_SIZE);
+ System.out.println("Created file " + path.toString());
+ ((DFSOutputStream) stm.getWrappedStream()).hsync(EnumSet
+ .of(SyncFlag.UPDATE_LENGTH));
+ long currentFileLength = fileSystem.getFileStatus(path).getLen();
+ assertEquals(0L, currentFileLength);
+ stm.close();
+ } finally {
+ fileSystem.close();
+ cluster.shutdown();
+ }
+ }
+
+ /**
+ * The test calls
+ * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
+ * while requiring the semantic of {@link SyncFlag#UPDATE_LENGTH}.
+ */
+ @Test
+ public void hSyncUpdateLength_01() throws IOException {
+ doTheJob(new HdfsConfiguration(), fName, AppendTestUtil.BLOCK_SIZE,
+ (short) 2, true, EnumSet.of(SyncFlag.UPDATE_LENGTH));
+ }
+
+ /**
+ * The test calls
+ * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
+ * while requiring the semantic of {@link SyncFlag#UPDATE_LENGTH}.
+ * Similar with {@link #hFlush_02()} , it writes a file with a custom block
+ * size so the writes will be happening across block' boundaries
+ */
+ @Test
+ public void hSyncUpdateLength_02() throws IOException {
+ Configuration conf = new HdfsConfiguration();
+ int customPerChecksumSize = 512;
+ int customBlockSize = customPerChecksumSize * 3;
+ // Modify defaul filesystem settings
+ conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
+ conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
+
+ doTheJob(conf, fName, customBlockSize, (short) 2, true,
+ EnumSet.of(SyncFlag.UPDATE_LENGTH));
+ }
+
+ /**
+ * The test calls
+ * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
+ * while requiring the semantic of {@link SyncFlag#UPDATE_LENGTH}.
+ * Similar with {@link #hFlush_03()} , it writes a file with a custom block
+ * size so the writes will be happening across block's and checksum'
+ * boundaries.
+ */
+ @Test
+ public void hSyncUpdateLength_03() throws IOException {
+ Configuration conf = new HdfsConfiguration();
+ int customPerChecksumSize = 400;
+ int customBlockSize = customPerChecksumSize * 3;
+ // Modify defaul filesystem settings
+ conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
+ conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
+
+ doTheJob(conf, fName, customBlockSize, (short) 2, true,
+ EnumSet.of(SyncFlag.UPDATE_LENGTH));
+ }
+
+ /**
+ * The method starts new cluster with defined Configuration; creates a file
+ * with specified block_size and writes 10 equal sections in it; it also calls
+ * hflush/hsync after each write and throws an IOException in case of an error.
+ *
+ * @param conf cluster configuration
+ * @param fileName of the file to be created and processed as required
+ * @param block_size value to be used for the file's creation
+ * @param replicas is the number of replicas
+ * @param isSync hsync or hflush
+ * @param syncFlags specify the semantic of the sync/flush
+ * @throws IOException in case of any errors
*/
public static void doTheJob(Configuration conf, final String fileName,
- long block_size, short replicas) throws IOException {
+ long block_size, short replicas, boolean isSync,
+ EnumSet<SyncFlag> syncFlags) throws IOException {
byte[] fileContent;
final int SECTIONS = 10;
@@ -119,8 +213,21 @@
System.out.println("Writing " + (tenth * i) + " to " + (tenth * (i+1)) + " section to file " + fileName);
// write to the file
stm.write(fileContent, tenth * i, tenth);
- // Wait while hflush() pushes all packets through built pipeline
- ((DFSOutputStream)stm.getWrappedStream()).hflush();
+
+ // Wait while hflush/hsync pushes all packets through built pipeline
+ if (isSync) {
+ ((DFSOutputStream)stm.getWrappedStream()).hsync(syncFlags);
+ } else {
+ ((DFSOutputStream)stm.getWrappedStream()).hflush();
+ }
+
+ // Check file length if updatelength is required
+ if (isSync && syncFlags.contains(SyncFlag.UPDATE_LENGTH)) {
+ long currentFileLength = fileSystem.getFileStatus(path).getLen();
+ assertEquals(
+ "File size doesn't match for hsync/hflush with updating the length",
+ tenth * (i + 1), currentFileLength);
+ }
byte [] toRead = new byte[tenth];
byte [] expected = new byte[tenth];
System.arraycopy(fileContent, tenth * i, expected, 0, tenth);
@@ -139,8 +246,6 @@
assertEquals("File size doesn't match ", AppendTestUtil.FILE_SIZE, fileSystem.getFileStatus(path).getLen());
AppendTestUtil.checkFullFile(fileSystem, path, fileContent.length, fileContent, "hflush()");
- } catch (Exception e) {
- e.printStackTrace();
} finally {
fileSystem.close();
cluster.shutdown();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java
index a718d60..1940b6d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java
@@ -30,7 +30,9 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
@@ -49,6 +51,10 @@
).getLeaseByPath(src.toString()) != null;
}
+ static int leaseCount(MiniDFSCluster cluster) {
+ return NameNodeAdapter.getLeaseManager(cluster.getNamesystem()).countLease();
+ }
+
static final String dirString = "/test/lease";
final Path dir = new Path(dirString);
static final Log LOG = LogFactory.getLog(TestLease.class);
@@ -127,6 +133,96 @@
}
@Test
+ public void testLeaseAfterRename() throws Exception {
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
+ try {
+ Path p = new Path("/test-file");
+ Path d = new Path("/test-d");
+ Path d2 = new Path("/test-d-other");
+
+ // open a file to get a lease
+ FileSystem fs = cluster.getFileSystem();
+ FSDataOutputStream out = fs.create(p);
+ out.writeBytes("something");
+ //out.hsync();
+ Assert.assertTrue(hasLease(cluster, p));
+ Assert.assertEquals(1, leaseCount(cluster));
+
+ // just to ensure first fs doesn't have any logic to twiddle leases
+ DistributedFileSystem fs2 = (DistributedFileSystem) FileSystem.newInstance(fs.getUri(), fs.getConf());
+
+ // rename the file into an existing dir
+ LOG.info("DMS: rename file into dir");
+ Path pRenamed = new Path(d, p.getName());
+ fs2.mkdirs(d);
+ fs2.rename(p, pRenamed);
+ Assert.assertFalse(p+" exists", fs2.exists(p));
+ Assert.assertTrue(pRenamed+" not found", fs2.exists(pRenamed));
+ Assert.assertFalse("has lease for "+p, hasLease(cluster, p));
+ Assert.assertTrue("no lease for "+pRenamed, hasLease(cluster, pRenamed));
+ Assert.assertEquals(1, leaseCount(cluster));
+
+ // rename the parent dir to a new non-existent dir
+ LOG.info("DMS: rename parent dir");
+ Path pRenamedAgain = new Path(d2, pRenamed.getName());
+ fs2.rename(d, d2);
+ // src gone
+ Assert.assertFalse(d+" exists", fs2.exists(d));
+ Assert.assertFalse("has lease for "+pRenamed, hasLease(cluster, pRenamed));
+ // dst checks
+ Assert.assertTrue(d2+" not found", fs2.exists(d2));
+ Assert.assertTrue(pRenamedAgain+" not found", fs2.exists(pRenamedAgain));
+ Assert.assertTrue("no lease for "+pRenamedAgain, hasLease(cluster, pRenamedAgain));
+ Assert.assertEquals(1, leaseCount(cluster));
+
+ // rename the parent dir to existing dir
+ // NOTE: rename w/o options moves paths into existing dir
+ LOG.info("DMS: rename parent again");
+ pRenamed = pRenamedAgain;
+ pRenamedAgain = new Path(new Path(d, d2.getName()), p.getName());
+ fs2.mkdirs(d);
+ fs2.rename(d2, d);
+ // src gone
+ Assert.assertFalse(d2+" exists", fs2.exists(d2));
+ Assert.assertFalse("no lease for "+pRenamed, hasLease(cluster, pRenamed));
+ // dst checks
+ Assert.assertTrue(d+" not found", fs2.exists(d));
+ Assert.assertTrue(pRenamedAgain +" not found", fs2.exists(pRenamedAgain));
+ Assert.assertTrue("no lease for "+pRenamedAgain, hasLease(cluster, pRenamedAgain));
+ Assert.assertEquals(1, leaseCount(cluster));
+
+ // rename with opts to non-existent dir
+ pRenamed = pRenamedAgain;
+ pRenamedAgain = new Path(d2, p.getName());
+ fs2.rename(pRenamed.getParent(), d2, Options.Rename.OVERWRITE);
+ // src gone
+ Assert.assertFalse(pRenamed.getParent() +" not found", fs2.exists(pRenamed.getParent()));
+ Assert.assertFalse("has lease for "+pRenamed, hasLease(cluster, pRenamed));
+ // dst checks
+ Assert.assertTrue(d2+" not found", fs2.exists(d2));
+ Assert.assertTrue(pRenamedAgain+" not found", fs2.exists(pRenamedAgain));
+ Assert.assertTrue("no lease for "+pRenamedAgain, hasLease(cluster, pRenamedAgain));
+ Assert.assertEquals(1, leaseCount(cluster));
+
+ // rename with opts to existing dir
+ // NOTE: rename with options will not move paths into the existing dir
+ pRenamed = pRenamedAgain;
+ pRenamedAgain = new Path(d, p.getName());
+ fs2.rename(pRenamed.getParent(), d, Options.Rename.OVERWRITE);
+ // src gone
+ Assert.assertFalse(pRenamed.getParent() +" not found", fs2.exists(pRenamed.getParent()));
+ Assert.assertFalse("has lease for "+pRenamed, hasLease(cluster, pRenamed));
+ // dst checks
+ Assert.assertTrue(d+" not found", fs2.exists(d));
+ Assert.assertTrue(pRenamedAgain+" not found", fs2.exists(pRenamedAgain));
+ Assert.assertTrue("no lease for "+pRenamedAgain, hasLease(cluster, pRenamedAgain));
+ Assert.assertEquals(1, leaseCount(cluster));
+ } finally {
+ cluster.shutdown();
+ }
+ }
+
+ @Test
public void testLease() throws Exception {
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
try {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
new file mode 100644
index 0000000..15cd7d7
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
@@ -0,0 +1,292 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.balancer;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSClusterWithNodeGroup;
+import org.apache.hadoop.hdfs.NameNodeProxies;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
+import org.apache.hadoop.net.NetworkTopology;
+import org.junit.Test;
+
+/**
+ * This class tests if a balancer schedules tasks correctly.
+ */
+public class TestBalancerWithNodeGroup {
+ private static final Log LOG = LogFactory.getLog(
+ "org.apache.hadoop.hdfs.TestBalancerWithNodeGroup");
+
+ final private static long CAPACITY = 500L;
+ final private static String RACK0 = "/rack0";
+ final private static String RACK1 = "/rack1";
+ final private static String NODEGROUP0 = "/nodegroup0";
+ final private static String NODEGROUP1 = "/nodegroup1";
+ final private static String NODEGROUP2 = "/nodegroup2";
+ final static private String fileName = "/tmp.txt";
+ final static private Path filePath = new Path(fileName);
+ MiniDFSClusterWithNodeGroup cluster;
+
+ ClientProtocol client;
+
+ static final long TIMEOUT = 20000L; //msec
+ static final double CAPACITY_ALLOWED_VARIANCE = 0.005; // 0.5%
+ static final double BALANCE_ALLOWED_VARIANCE = 0.11; // 10%+delta
+ static final int DEFAULT_BLOCK_SIZE = 10;
+
+ static {
+ Balancer.setBlockMoveWaitTime(1000L) ;
+ }
+
+ static Configuration createConf() {
+ Configuration conf = new HdfsConfiguration();
+ TestBalancer.initConf(conf);
+ conf.set(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
+ "org.apache.hadoop.net.NetworkTopologyWithNodeGroup");
+ conf.set("dfs.block.replicator.classname",
+ "org.apache.hadoop.hdfs.server.blockmanagement." +
+ "BlockPlacementPolicyWithNodeGroup");
+ return conf;
+ }
+
+ /**
+ * Wait until heartbeat gives expected results, within CAPACITY_ALLOWED_VARIANCE,
+ * summed over all nodes. Times out after TIMEOUT msec.
+ * @param expectedUsedSpace
+ * @param expectedTotalSpace
+ * @throws IOException - if getStats() fails
+ * @throws TimeoutException
+ */
+ private void waitForHeartBeat(long expectedUsedSpace, long expectedTotalSpace)
+ throws IOException, TimeoutException {
+ long timeout = TIMEOUT;
+ long failtime = (timeout <= 0L) ? Long.MAX_VALUE
+ : System.currentTimeMillis() + timeout;
+
+ while (true) {
+ long[] status = client.getStats();
+ double totalSpaceVariance = Math.abs((double)status[0] - expectedTotalSpace)
+ / expectedTotalSpace;
+ double usedSpaceVariance = Math.abs((double)status[1] - expectedUsedSpace)
+ / expectedUsedSpace;
+ if (totalSpaceVariance < CAPACITY_ALLOWED_VARIANCE
+ && usedSpaceVariance < CAPACITY_ALLOWED_VARIANCE)
+ break; //done
+
+ if (System.currentTimeMillis() > failtime) {
+ throw new TimeoutException("Cluster failed to reached expected values of "
+ + "totalSpace (current: " + status[0]
+ + ", expected: " + expectedTotalSpace
+ + "), or usedSpace (current: " + status[1]
+ + ", expected: " + expectedUsedSpace
+ + "), in more than " + timeout + " msec.");
+ }
+ try {
+ Thread.sleep(100L);
+ } catch(InterruptedException ignored) {
+ }
+ }
+ }
+
+ /**
+ * Wait until balanced: each datanode gives utilization within
+ * BALANCE_ALLOWED_VARIANCE of average
+ * @throws IOException
+ * @throws TimeoutException
+ */
+ private void waitForBalancer(long totalUsedSpace, long totalCapacity)
+ throws IOException, TimeoutException {
+ long timeout = TIMEOUT;
+ long failtime = (timeout <= 0L) ? Long.MAX_VALUE
+ : System.currentTimeMillis() + timeout;
+ final double avgUtilization = ((double)totalUsedSpace) / totalCapacity;
+ boolean balanced;
+ do {
+ DatanodeInfo[] datanodeReport =
+ client.getDatanodeReport(DatanodeReportType.ALL);
+ assertEquals(datanodeReport.length, cluster.getDataNodes().size());
+ balanced = true;
+ for (DatanodeInfo datanode : datanodeReport) {
+ double nodeUtilization = ((double)datanode.getDfsUsed())
+ / datanode.getCapacity();
+ if (Math.abs(avgUtilization - nodeUtilization) >
+ BALANCE_ALLOWED_VARIANCE) {
+ balanced = false;
+ if (System.currentTimeMillis() > failtime) {
+ throw new TimeoutException(
+ "Rebalancing expected avg utilization to become "
+ + avgUtilization + ", but on datanode " + datanode
+ + " it remains at " + nodeUtilization
+ + " after more than " + TIMEOUT + " msec.");
+ }
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException ignored) {
+ }
+ break;
+ }
+ }
+ } while (!balanced);
+ }
+
+ private void runBalancer(Configuration conf,
+ long totalUsedSpace, long totalCapacity) throws Exception {
+ waitForHeartBeat(totalUsedSpace, totalCapacity);
+
+ // start rebalancing
+ Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
+ final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf);
+ assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
+
+ waitForHeartBeat(totalUsedSpace, totalCapacity);
+ LOG.info("Rebalancing with default factor.");
+ waitForBalancer(totalUsedSpace, totalCapacity);
+ }
+
+ /**
+ * Create a cluster with even distribution, and a new empty node is added to
+ * the cluster, then test rack locality for balancer policy.
+ */
+ @Test
+ public void testBalancerWithRackLocality() throws Exception {
+ Configuration conf = createConf();
+ long[] capacities = new long[]{CAPACITY, CAPACITY};
+ String[] racks = new String[]{RACK0, RACK1};
+ String[] nodeGroups = new String[]{NODEGROUP0, NODEGROUP1};
+
+ int numOfDatanodes = capacities.length;
+ assertEquals(numOfDatanodes, racks.length);
+ MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf)
+ .numDataNodes(capacities.length)
+ .racks(racks)
+ .simulatedCapacities(capacities);
+ MiniDFSClusterWithNodeGroup.setNodeGroups(nodeGroups);
+ cluster = new MiniDFSClusterWithNodeGroup(builder);
+ try {
+ cluster.waitActive();
+ client = NameNodeProxies.createProxy(conf,
+ cluster.getFileSystem(0).getUri(),
+ ClientProtocol.class).getProxy();
+
+ long totalCapacity = TestBalancer.sum(capacities);
+
+ // fill up the cluster to be 30% full
+ long totalUsedSpace = totalCapacity * 3 / 10;
+ TestBalancer.createFile(cluster, filePath, totalUsedSpace / numOfDatanodes,
+ (short) numOfDatanodes, 0);
+
+ long newCapacity = CAPACITY;
+ String newRack = RACK1;
+ String newNodeGroup = NODEGROUP2;
+ // start up an empty node with the same capacity and on the same rack
+ cluster.startDataNodes(conf, 1, true, null, new String[]{newRack},
+ new long[] {newCapacity}, new String[]{newNodeGroup});
+
+ totalCapacity += newCapacity;
+
+ // run balancer and validate results
+ runBalancer(conf, totalUsedSpace, totalCapacity);
+
+ DatanodeInfo[] datanodeReport =
+ client.getDatanodeReport(DatanodeReportType.ALL);
+
+ Map<String, Integer> rackToUsedCapacity = new HashMap<String, Integer>();
+ for (DatanodeInfo datanode: datanodeReport) {
+ String rack = NetworkTopology.getFirstHalf(datanode.getNetworkLocation());
+ int usedCapacity = (int) datanode.getDfsUsed();
+
+ if (rackToUsedCapacity.get(rack) != null) {
+ rackToUsedCapacity.put(rack, usedCapacity + rackToUsedCapacity.get(rack));
+ } else {
+ rackToUsedCapacity.put(rack, usedCapacity);
+ }
+ }
+ assertEquals(rackToUsedCapacity.size(), 2);
+ assertEquals(rackToUsedCapacity.get(RACK0), rackToUsedCapacity.get(RACK1));
+
+ } finally {
+ cluster.shutdown();
+ }
+ }
+
+ /**
+ * Create a cluster with even distribution, and a new empty node is added to
+ * the cluster, then test node-group locality for balancer policy.
+ */
+ @Test
+ public void testBalancerWithNodeGroup() throws Exception {
+ Configuration conf = createConf();
+ long[] capacities = new long[]{CAPACITY, CAPACITY, CAPACITY, CAPACITY};
+ String[] racks = new String[]{RACK0, RACK0, RACK1, RACK1};
+ String[] nodeGroups = new String[]{NODEGROUP0, NODEGROUP0, NODEGROUP1, NODEGROUP2};
+
+ int numOfDatanodes = capacities.length;
+ assertEquals(numOfDatanodes, racks.length);
+ assertEquals(numOfDatanodes, nodeGroups.length);
+ MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf)
+ .numDataNodes(capacities.length)
+ .racks(racks)
+ .simulatedCapacities(capacities);
+ MiniDFSClusterWithNodeGroup.setNodeGroups(nodeGroups);
+ cluster = new MiniDFSClusterWithNodeGroup(builder);
+ try {
+ cluster.waitActive();
+ client = NameNodeProxies.createProxy(conf,
+ cluster.getFileSystem(0).getUri(),
+ ClientProtocol.class).getProxy();
+
+ long totalCapacity = TestBalancer.sum(capacities);
+ // fill up the cluster to be 20% full
+ long totalUsedSpace = totalCapacity * 2 / 10;
+ TestBalancer.createFile(cluster, filePath, totalUsedSpace / (numOfDatanodes/2),
+ (short) (numOfDatanodes/2), 0);
+
+ long newCapacity = CAPACITY;
+ String newRack = RACK1;
+ String newNodeGroup = NODEGROUP2;
+ // start up an empty node with the same capacity and on NODEGROUP2
+ cluster.startDataNodes(conf, 1, true, null, new String[]{newRack},
+ new long[] {newCapacity}, new String[]{newNodeGroup});
+
+ totalCapacity += newCapacity;
+
+ // run balancer and validate results
+ runBalancer(conf, totalUsedSpace, totalCapacity);
+
+ } finally {
+ cluster.shutdown();
+ }
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
index d8efd3a..032c2c0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
@@ -21,6 +21,7 @@
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -45,6 +46,8 @@
public class TestReplicationPolicyWithNodeGroup {
private static final int BLOCK_SIZE = 1024;
private static final int NUM_OF_DATANODES = 8;
+ private static final int NUM_OF_DATANODES_BOUNDARY = 6;
+ private static final int NUM_OF_DATANODES_MORE_TARGETS = 12;
private static final Configuration CONF = new HdfsConfiguration();
private static final NetworkTopology cluster;
private static final NameNode namenode;
@@ -61,6 +64,32 @@
DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3/n5"),
DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3/n6")
};
+
+ private final static DatanodeDescriptor dataNodesInBoundaryCase[] =
+ new DatanodeDescriptor[] {
+ DFSTestUtil.getDatanodeDescriptor("1.1.1.1", "/d1/r1/n1"),
+ DFSTestUtil.getDatanodeDescriptor("2.2.2.2", "/d1/r1/n1"),
+ DFSTestUtil.getDatanodeDescriptor("3.3.3.3", "/d1/r1/n1"),
+ DFSTestUtil.getDatanodeDescriptor("4.4.4.4", "/d1/r1/n2"),
+ DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2/n3"),
+ DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d1/r2/n3")
+ };
+
+ private final static DatanodeDescriptor dataNodesInMoreTargetsCase[] =
+ new DatanodeDescriptor[] {
+ DFSTestUtil.getDatanodeDescriptor("1.1.1.1", "/r1/n1"),
+ DFSTestUtil.getDatanodeDescriptor("2.2.2.2", "/r1/n1"),
+ DFSTestUtil.getDatanodeDescriptor("3.3.3.3", "/r1/n2"),
+ DFSTestUtil.getDatanodeDescriptor("4.4.4.4", "/r1/n2"),
+ DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/r1/n3"),
+ DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/r1/n3"),
+ DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/r2/n4"),
+ DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/r2/n4"),
+ DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/r2/n5"),
+ DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/r2/n5"),
+ DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/r2/n6"),
+ DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/r2/n6"),
+ };
private final static DatanodeDescriptor NODE =
new DatanodeDescriptor(DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/d2/r4/n7"));
@@ -74,6 +103,12 @@
"org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyWithNodeGroup");
CONF.set(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
"org.apache.hadoop.net.NetworkTopologyWithNodeGroup");
+
+ File baseDir = new File(System.getProperty(
+ "test.build.data", "build/test/data"), "dfs/");
+ CONF.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
+ new File(baseDir, "name").getPath());
+
DFSTestUtil.formatNameNode(CONF);
namenode = new NameNode(CONF);
} catch (IOException e) {
@@ -97,7 +132,27 @@
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0);
}
}
-
+
+ /**
+ * Scan the targets list: all targets should be on different NodeGroups.
+ * Return false if two targets are found on the same NodeGroup.
+ */
+ private static boolean checkTargetsOnDifferentNodeGroup(
+ DatanodeDescriptor[] targets) {
+ if(targets.length == 0)
+ return true;
+ Set<String> targetSet = new HashSet<String>();
+ for(DatanodeDescriptor node:targets) {
+ String nodeGroup = NetworkTopology.getLastHalf(node.getNetworkLocation());
+ if(targetSet.contains(nodeGroup)) {
+ return false;
+ } else {
+ targetSet.add(nodeGroup);
+ }
+ }
+ return true;
+ }
+
/**
* In this testcase, client is dataNodes[0]. So the 1st replica should be
* placed on dataNodes[0], the 2nd replica should be placed on
@@ -497,5 +552,122 @@
null, null, (short)1, first, second);
assertEquals(chosenNode, dataNodes[5]);
}
+
+ /**
+ * Test replica placement policy in case of boundary topology.
+ * Rack 2 has only 1 node group & can't be placed with two replicas
+ * The 1st replica will be placed on writer.
+ * The 2nd replica should be placed on a different rack
+ * The 3rd replica should be placed on the same rack with writer, but on a
+ * different node group.
+ */
+ @Test
+ public void testChooseTargetsOnBoundaryTopology() throws Exception {
+ for(int i=0; i<NUM_OF_DATANODES; i++) {
+ cluster.remove(dataNodes[i]);
+ }
+
+ for(int i=0; i<NUM_OF_DATANODES_BOUNDARY; i++) {
+ cluster.add(dataNodesInBoundaryCase[i]);
+ }
+ for(int i=0; i<NUM_OF_DATANODES_BOUNDARY; i++) {
+ dataNodes[0].updateHeartbeat(
+ 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
+ (HdfsConstants.MIN_BLOCKS_FOR_WRITE-1)*BLOCK_SIZE, 0L, 0, 0);
+
+ dataNodesInBoundaryCase[i].updateHeartbeat(
+ 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
+ 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0);
+ }
+
+ DatanodeDescriptor[] targets;
+ targets = replicator.chooseTarget(filename, 0, dataNodesInBoundaryCase[0],
+ new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
+ assertEquals(targets.length, 0);
+
+ targets = replicator.chooseTarget(filename, 1, dataNodesInBoundaryCase[0],
+ new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
+ assertEquals(targets.length, 1);
+
+ targets = replicator.chooseTarget(filename, 2, dataNodesInBoundaryCase[0],
+ new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
+ assertEquals(targets.length, 2);
+ assertFalse(cluster.isOnSameRack(targets[0], targets[1]));
+
+ targets = replicator.chooseTarget(filename, 3, dataNodesInBoundaryCase[0],
+ new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
+ assertEquals(targets.length, 3);
+ assertTrue(checkTargetsOnDifferentNodeGroup(targets));
+ }
+
+ /**
+ * Test re-replication policy in boundary case.
+ * Rack 2 has only one node group & the node in this node group is chosen
+ * Rack 1 has two nodegroups & one of them is chosen.
+ * Replica policy should choose the node from node group of Rack1 but not the
+ * same nodegroup with chosen nodes.
+ */
+ @Test
+ public void testRereplicateOnBoundaryTopology() throws Exception {
+ for(int i=0; i<NUM_OF_DATANODES_BOUNDARY; i++) {
+ dataNodesInBoundaryCase[i].updateHeartbeat(
+ 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
+ 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0);
+ }
+ List<DatanodeDescriptor> chosenNodes = new ArrayList<DatanodeDescriptor>();
+ chosenNodes.add(dataNodesInBoundaryCase[0]);
+ chosenNodes.add(dataNodesInBoundaryCase[5]);
+ DatanodeDescriptor[] targets;
+ targets = replicator.chooseTarget(filename, 1, dataNodesInBoundaryCase[0],
+ chosenNodes, BLOCK_SIZE);
+ assertFalse(cluster.isOnSameNodeGroup(targets[0],
+ dataNodesInBoundaryCase[0]));
+ assertFalse(cluster.isOnSameNodeGroup(targets[0],
+ dataNodesInBoundaryCase[5]));
+ assertTrue(checkTargetsOnDifferentNodeGroup(targets));
+ }
+
+ /**
+ * Test replica placement policy in case of targets more than number of
+ * NodeGroups.
+ * The 12-nodes cluster only has 6 NodeGroups, but in some cases, like:
+ * placing submitted job file, there is requirement to choose more (10)
+ * targets for placing replica. We should test it can return 6 targets.
+ */
+ @Test
+ public void testChooseMoreTargetsThanNodeGroups() throws Exception {
+ // Cleanup nodes in previous tests
+ for(int i=0; i<NUM_OF_DATANODES_BOUNDARY; i++) {
+ DatanodeDescriptor node = dataNodesInBoundaryCase[i];
+ if (cluster.contains(node)) {
+ cluster.remove(node);
+ }
+ }
+
+ for(int i=0; i<NUM_OF_DATANODES_MORE_TARGETS; i++) {
+ cluster.add(dataNodesInMoreTargetsCase[i]);
+ }
+
+ for(int i=0; i<NUM_OF_DATANODES_MORE_TARGETS; i++) {
+ dataNodesInMoreTargetsCase[i].updateHeartbeat(
+ 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
+ 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0);
+ }
+
+ DatanodeDescriptor[] targets;
+ // Test normal case -- 3 replicas
+ targets = replicator.chooseTarget(filename, 3, dataNodesInMoreTargetsCase[0],
+ new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
+ assertEquals(targets.length, 3);
+ assertTrue(checkTargetsOnDifferentNodeGroup(targets));
+
+ // Test special case -- replica number over node groups.
+ targets = replicator.chooseTarget(filename, 10, dataNodesInMoreTargetsCase[0],
+ new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
+ assertTrue(checkTargetsOnDifferentNodeGroup(targets));
+ // Verify it only can find 6 targets for placing replicas.
+ assertEquals(targets.length, 6);
+ }
+
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
index 8e01e6d..504e1ca 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
@@ -29,6 +29,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@@ -41,7 +42,6 @@
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
-import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
@@ -123,7 +123,7 @@
Mockito.anyInt(),
Mockito.anyInt(),
Mockito.anyInt());
- mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(State.STANDBY, 0);
+ mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 0);
return mock;
}
@@ -255,12 +255,12 @@
assertNull(bpos.getActiveNN());
// Have NN1 claim active at txid 1
- mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 1);
+ mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1);
bpos.triggerHeartbeatForTests();
assertSame(mockNN1, bpos.getActiveNN());
// NN2 claims active at a higher txid
- mockHaStatuses[1] = new NNHAStatusHeartbeat(State.ACTIVE, 2);
+ mockHaStatuses[1] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 2);
bpos.triggerHeartbeatForTests();
assertSame(mockNN2, bpos.getActiveNN());
@@ -272,12 +272,12 @@
// Even if NN2 goes to standby, DN shouldn't reset to talking to NN1,
// because NN1's txid is lower than the last active txid. Instead,
// it should consider neither active.
- mockHaStatuses[1] = new NNHAStatusHeartbeat(State.STANDBY, 2);
+ mockHaStatuses[1] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 2);
bpos.triggerHeartbeatForTests();
assertNull(bpos.getActiveNN());
// Now if NN1 goes back to a higher txid, it should be considered active
- mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 3);
+ mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 3);
bpos.triggerHeartbeatForTests();
assertSame(mockNN1, bpos.getActiveNN());
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java
index d496419..a400e85 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java
@@ -49,6 +49,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -72,7 +73,6 @@
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
-import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
@@ -157,7 +157,7 @@
Mockito.anyInt()))
.thenReturn(new HeartbeatResponse(
new DatanodeCommand[0],
- new NNHAStatusHeartbeat(State.ACTIVE, 1)));
+ new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1)));
dn = new DataNode(conf, dirs, null) {
@Override
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
index f310959..cf64c33 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
@@ -179,6 +179,13 @@
return spy;
}
+ public static JournalSet spyOnJournalSet(NameNode nn) {
+ FSEditLog editLog = nn.getFSImage().getEditLog();
+ JournalSet js = Mockito.spy(editLog.getJournalSet());
+ editLog.setJournalSetForTesting(js);
+ return js;
+ }
+
public static String getMkdirOpPath(FSEditLogOp op) {
if (op.opCode == FSEditLogOpCodes.OP_MKDIR) {
return ((MkdirOp) op).path;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java
new file mode 100644
index 0000000..3de27cb
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.net.InetAddress;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
+import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.junit.Test;
+
+/**
+ * Tests for the {@link AuditLogger} custom audit logging interface.
+ */
+public class TestAuditLogger {
+
+ /**
+ * Tests that AuditLogger works as expected.
+ */
+ @Test
+ public void testAuditLogger() throws IOException {
+ Configuration conf = new HdfsConfiguration();
+ conf.set(DFS_NAMENODE_AUDIT_LOGGERS_KEY,
+ DummyAuditLogger.class.getName());
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+
+ try {
+ cluster.waitClusterUp();
+ assertTrue(DummyAuditLogger.initialized);
+
+ FileSystem fs = cluster.getFileSystem();
+ long time = System.currentTimeMillis();
+ fs.setTimes(new Path("/"), time, time);
+ assertEquals(1, DummyAuditLogger.logCount);
+ } finally {
+ cluster.shutdown();
+ }
+ }
+
+ /**
+ * Tests that a broken audit logger causes requests to fail.
+ */
+ @Test
+ public void testBrokenLogger() throws IOException {
+ Configuration conf = new HdfsConfiguration();
+ conf.set(DFS_NAMENODE_AUDIT_LOGGERS_KEY,
+ BrokenAuditLogger.class.getName());
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+
+ try {
+ cluster.waitClusterUp();
+
+ FileSystem fs = cluster.getFileSystem();
+ long time = System.currentTimeMillis();
+ fs.setTimes(new Path("/"), time, time);
+ fail("Expected exception due to broken audit logger.");
+ } catch (RemoteException re) {
+ // Expected.
+ } finally {
+ cluster.shutdown();
+ }
+ }
+
+ public static class DummyAuditLogger implements AuditLogger {
+
+ static boolean initialized;
+ static int logCount;
+
+ public void initialize(Configuration conf) {
+ initialized = true;
+ }
+
+ public void logAuditEvent(boolean succeeded, String userName,
+ InetAddress addr, String cmd, String src, String dst,
+ FileStatus stat) {
+ logCount++;
+ }
+
+ }
+
+ public static class BrokenAuditLogger implements AuditLogger {
+
+ public void initialize(Configuration conf) {
+ // No op.
+ }
+
+ public void logAuditEvent(boolean succeeded, String userName,
+ InetAddress addr, String cmd, String src, String dst,
+ FileStatus stat) {
+ throw new RuntimeException("uh oh");
+ }
+
+ }
+
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java
index 6ade5f2..2230648 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java
@@ -35,6 +35,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HAUtil;
@@ -103,6 +104,9 @@
BackupNode bn = (BackupNode)NameNode.createNameNode(
new String[]{startupOpt.getName()}, c);
assertTrue(bn.getRole() + " must be in SafeMode.", bn.isInSafeMode());
+ assertTrue(bn.getRole() + " must be in StandbyState",
+ bn.getNamesystem().getHAState()
+ .equalsIgnoreCase(HAServiceState.STANDBY.name()));
return bn;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java
index 695490e..2174f94 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java
@@ -25,10 +25,15 @@
import java.io.FileNotFoundException;
import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsNotDirectoryException;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.junit.Test;
@@ -157,6 +162,48 @@
}
+ /**
+ * FSDirectory#unprotectedSetQuota creates a new INodeDirectoryWithQuota to
+ * replace the original INodeDirectory. Before HDFS-4243, the parent field of
+ * all the children INodes of the target INodeDirectory is not changed to
+ * point to the new INodeDirectoryWithQuota. This testcase tests this
+ * scenario.
+ */
+ @Test
+ public void testGetFullPathNameAfterSetQuota() throws Exception {
+ long fileLen = 1024;
+ replication = 3;
+ Configuration conf = new Configuration();
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(
+ replication).build();
+ cluster.waitActive();
+ FSNamesystem fsn = cluster.getNamesystem();
+ FSDirectory fsdir = fsn.getFSDirectory();
+ DistributedFileSystem dfs = cluster.getFileSystem();
+
+ // Create a file for test
+ final Path dir = new Path("/dir");
+ final Path file = new Path(dir, "file");
+ DFSTestUtil.createFile(dfs, file, fileLen, replication, 0L);
+
+ // Check the full path name of the INode associating with the file
+ INode fnode = fsdir.getINode(file.toString());
+ assertEquals(file.toString(), fnode.getFullPathName());
+
+ // Call FSDirectory#unprotectedSetQuota which calls
+ // INodeDirectory#replaceChild
+ dfs.setQuota(dir, Long.MAX_VALUE - 1, replication * fileLen * 10);
+ final Path newDir = new Path("/newdir");
+ final Path newFile = new Path(newDir, "file");
+ // Also rename dir
+ dfs.rename(dir, newDir, Options.Rename.OVERWRITE);
+ // /dir/file now should be renamed to /newdir/file
+ fnode = fsdir.getINode(newFile.toString());
+ // getFullPathName can return correct result only if the parent field of
+ // child node is set correctly
+ assertEquals(newFile.toString(), fnode.getFullPathName());
+ }
+
@Test
public void testAppendBlocks() {
INodeFile origFile = createINodeFiles(1, "origfile")[0];
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
index 9befc49..c41a074 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
@@ -92,7 +92,8 @@
DataInputStream in = new DataInputStream(fstream);
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
String line = reader.readLine();
- assertTrue(line.equals("3 files and directories, 2 blocks = 5 total"));
+ assertTrue(line.equals(
+ "3 files and directories, 2 blocks = 5 total filesystem objects"));
line = reader.readLine();
assertTrue(line.equals("Live Datanodes: 1"));
line = reader.readLine();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
index 8af6960..a852a69 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
@@ -603,6 +603,24 @@
}
}
+ @Test
+ public void testSaveNamespaceWithDanglingLease() throws Exception {
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
+ .numDataNodes(1).build();
+ cluster.waitActive();
+ DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
+ try {
+ cluster.getNamesystem().leaseManager.addLease("me", "/non-existent");
+ fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+ cluster.getNameNodeRpc().saveNamespace();
+ fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+
private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
// Make an edit
fsn.mkdirs(
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
index 61016c9..c449aca 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
@@ -35,6 +35,7 @@
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.JournalSet;
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
@@ -66,6 +67,12 @@
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+
+ // Dial down the retention of extra edits and checkpoints. This is to
+ // help catch regressions of HDFS-4238 (SBN should not purge shared edits)
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 1);
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0);
+
conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true);
conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY,
SlowCodec.class.getCanonicalName());
@@ -99,15 +106,20 @@
@Test
public void testSBNCheckpoints() throws Exception {
- doEdits(0, 10);
+ JournalSet standbyJournalSet = NameNodeAdapter.spyOnJournalSet(nn1);
+ doEdits(0, 10);
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
// Once the standby catches up, it should notice that it needs to
// do a checkpoint and save one to its local directories.
- HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12));
+ HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
// It should also upload it back to the active.
- HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12));
+ HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));
+
+ // The standby should never try to purge edit logs on shared storage.
+ Mockito.verify(standbyJournalSet, Mockito.never()).
+ purgeLogsOlderThan(Mockito.anyLong());
}
/**
@@ -129,8 +141,8 @@
// so the standby will catch up. Then, both will be in standby mode
// with enough uncheckpointed txns to cause a checkpoint, and they
// will each try to take a checkpoint and upload to each other.
- HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12));
- HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12));
+ HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
+ HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));
assertEquals(12, nn0.getNamesystem().getFSImage()
.getMostRecentCheckpointTxId());
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 39bb87a..7621a6e 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -605,6 +605,15 @@
MAPREDUCE-4764. repair TestBinaryTokenFile (Ivan A. Veselovsky via bobby)
+ MAPREDUCE-4825. JobImpl.finished doesn't expect ERROR as a final job state
+ (jlowe via bobby)
+
+ MAPREDUCE-4817. Hardcoded task ping timeout kills tasks localizing large
+ amounts of data (tgraves)
+
+ MAPREDUCE-4836. Elapsed time for running tasks on AM web UI tasks page is 0
+ (Ravi Prakash via jeagles)
+
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
index ab084cf..e9f890a 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
@@ -274,7 +274,6 @@
@Override
public boolean ping(TaskAttemptID taskAttemptID) throws IOException {
LOG.info("Ping from " + taskAttemptID.toString());
- taskHeartbeatHandler.pinged(TypeConverter.toYarn(taskAttemptID));
return true;
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java
index b83f663..153b5a8 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java
@@ -46,33 +46,22 @@
public class TaskHeartbeatHandler extends AbstractService {
private static class ReportTime {
- private long lastPing;
private long lastProgress;
public ReportTime(long time) {
setLastProgress(time);
}
- public synchronized void setLastPing(long time) {
- lastPing = time;
- }
-
public synchronized void setLastProgress(long time) {
lastProgress = time;
- lastPing = time;
}
-
- public synchronized long getLastPing() {
- return lastPing;
- }
-
+
public synchronized long getLastProgress() {
return lastProgress;
}
}
private static final Log LOG = LogFactory.getLog(TaskHeartbeatHandler.class);
- private static final int PING_TIMEOUT = 5 * 60 * 1000;
//thread which runs periodically to see the last time since a heartbeat is
//received from a task.
@@ -127,14 +116,6 @@
}
}
- public void pinged(TaskAttemptId attemptID) {
- //only put for the registered attempts
- //TODO throw an exception if the task isn't registered.
- ReportTime time = runningAttempts.get(attemptID);
- if(time != null) {
- time.setLastPing(clock.getTime());
- }
- }
public void register(TaskAttemptId attemptID) {
runningAttempts.put(attemptID, new ReportTime(clock.getTime()));
@@ -159,10 +140,8 @@
Map.Entry<TaskAttemptId, ReportTime> entry = iterator.next();
boolean taskTimedOut = (taskTimeOut > 0) &&
(currentTime > (entry.getValue().getLastProgress() + taskTimeOut));
- boolean pingTimedOut =
- (currentTime > (entry.getValue().getLastPing() + PING_TIMEOUT));
-
- if(taskTimedOut || pingTimedOut) {
+
+ if(taskTimedOut) {
// task is lost, remove from the list and raise lost event
iterator.remove();
eventHandler.handle(new TaskAttemptDiagnosticsUpdateEvent(entry
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
index ad30c1e..e4000a6 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
@@ -828,6 +828,7 @@
case KILLED:
metrics.killedJob(this);
break;
+ case ERROR:
case FAILED:
metrics.failedJob(this);
break;
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskInfo.java
index aab8b56..4098340 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskInfo.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskInfo.java
@@ -59,11 +59,12 @@
TaskReport report = task.getReport();
this.startTime = report.getStartTime();
this.finishTime = report.getFinishTime();
- this.elapsedTime = Times.elapsed(this.startTime, this.finishTime, false);
+ this.state = report.getTaskState();
+ this.elapsedTime = Times.elapsed(this.startTime, this.finishTime,
+ this.state == TaskState.RUNNING);
if (this.elapsedTime == -1) {
this.elapsedTime = 0;
}
- this.state = report.getTaskState();
this.progress = report.getProgress() * 100;
this.id = MRApps.toString(task.getID());
this.taskNum = task.getID().getId();
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 1216a3e..6ba341a 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -115,6 +115,14 @@
MAPREDUCE-4778. Fair scheduler event log is only written if directory
exists on HDFS. (Sandy Ryza via tomwhite)
+ YARN-229. Remove old unused RM recovery code. (Bikas Saha via acmurthy)
+
+ YARN-187. Add hierarchical queues to the fair scheduler.
+ (Sandy Ryza via tomwhite)
+
+ YARN-72. NM should handle cleaning up containers when it shuts down.
+ (Sandy Ryza via tomwhite)
+
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES
@@ -192,6 +200,12 @@
YARN-204. test coverage for org.apache.hadoop.tools (Aleksey Gorshkov via
bobby)
+ YARN-251. Proxy URI generation fails for blank tracking URIs (Tom White
+ via jlowe)
+
+ YARN-258. RM web page UI shows Invalid Date for start and finish times
+ (Ravi Prakash via jlowe)
+
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java
index 9f2e554..2cb6973 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java
@@ -28,7 +28,7 @@
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.service.Service.STATE;
import org.junit.AfterClass;
@@ -46,7 +46,7 @@
@BeforeClass
public static void setUpResourceManager() throws IOException, InterruptedException {
conf = new YarnConfiguration();
- Store store = StoreFactory.getStore(conf);
+ RMStateStore store = StoreFactory.getStore(conf);
resourceManager = new ResourceManager(store) {
@Override
protected void doSecureLogin() throws IOException {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index cbe63fd..9203626 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -228,16 +228,7 @@
/** The class to use as the persistent store.*/
public static final String RM_STORE = RM_PREFIX + "store.class";
-
- /** The address of the zookeeper instance to use with ZK store.*/
- public static final String RM_ZK_STORE_ADDRESS =
- RM_PREFIX + "zookeeper-store.address";
-
- /** The zookeeper session timeout for the zookeeper store.*/
- public static final String RM_ZK_STORE_TIMEOUT_MS =
- RM_PREFIX + "zookeeper-store.session.timeout-ms";
- public static final int DEFAULT_RM_ZK_STORE_TIMEOUT_MS = 60000;
-
+
/** The maximum number of completed applications RM keeps. */
public static final String RM_MAX_COMPLETED_APPLICATIONS =
RM_PREFIX + "max-completed-applications";
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java
index da334eb..115338e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java
@@ -28,17 +28,6 @@
import org.apache.hadoop.yarn.webapp.hamlet.HamletSpec.HTML;
public class JQueryUI extends HtmlBlock {
- // Render choices (mostly for dataTables)
- public enum Render {
- /** small (<~100 rows) table as html, most gracefully degradable */
- HTML,
- /** medium (<~2000 rows) table as js array */
- JS_ARRAY,
- /** large (<~10000 rows) table loading from server */
- JS_LOAD,
- /** huge (>~10000 rows) table processing from server */
- JS_SERVER
- };
// UI params
public static final String ACCORDION = "ui.accordion";
@@ -197,12 +186,4 @@
append("sPaginationType: 'full_numbers', iDisplayLength:20, ").
append("aLengthMenu:[20, 40, 60, 80, 100]");
}
-
- public static StringBuilder tableInitProgress(StringBuilder init,
- long numCells) {
- return init.append(", bProcessing:true, ").
- append("oLanguage:{sProcessing:'Processing ").
- append(numCells).append(" cells...").
- append("<p><img src=\"/static/busy.gif\">'}");
- }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/Jsons.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/Jsons.java
deleted file mode 100644
index 8e17940..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/Jsons.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.hadoop.yarn.webapp.view;
-
-import java.io.PrintWriter;
-
-import static org.apache.hadoop.yarn.util.StringHelper.*;
-import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*;
-
-/**
- * JSON helpers
- */
-public class Jsons {
- public static final String _SEP = "\",\"";
-
- public static PrintWriter appendProgressBar(PrintWriter out, String pct) {
- return out.append("<br title='").append(pct).append("'>").
- append("<div class='").append(C_PROGRESSBAR).
- append("' title='").append(pct).append('%').
- append("'><div class='").append(C_PROGRESSBAR_VALUE).
- append("' style='width: ").append(pct).
- append("%'>").append("<\\/div><\\/div>");
- }
-
- public static PrintWriter appendProgressBar(PrintWriter out,
- float progress) {
- return appendProgressBar(out, String.format("%.1f", progress));
- }
-
- public static PrintWriter appendSortable(PrintWriter out, Object value) {
- return out.append("<br title='").append(String.valueOf(value)).append("'>");
- }
-
- public static PrintWriter appendLink(PrintWriter out, Object anchor,
- String prefix, String... parts) {
- String anchorText = String.valueOf(anchor);
- return out.append("<a href='").append(anchor == null ? "#" :
- ujoin(prefix, parts)).append("'>").append(anchorText).append("<\\/a>");
- }
-}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index adcf8d5..2b49417 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -210,17 +210,6 @@
</property>
<property>
- <description>The address of the zookeeper instance to use with ZK store.</description>
- <name>yarn.resourcemanager.zookeeper-store.address</name>
- </property>
-
- <property>
- <description>The zookeeper session timeout for the zookeeper store.</description>
- <name>yarn.resourcemanager.zookeeper-store.session.timeout-ms</name>
- <value>60000</value>
- </property>
-
- <property>
<description>The maximum number of completed applications RM keeps. </description>
<name>yarn.resourcemanager.max-completed-applications</name>
<value>10000</value>
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java
index 28e9632..675b605 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java
@@ -25,13 +25,23 @@
public class CMgrCompletedContainersEvent extends ContainerManagerEvent {
private List<ContainerId> containerToCleanup;
-
- public CMgrCompletedContainersEvent(List<ContainerId> containersToCleanup) {
+ private Reason reason;
+
+ public CMgrCompletedContainersEvent(List<ContainerId> containersToCleanup, Reason reason) {
super(ContainerManagerEventType.FINISH_CONTAINERS);
this.containerToCleanup = containersToCleanup;
+ this.reason = reason;
}
public List<ContainerId> getContainersToCleanup() {
return this.containerToCleanup;
}
+
+ public Reason getReason() {
+ return reason;
+ }
+
+ public static enum Reason {
+ ON_SHUTDOWN, BY_RESOURCEMANAGER
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index cbb3d29..51b81f2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -19,6 +19,9 @@
package org.apache.hadoop.yarn.server.nodemanager;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
@@ -61,14 +64,24 @@
* Priority of the NodeManager shutdown hook.
*/
public static final int SHUTDOWN_HOOK_PRIORITY = 30;
+
+ /**
+ * Extra duration to wait for containers to be killed on shutdown.
+ */
+ private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000;
private static final Log LOG = LogFactory.getLog(NodeManager.class);
protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
private ApplicationACLsManager aclsManager;
private NodeHealthCheckerService nodeHealthChecker;
private LocalDirsHandlerService dirsHandler;
+ private Context context;
+ private AsyncDispatcher dispatcher;
+ private ContainerManagerImpl containerManager;
private static CompositeServiceShutdownHook nodeManagerShutdownHook;
+ private long waitForContainersOnShutdownMillis;
+
public NodeManager() {
super(NodeManager.class.getName());
}
@@ -115,7 +128,7 @@
containerTokenSecretManager = new NMContainerTokenSecretManager(conf);
}
- Context context = new NMContext(containerTokenSecretManager);
+ this.context = new NMContext(containerTokenSecretManager);
this.aclsManager = new ApplicationACLsManager(conf);
@@ -131,7 +144,7 @@
addService(del);
// NodeManager level dispatcher
- AsyncDispatcher dispatcher = new AsyncDispatcher();
+ this.dispatcher = new AsyncDispatcher();
nodeHealthChecker = new NodeHealthCheckerService();
addService(nodeHealthChecker);
@@ -144,7 +157,7 @@
NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor();
addService(nodeResourceMonitor);
- ContainerManagerImpl containerManager =
+ containerManager =
createContainerManager(context, exec, del, nodeStatusUpdater,
this.aclsManager, dirsHandler);
addService(containerManager);
@@ -155,13 +168,20 @@
dispatcher.register(ContainerManagerEventType.class, containerManager);
addService(dispatcher);
-
+
DefaultMetricsSystem.initialize("NodeManager");
// StatusUpdater should be added last so that it get started last
// so that we make sure everything is up before registering with RM.
addService(nodeStatusUpdater);
-
+
+ waitForContainersOnShutdownMillis =
+ conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS,
+ YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) +
+ conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS,
+ YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) +
+ SHUTDOWN_CLEANUP_SLOP_MS;
+
super.init(conf);
// TODO add local dirs to del
}
@@ -178,9 +198,44 @@
@Override
public void stop() {
+ cleanupContainers();
super.stop();
DefaultMetricsSystem.shutdown();
}
+
+ @SuppressWarnings("unchecked")
+ private void cleanupContainers() {
+ Map<ContainerId, Container> containers = context.getContainers();
+ if (containers.isEmpty()) {
+ return;
+ }
+ LOG.info("Containers still running on shutdown: " + containers.keySet());
+
+ List<ContainerId> containerIds = new ArrayList<ContainerId>(containers.keySet());
+ dispatcher.getEventHandler().handle(
+ new CMgrCompletedContainersEvent(containerIds,
+ CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN));
+
+ LOG.info("Waiting for containers to be killed");
+
+ long waitStartTime = System.currentTimeMillis();
+ while (!containers.isEmpty() &&
+ System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) {
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException ex) {
+ LOG.warn("Interrupted while sleeping on container kill", ex);
+ }
+ }
+
+ // All containers killed
+ if (containers.isEmpty()) {
+ LOG.info("All containers in DONE state");
+ } else {
+ LOG.info("Done waiting for containers to be killed. Still alive: " +
+ containers.keySet());
+ }
+ }
public static class NMContext implements Context {
@@ -282,6 +337,11 @@
NodeManager createNewNodeManager() {
return new NodeManager();
}
+
+ // For testing
+ ContainerManagerImpl getContainerManager() {
+ return containerManager;
+ }
public static void main(String[] args) {
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index 819e22d..22ec10a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -363,7 +363,8 @@
.getContainersToCleanupList();
if (containersToCleanup.size() != 0) {
dispatcher.getEventHandler().handle(
- new CMgrCompletedContainersEvent(containersToCleanup));
+ new CMgrCompletedContainersEvent(containersToCleanup,
+ CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER));
}
List<ApplicationId> appsToCleanup =
response.getApplicationsToCleanupList();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index 7ca6a2c..b4a0034 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -23,6 +23,8 @@
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -593,9 +595,16 @@
(CMgrCompletedContainersEvent) event;
for (ContainerId container : containersFinishedEvent
.getContainersToCleanup()) {
+ String diagnostic = "";
+ if (containersFinishedEvent.getReason() ==
+ CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN) {
+ diagnostic = "Container Killed on Shutdown";
+ } else if (containersFinishedEvent.getReason() ==
+ CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER) {
+ diagnostic = "Container Killed by ResourceManager";
+ }
this.dispatcher.getEventHandler().handle(
- new ContainerKillEvent(container,
- "Container Killed by ResourceManager"));
+ new ContainerKillEvent(container, diagnostic));
}
break;
default:
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java
new file mode 100644
index 0000000..e2ea821
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java
@@ -0,0 +1,92 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.api.ResourceTracker;
+import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
+import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
+import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
+import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
+import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
+import org.apache.hadoop.yarn.server.api.records.NodeStatus;
+import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
+import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+
+/**
+ * This class allows a node manager to run without without communicating with a
+ * real RM.
+ */
+public class MockNodeStatusUpdater extends NodeStatusUpdaterImpl {
+ static final Log LOG = LogFactory.getLog(MockNodeStatusUpdater.class);
+
+ private static final RecordFactory recordFactory = RecordFactoryProvider
+ .getRecordFactory(null);
+
+ private ResourceTracker resourceTracker;
+
+ public MockNodeStatusUpdater(Context context, Dispatcher dispatcher,
+ NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
+ super(context, dispatcher, healthChecker, metrics);
+ resourceTracker = new MockResourceTracker();
+ }
+
+ @Override
+ protected ResourceTracker getRMClient() {
+ return resourceTracker;
+ }
+
+ private static class MockResourceTracker implements ResourceTracker {
+ private int heartBeatID;
+
+ @Override
+ public RegisterNodeManagerResponse registerNodeManager(
+ RegisterNodeManagerRequest request) throws YarnRemoteException {
+ RegistrationResponse regResponse = recordFactory
+ .newRecordInstance(RegistrationResponse.class);
+
+ RegisterNodeManagerResponse response = recordFactory
+ .newRecordInstance(RegisterNodeManagerResponse.class);
+ response.setRegistrationResponse(regResponse);
+ return response;
+ }
+
+ @Override
+ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
+ throws YarnRemoteException {
+ NodeStatus nodeStatus = request.getNodeStatus();
+ LOG.info("Got heartbeat number " + heartBeatID);
+ nodeStatus.setResponseId(heartBeatID++);
+
+ HeartbeatResponse response = recordFactory
+ .newRecordInstance(HeartbeatResponse.class);
+ response.setResponseId(heartBeatID);
+
+ NodeHeartbeatResponse nhResponse = recordFactory
+ .newRecordInstance(NodeHeartbeatResponse.class);
+ nhResponse.setHeartbeatResponse(response);
+ return nhResponse;
+ }
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
new file mode 100644
index 0000000..c5b4881
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
@@ -0,0 +1,222 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.URL;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestNodeManagerShutdown {
+ static final File basedir =
+ new File("target", TestNodeManagerShutdown.class.getName());
+ static final File tmpDir = new File(basedir, "tmpDir");
+ static final File logsDir = new File(basedir, "logs");
+ static final File remoteLogsDir = new File(basedir, "remotelogs");
+ static final File nmLocalDir = new File(basedir, "nm0");
+ static final File processStartFile = new File(tmpDir, "start_file.txt")
+ .getAbsoluteFile();
+
+ static final RecordFactory recordFactory = RecordFactoryProvider
+ .getRecordFactory(null);
+ static final String user = "nobody";
+ private FileContext localFS;
+
+ @Before
+ public void setup() throws UnsupportedFileSystemException {
+ localFS = FileContext.getLocalFSFileContext();
+ tmpDir.mkdirs();
+ logsDir.mkdirs();
+ remoteLogsDir.mkdirs();
+ nmLocalDir.mkdirs();
+ }
+
+ @After
+ public void tearDown() throws IOException, InterruptedException {
+ localFS.delete(new Path(basedir.getPath()), true);
+ }
+
+ @Test
+ public void testKillContainersOnShutdown() throws IOException {
+ NodeManager nm = getNodeManager();
+ nm.init(createNMConfig());
+ nm.start();
+
+ ContainerManagerImpl containerManager = nm.getContainerManager();
+ File scriptFile = createUnhaltingScriptFile();
+
+ ContainerLaunchContext containerLaunchContext =
+ recordFactory.newRecordInstance(ContainerLaunchContext.class);
+
+ // Construct the Container-id
+ ContainerId cId = createContainerId();
+ containerLaunchContext.setContainerId(cId);
+
+ containerLaunchContext.setUser(user);
+
+ URL localResourceUri =
+ ConverterUtils.getYarnUrlFromPath(localFS
+ .makeQualified(new Path(scriptFile.getAbsolutePath())));
+ LocalResource localResource =
+ recordFactory.newRecordInstance(LocalResource.class);
+ localResource.setResource(localResourceUri);
+ localResource.setSize(-1);
+ localResource.setVisibility(LocalResourceVisibility.APPLICATION);
+ localResource.setType(LocalResourceType.FILE);
+ localResource.setTimestamp(scriptFile.lastModified());
+ String destinationFile = "dest_file";
+ Map<String, LocalResource> localResources =
+ new HashMap<String, LocalResource>();
+ localResources.put(destinationFile, localResource);
+ containerLaunchContext.setLocalResources(localResources);
+ containerLaunchContext.setUser(containerLaunchContext.getUser());
+ List<String> commands = new ArrayList<String>();
+ commands.add("/bin/bash");
+ commands.add(scriptFile.getAbsolutePath());
+ containerLaunchContext.setCommands(commands);
+ containerLaunchContext.setResource(recordFactory
+ .newRecordInstance(Resource.class));
+ containerLaunchContext.getResource().setMemory(1024);
+ StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class);
+ startRequest.setContainerLaunchContext(containerLaunchContext);
+ containerManager.startContainer(startRequest);
+
+ GetContainerStatusRequest request =
+ recordFactory.newRecordInstance(GetContainerStatusRequest.class);
+ request.setContainerId(cId);
+ ContainerStatus containerStatus =
+ containerManager.getContainerStatus(request).getStatus();
+ Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState());
+
+ try {Thread.sleep(5000);} catch (InterruptedException ex) {ex.printStackTrace();}
+
+ nm.stop();
+
+ // Now verify the contents of the file
+ // Script generates a message when it receives a sigterm
+ // so we look for that
+ BufferedReader reader =
+ new BufferedReader(new FileReader(processStartFile));
+
+ boolean foundSigTermMessage = false;
+ while (true) {
+ String line = reader.readLine();
+ if (line == null) {
+ break;
+ }
+ if (line.contains("SIGTERM")) {
+ foundSigTermMessage = true;
+ break;
+ }
+ }
+ Assert.assertTrue("Did not find sigterm message", foundSigTermMessage);
+ reader.close();
+ }
+
+ private ContainerId createContainerId() {
+ ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class);
+ appId.setClusterTimestamp(0);
+ appId.setId(0);
+ ApplicationAttemptId appAttemptId =
+ recordFactory.newRecordInstance(ApplicationAttemptId.class);
+ appAttemptId.setApplicationId(appId);
+ appAttemptId.setAttemptId(1);
+ ContainerId containerId =
+ recordFactory.newRecordInstance(ContainerId.class);
+ containerId.setApplicationAttemptId(appAttemptId);
+ return containerId;
+ }
+
+ private YarnConfiguration createNMConfig() {
+ YarnConfiguration conf = new YarnConfiguration();
+ conf.setInt(YarnConfiguration.NM_PMEM_MB, 5*1024); // 5GB
+ conf.set(YarnConfiguration.NM_ADDRESS, "127.0.0.1:12345");
+ conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "127.0.0.1:12346");
+ conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath());
+ conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath());
+ conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath());
+ return conf;
+ }
+
+ /**
+ * Creates a script to run a container that will run forever unless
+ * stopped by external means.
+ */
+ private File createUnhaltingScriptFile() throws IOException {
+ File scriptFile = new File(tmpDir, "scriptFile.sh");
+ BufferedWriter fileWriter = new BufferedWriter(new FileWriter(scriptFile));
+ fileWriter.write("#!/bin/bash\n\n");
+ fileWriter.write("echo \"Running testscript for delayed kill\"\n");
+ fileWriter.write("hello=\"Got SIGTERM\"\n");
+ fileWriter.write("umask 0\n");
+ fileWriter.write("trap \"echo $hello >> " + processStartFile + "\" SIGTERM\n");
+ fileWriter.write("echo \"Writing pid to start file\"\n");
+ fileWriter.write("echo $$ >> " + processStartFile + "\n");
+ fileWriter.write("while true; do\nsleep 1s;\ndone\n");
+
+ fileWriter.close();
+ return scriptFile;
+ }
+
+ private NodeManager getNodeManager() {
+ return new NodeManager() {
+ @Override
+ protected NodeStatusUpdater createNodeStatusUpdater(Context context,
+ Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
+ MockNodeStatusUpdater myNodeStatusUpdater = new MockNodeStatusUpdater(
+ context, dispatcher, healthChecker, metrics);
+ return myNodeStatusUpdater;
+ }
+ };
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
index dbc28a8..4475392 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
@@ -36,7 +36,6 @@
import org.apache.hadoop.yarn.ipc.RPCUtil;
import org.apache.hadoop.yarn.security.client.ClientTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
@@ -251,17 +250,12 @@
YarnConfiguration.DEFAULT_APPLICATION_NAME);
}
- // Store application for recovery
- ApplicationStore appStore = rmContext.getApplicationsStore()
- .createApplicationStore(submissionContext.getApplicationId(),
- submissionContext);
-
// Create RMApp
application =
new RMAppImpl(applicationId, rmContext, this.conf,
submissionContext.getApplicationName(),
submissionContext.getUser(), submissionContext.getQueue(),
- submissionContext, clientTokenStr, appStore, this.scheduler,
+ submissionContext, clientTokenStr, this.scheduler,
this.masterService, submitTime);
// Sanity check - duplicate?
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java
index ef81d9e..ccb3deb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java
@@ -23,8 +23,6 @@
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.event.Dispatcher;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.NodeStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
@@ -41,10 +39,6 @@
Dispatcher getDispatcher();
- NodeStore getNodeStore();
-
- ApplicationsStore getApplicationsStore();
-
ConcurrentMap<ApplicationId, RMApp> getRMApps();
ConcurrentMap<String, RMNode> getInactiveRMNodes();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java
index d92b1a4..632b6c34 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java
@@ -24,9 +24,6 @@
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.event.Dispatcher;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.NodeStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
@@ -39,7 +36,6 @@
public class RMContextImpl implements RMContext {
private final Dispatcher rmDispatcher;
- private final Store store;
private final ConcurrentMap<ApplicationId, RMApp> applications
= new ConcurrentHashMap<ApplicationId, RMApp>();
@@ -58,7 +54,7 @@
private final RMContainerTokenSecretManager containerTokenSecretManager;
private final ClientToAMTokenSecretManagerInRM clientToAMTokenSecretManager;
- public RMContextImpl(Store store, Dispatcher rmDispatcher,
+ public RMContextImpl(Dispatcher rmDispatcher,
ContainerAllocationExpirer containerAllocationExpirer,
AMLivelinessMonitor amLivelinessMonitor,
AMLivelinessMonitor amFinishingMonitor,
@@ -66,7 +62,6 @@
ApplicationTokenSecretManager appTokenSecretManager,
RMContainerTokenSecretManager containerTokenSecretManager,
ClientToAMTokenSecretManagerInRM clientTokenSecretManager) {
- this.store = store;
this.rmDispatcher = rmDispatcher;
this.containerAllocationExpirer = containerAllocationExpirer;
this.amLivelinessMonitor = amLivelinessMonitor;
@@ -83,16 +78,6 @@
}
@Override
- public NodeStore getNodeStore() {
- return store;
- }
-
- @Override
- public ApplicationsStore getApplicationsStore() {
- return store;
- }
-
- @Override
public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
return this.applications;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
index 2b5f73c..c7d2b26 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
@@ -46,8 +46,8 @@
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@@ -119,12 +119,12 @@
protected RMDelegationTokenSecretManager rmDTSecretManager;
private WebApp webApp;
protected RMContext rmContext;
- private final Store store;
+ private final RMStateStore store;
protected ResourceTrackerService resourceTracker;
private Configuration conf;
- public ResourceManager(Store store) {
+ public ResourceManager(RMStateStore store) {
super("ResourceManager");
this.store = store;
}
@@ -161,7 +161,7 @@
this.containerTokenSecretManager = createContainerTokenSecretManager(conf);
this.rmContext =
- new RMContextImpl(this.store, this.rmDispatcher,
+ new RMContextImpl(this.rmDispatcher,
this.containerAllocationExpirer, amLivelinessMonitor,
amFinishingMonitor, tokenRenewer, this.appTokenSecretManager,
this.containerTokenSecretManager, this.clientToAMSecretManager);
@@ -643,8 +643,6 @@
@Override
public void recover(RMState state) throws Exception {
- resourceTracker.recover(state);
- scheduler.recover(state);
}
public static void main(String argv[]) {
@@ -652,14 +650,13 @@
StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
try {
Configuration conf = new YarnConfiguration();
- Store store = StoreFactory.getStore(conf);
+ RMStateStore store = StoreFactory.getStore(conf);
ResourceManager resourceManager = new ResourceManager(store);
ShutdownHookManager.get().addShutdownHook(
new CompositeServiceShutdownHook(resourceManager),
SHUTDOWN_HOOK_PRIORITY);
resourceManager.init(conf);
//resourceManager.recover(store.restore());
- //store.doneWithRecovery();
resourceManager.start();
} catch (Throwable t) {
LOG.fatal("Error starting ResourceManager", t);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
index ed4a021..7888b10 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
@@ -44,7 +44,7 @@
import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
@@ -297,28 +297,6 @@
return nodeHeartBeatResponse;
}
- public void recover(RMState state) {
-//
-// List<RMNode> nodeManagers = state.getStoredNodeManagers();
-// for (RMNode nm : nodeManagers) {
-// createNewNode(nm.getNodeID(), nm.getNodeHostName(), nm
-// .getCommandPort(), nm.getHttpPort(), nm.getNode(), nm
-// .getTotalCapability());
-// }
-// for (Map.Entry<ApplicationId, ApplicationInfo> entry : state
-// .getStoredApplications().entrySet()) {
-// List<Container> containers = entry.getValue().getContainers();
-// List<Container> containersToAdd = new ArrayList<Container>();
-// for (Container c : containers) {
-// RMNode containerNode = this.rmContext.getNodesCollection()
-// .getNodeInfo(c.getNodeId());
-// containersToAdd.add(c);
-// containerNode.allocateContainer(entry.getKey(), containersToAdd);
-// containersToAdd.clear();
-// }
-// }
- }
-
/**
* resolving the network topology.
* @param hostName the hostname of this node.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ApplicationsStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ApplicationsStore.java
deleted file mode 100644
index 1160c88..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ApplicationsStore.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-
-import java.io.IOException;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-
-public interface ApplicationsStore {
- public ApplicationStore createApplicationStore(ApplicationId applicationId,
- ApplicationSubmissionContext context) throws IOException;
- public void removeApplication(ApplicationId application) throws IOException;
-
- public interface ApplicationStore {
- public void storeContainer(Container container) throws IOException;
- public void removeContainer(Container container) throws IOException;
- public void storeMasterContainer(Container container) throws IOException;
- public void updateApplicationState(ApplicationMaster master) throws IOException;
- public boolean isLoggable();
- }
-}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NodeStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileRMStateStore.java
similarity index 69%
copy from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NodeStore.java
copy to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileRMStateStore.java
index cc11eec..d1d0ee6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NodeStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileRMStateStore.java
@@ -15,18 +15,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-import java.io.IOException;
+public class FileRMStateStore implements RMStateStore {
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-
-
-public interface NodeStore {
- public void storeNode(RMNode node) throws IOException;
- public void removeNode(RMNode node) throws IOException;
- public NodeId getNextNodeId() throws IOException;
- public boolean isLoggable();
-}
\ No newline at end of file
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemStore.java
deleted file mode 100644
index 8c23f30..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemStore.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-
-public class MemStore implements Store {
- RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
- private NodeId nodeId;
- private boolean doneWithRecovery = false;
-
- public MemStore() {
- nodeId = recordFactory.newRecordInstance(NodeId.class);
- nodeId.setHost("TODO");
- nodeId.setPort(-1);
- }
-
- @Override
- public void storeNode(RMNode node) throws IOException {}
-
- @Override
- public void removeNode(RMNode node) throws IOException {}
-
- private class ApplicationStoreImpl implements ApplicationStore {
- @Override
- public void storeContainer(Container container) throws IOException {}
-
- @Override
- public void removeContainer(Container container) throws IOException {}
-
- @Override
- public void storeMasterContainer(Container container) throws IOException {}
-
- @Override
- public void updateApplicationState(
- ApplicationMaster master) throws IOException {}
-
- @Override
- public boolean isLoggable() {
- return doneWithRecovery;
- }
-
- }
-
- @Override
- public ApplicationStore createApplicationStore(ApplicationId application,
- ApplicationSubmissionContext context) throws IOException {
- return new ApplicationStoreImpl();
- }
-
-
- @Override
- public void removeApplication(ApplicationId application) throws IOException {}
-
- @Override
- public RMState restore() throws IOException {
- MemRMState state = new MemRMState();
- return state;
- }
-
- @Override
- public synchronized NodeId getNextNodeId() throws IOException {
- // TODO: FIXMEVinodkv
-// int num = nodeId.getId();
-// num++;
-// nodeId.setId(num);
- return nodeId;
- }
-
- private class MemRMState implements RMState {
-
- public MemRMState() {
- nodeId = recordFactory.newRecordInstance(NodeId.class);
- }
-
- @Override
- public List<RMNode> getStoredNodeManagers() {
- return new ArrayList<RMNode>();
- }
-
- @Override
- public NodeId getLastLoggedNodeId() {
- return nodeId;
- }
-
- @Override
- public Map<ApplicationId, ApplicationInfo> getStoredApplications() {
- return new HashMap<ApplicationId, Store.ApplicationInfo>();
- }
- }
-
- @Override
- public boolean isLoggable() {
- return doneWithRecovery;
- }
-
- @Override
- public void doneWithRecovery() {
- doneWithRecovery = true;
- }
-}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NodeStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
similarity index 69%
rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NodeStore.java
rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
index cc11eec..4e1e41e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NodeStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
@@ -15,18 +15,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-import java.io.IOException;
-
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-
-
-public interface NodeStore {
- public void storeNode(RMNode node) throws IOException;
- public void removeNode(RMNode node) throws IOException;
- public NodeId getNextNodeId() throws IOException;
- public boolean isLoggable();
+public interface RMStateStore {
+ public interface RMState {
+
+ }
}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Recoverable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Recoverable.java
index 2dccf59..23ca92e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Recoverable.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Recoverable.java
@@ -17,7 +17,7 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
public interface Recoverable {
public void recover(RMState state) throws Exception;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Store.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Store.java
deleted file mode 100644
index b784c71..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Store.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-
-
-public interface Store extends NodeStore, ApplicationsStore {
- public interface ApplicationInfo {
- public ApplicationMaster getApplicationMaster();
- public Container getMasterContainer();
- public ApplicationSubmissionContext getApplicationSubmissionContext();
- public List<Container> getContainers();
- }
- public interface RMState {
- public List<RMNode> getStoredNodeManagers() ;
- public Map<ApplicationId, ApplicationInfo> getStoredApplications();
- public NodeId getLastLoggedNodeId();
- }
- public RMState restore() throws IOException;
- public void doneWithRecovery();
-}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java
index 91b33ca..b314989 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java
@@ -17,53 +17,17 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-import java.io.IOException;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
public class StoreFactory {
- public static Store getStore(Configuration conf) {
- Store store = ReflectionUtils.newInstance(
+ public static RMStateStore getStore(Configuration conf) {
+ RMStateStore store = ReflectionUtils.newInstance(
conf.getClass(YarnConfiguration.RM_STORE,
- MemStore.class, Store.class),
+ FileRMStateStore.class, RMStateStore.class),
conf);
return store;
}
-
- public static ApplicationStore createVoidAppStore() {
- return new VoidApplicationStore();
- }
-
- private static class VoidApplicationStore implements ApplicationStore {
-
- public VoidApplicationStore() {}
-
- @Override
- public void storeContainer(Container container) throws IOException {
- }
-
- @Override
- public void removeContainer(Container container) throws IOException {
- }
-
- @Override
- public void storeMasterContainer(Container container) throws IOException {
- }
-
- @Override
- public void updateApplicationState(ApplicationMaster master)
- throws IOException {
- }
-
- @Override
- public boolean isLoggable() {
- return false;
- }
- }
}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java
deleted file mode 100644
index f2e401a..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java
+++ /dev/null
@@ -1,509 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.NodeReport;
-import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationMasterPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.NodeReportPBImpl;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationMasterProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-import org.apache.hadoop.yarn.util.ConverterUtils;
-import org.apache.zookeeper.CreateMode;
-import org.apache.zookeeper.KeeperException;
-import org.apache.zookeeper.WatchedEvent;
-import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooKeeper;
-import org.apache.zookeeper.data.Stat;
-
-public class ZKStore implements Store {
- private final Configuration conf;
- private final ZooKeeper zkClient;
- private static final Log LOG = LogFactory.getLog(ZKStore.class);
- private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
- private static final String NODES = "nodes/";
- private static final String APPS = "apps/";
- private static final String ZK_PATH_SEPARATOR = "/";
- private static final String NODE_ID = "nodeid";
- private static final String APP_MASTER = "master";
- private static final String APP_MASTER_CONTAINER = "mastercontainer";
- private final String ZK_ADDRESS;
- private final int ZK_TIMEOUT;
- private boolean doneWithRecovery = false;
-
- /** TODO make this generic **/
- private NodeIdPBImpl nodeId = new NodeIdPBImpl();
-
- /**
- * TODO fix this for later to handle all kinds of events
- * of connection and session events.
- *
- */
- private static class ZKWatcher implements Watcher {
- @Override
- public void process(WatchedEvent arg0) {
- }
- }
-
- public ZKStore(Configuration conf) throws IOException {
- this.conf = conf;
- this.ZK_ADDRESS = conf.get(YarnConfiguration.RM_ZK_STORE_ADDRESS);
- this.ZK_TIMEOUT = conf.getInt(YarnConfiguration.RM_ZK_STORE_TIMEOUT_MS,
- YarnConfiguration.DEFAULT_RM_ZK_STORE_TIMEOUT_MS);
- zkClient = new ZooKeeper(this.ZK_ADDRESS,
- this.ZK_TIMEOUT,
- createZKWatcher()
- );
- // TODO: FIXMEVinodkv
-// this.nodeId.setId(0);
- }
-
- protected Watcher createZKWatcher() {
- return new ZKWatcher();
- }
-
- private NodeReportPBImpl createNodeManagerInfo(RMNode rmNode) {
- NodeReport node =
- recordFactory.newRecordInstance(NodeReport.class);
- node.setNodeId(rmNode.getNodeID());
- node.setRackName(rmNode.getRackName());
- node.setCapability(rmNode.getTotalCapability());
- // TODO: FIXME
-// node.setUsed(nodeInfo.getUsedResource());
- // TODO: acm: refactor2 FIXME
-// node.setNumContainers(rmNode.getNumContainers());
- return (NodeReportPBImpl)node;
- }
-
- @Override
- public synchronized void storeNode(RMNode node) throws IOException {
- /** create a storage node and store it in zk **/
- if (!doneWithRecovery) return;
- // TODO: FIXMEVinodkv
-// NodeReportPBImpl nodeManagerInfo = createNodeManagerInfo(node);
-// byte[] bytes = nodeManagerInfo.getProto().toByteArray();
-// try {
-// zkClient.create(NODES + Integer.toString(node.getNodeID().getId()), bytes, null,
-// CreateMode.PERSISTENT);
-// } catch(InterruptedException ie) {
-// LOG.info("Interrupted", ie);
-// throw new InterruptedIOException("Interrupted");
-// } catch(KeeperException ke) {
-// LOG.info("Keeper exception", ke);
-// throw convertToIOException(ke);
-// }
- }
-
- @Override
- public synchronized void removeNode(RMNode node) throws IOException {
- if (!doneWithRecovery) return;
-
-// TODO: FIXME VINODKV
-// /** remove a storage node **/
-// try {
-// zkClient.delete(NODES + Integer.toString(node.getNodeID().getId()), -1);
-// } catch(InterruptedException ie) {
-// LOG.info("Interrupted", ie);
-// throw new InterruptedIOException("Interrupted");
-// } catch(KeeperException ke) {
-// LOG.info("Keeper exception", ke);
-// throw convertToIOException(ke);
-// }
-
- }
-
- private static IOException convertToIOException(KeeperException ke) {
- IOException io = new IOException();
- io.setStackTrace(ke.getStackTrace());
- return io;
- }
-
- @Override
- public synchronized NodeId getNextNodeId() throws IOException {
-// TODO: FIXME VINODKV
-// int num = nodeId.getId();
-// num++;
-// nodeId.setId(num);
-// try {
-// zkClient.setData(NODES + NODE_ID, nodeId.getProto().toByteArray() , -1);
-// } catch(InterruptedException ie) {
-// LOG.info("Interrupted", ie);
-// throw new InterruptedIOException(ie.getMessage());
-// } catch(KeeperException ke) {
-// throw convertToIOException(ke);
-// }
- return nodeId;
- }
-
- private String containerPathFromContainerId(ContainerId containerId) {
- String appString = ConverterUtils.toString(
- containerId.getApplicationAttemptId().getApplicationId());
- return appString + "/" + containerId.getId();
- }
-
- private class ZKApplicationStore implements ApplicationStore {
- private final ApplicationId applicationId;
-
- public ZKApplicationStore(ApplicationId applicationId) {
- this.applicationId = applicationId;
- }
-
- @Override
- public void storeMasterContainer(Container container) throws IOException {
- if (!doneWithRecovery) return;
-
- ContainerPBImpl containerPBImpl = (ContainerPBImpl) container;
- try {
- zkClient.setData(APPS +
- ConverterUtils.toString(
- container.getId().getApplicationAttemptId().getApplicationId())
- +
- ZK_PATH_SEPARATOR + APP_MASTER_CONTAINER
- , containerPBImpl.getProto().toByteArray(), -1);
- } catch(InterruptedException ie) {
- LOG.info("Interrupted", ie);
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- LOG.info("Keeper exception", ke);
- throw convertToIOException(ke);
- }
- }
- @Override
- public synchronized void storeContainer(Container container) throws IOException {
- if (!doneWithRecovery) return;
-
- ContainerPBImpl containerPBImpl = (ContainerPBImpl) container;
- try {
- zkClient.create(APPS + containerPathFromContainerId(container.getId())
- , containerPBImpl.getProto().toByteArray(), null, CreateMode.PERSISTENT);
- } catch(InterruptedException ie) {
- LOG.info("Interrupted", ie);
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- LOG.info("Keeper exception", ke);
- throw convertToIOException(ke);
- }
- }
-
- @Override
- public synchronized void removeContainer(Container container) throws IOException {
- if (!doneWithRecovery) return;
- try {
- zkClient.delete(APPS + containerPathFromContainerId(container.getId()),
- -1);
- } catch(InterruptedException ie) {
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- LOG.info("Keeper exception", ke);
- throw convertToIOException(ke);
- }
- }
-
- @Override
- public void updateApplicationState(
- ApplicationMaster master) throws IOException {
- if (!doneWithRecovery) return;
-
- String appString = APPS + ConverterUtils.toString(applicationId);
- ApplicationMasterPBImpl masterPBImpl = (ApplicationMasterPBImpl) master;
- try {
- zkClient.setData(appString, masterPBImpl.getProto().toByteArray(), -1);
- } catch(InterruptedException ie) {
- LOG.info("Interrupted", ie);
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- LOG.info("Keeper exception", ke);
- throw convertToIOException(ke);
- }
- }
-
- @Override
- public boolean isLoggable() {
- return doneWithRecovery;
- }
- }
-
- @Override
- public synchronized ApplicationStore createApplicationStore(ApplicationId application,
- ApplicationSubmissionContext context) throws IOException {
- if (!doneWithRecovery) return new ZKApplicationStore(application);
-
- ApplicationSubmissionContextPBImpl contextPBImpl = (ApplicationSubmissionContextPBImpl) context;
- String appString = APPS + ConverterUtils.toString(application);
-
- ApplicationMasterPBImpl masterPBImpl = new ApplicationMasterPBImpl();
- ContainerPBImpl container = new ContainerPBImpl();
- try {
- zkClient.create(appString, contextPBImpl.getProto()
- .toByteArray(), null, CreateMode.PERSISTENT);
- zkClient.create(appString + ZK_PATH_SEPARATOR + APP_MASTER,
- masterPBImpl.getProto().toByteArray(), null, CreateMode.PERSISTENT);
- zkClient.create(appString + ZK_PATH_SEPARATOR + APP_MASTER_CONTAINER,
- container.getProto().toByteArray(), null, CreateMode.PERSISTENT);
- } catch(InterruptedException ie) {
- LOG.info("Interrupted", ie);
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- LOG.info("Keeper exception", ke);
- throw convertToIOException(ke);
- }
- return new ZKApplicationStore(application);
- }
-
- @Override
- public synchronized void removeApplication(ApplicationId application) throws IOException {
- if (!doneWithRecovery) return;
-
- try {
- zkClient.delete(APPS + ConverterUtils.toString(application), -1);
- } catch(InterruptedException ie) {
- LOG.info("Interrupted", ie);
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- LOG.info("Keeper Exception", ke);
- throw convertToIOException(ke);
- }
- }
-
- @Override
- public boolean isLoggable() {
- return doneWithRecovery;
- }
-
- @Override
- public void doneWithRecovery() {
- this.doneWithRecovery = true;
- }
-
-
- @Override
- public synchronized RMState restore() throws IOException {
- ZKRMState rmState = new ZKRMState();
- rmState.load();
- return rmState;
- }
-
- private static class ApplicationInfoImpl implements ApplicationInfo {
- private ApplicationMaster master;
- private Container masterContainer;
-
- private final ApplicationSubmissionContext context;
- private final List<Container> containers = new ArrayList<Container>();
-
- public ApplicationInfoImpl(ApplicationSubmissionContext context) {
- this.context = context;
- }
-
- public void setApplicationMaster(ApplicationMaster master) {
- this.master = master;
- }
-
- public void setMasterContainer(Container container) {
- this.masterContainer = container;
- }
-
- @Override
- public ApplicationMaster getApplicationMaster() {
- return this.master;
- }
-
- @Override
- public ApplicationSubmissionContext getApplicationSubmissionContext() {
- return this.context;
- }
-
- @Override
- public Container getMasterContainer() {
- return this.masterContainer;
- }
-
- @Override
- public List<Container> getContainers() {
- return this.containers;
- }
-
- public void addContainer(Container container) {
- containers.add(container);
- }
- }
-
- private class ZKRMState implements RMState {
- private List<RMNode> nodeManagers = new ArrayList<RMNode>();
- private Map<ApplicationId, ApplicationInfo> applications = new
- HashMap<ApplicationId, ApplicationInfo>();
-
- public ZKRMState() {
- LOG.info("Restoring RM state from ZK");
- }
-
- private synchronized List<NodeReport> listStoredNodes() throws IOException {
- /** get the list of nodes stored in zk **/
- //TODO PB
- List<NodeReport> nodes = new ArrayList<NodeReport>();
- Stat stat = new Stat();
- try {
- List<String> children = zkClient.getChildren(NODES, false);
- for (String child: children) {
- byte[] data = zkClient.getData(NODES + child, false, stat);
- NodeReportPBImpl nmImpl = new NodeReportPBImpl(
- NodeReportProto.parseFrom(data));
- nodes.add(nmImpl);
- }
- } catch (InterruptedException ie) {
- LOG.info("Interrupted" , ie);
- throw new InterruptedIOException("Interrupted");
- } catch(KeeperException ke) {
- LOG.error("Failed to list nodes", ke);
- throw convertToIOException(ke);
- }
- return nodes;
- }
-
- @Override
- public List<RMNode> getStoredNodeManagers() {
- return nodeManagers;
- }
-
- @Override
- public NodeId getLastLoggedNodeId() {
- return nodeId;
- }
-
- private void readLastNodeId() throws IOException {
- Stat stat = new Stat();
- try {
- byte[] data = zkClient.getData(NODES + NODE_ID, false, stat);
- nodeId = new NodeIdPBImpl(NodeIdProto.parseFrom(data));
- } catch(InterruptedException ie) {
- LOG.info("Interrupted", ie);
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- LOG.info("Keeper Exception", ke);
- throw convertToIOException(ke);
- }
- }
-
- private ApplicationInfo getAppInfo(String app) throws IOException {
- ApplicationInfoImpl info = null;
- Stat stat = new Stat();
- try {
- ApplicationSubmissionContext context = null;
- byte[] data = zkClient.getData(APPS + app, false, stat);
- context = new ApplicationSubmissionContextPBImpl(
- ApplicationSubmissionContextProto.parseFrom(data));
- info = new ApplicationInfoImpl(context);
- List<String> children = zkClient.getChildren(APPS + app, false, stat);
- ApplicationMaster master = null;
- for (String child: children) {
- byte[] childdata = zkClient.getData(APPS + app + ZK_PATH_SEPARATOR + child, false, stat);
- if (APP_MASTER.equals(child)) {
- master = new ApplicationMasterPBImpl(ApplicationMasterProto.parseFrom(childdata));
- info.setApplicationMaster(master);
- } else if (APP_MASTER_CONTAINER.equals(child)) {
- Container masterContainer = new ContainerPBImpl(ContainerProto.parseFrom(data));
- info.setMasterContainer(masterContainer);
- } else {
- Container container = new ContainerPBImpl(ContainerProto.parseFrom(data));
- info.addContainer(container);
- }
- }
- } catch(InterruptedException ie) {
- LOG.info("Interrupted", ie);
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- throw convertToIOException(ke);
- }
- return info;
- }
-
- private void load() throws IOException {
- List<NodeReport> nodeInfos = listStoredNodes();
- final Pattern trackerPattern = Pattern.compile(".*:.*");
- final Matcher m = trackerPattern.matcher("");
- for (NodeReport node: nodeInfos) {
- m.reset(node.getNodeId().getHost());
- if (!m.find()) {
- LOG.info("Skipping node, bad node-address "
- + node.getNodeId().getHost());
- continue;
- }
- String hostName = m.group(0);
- int cmPort = Integer.valueOf(m.group(1));
- m.reset(node.getHttpAddress());
- if (!m.find()) {
- LOG.info("Skipping node, bad http-address " + node.getHttpAddress());
- continue;
- }
- int httpPort = Integer.valueOf(m.group(1));
- // TODO: FindBugs warns passing null below. Commenting this for later.
-// RMNode nm = new RMNodeImpl(node.getNodeId(), null,
-// hostName, cmPort, httpPort,
-// ResourceTrackerService.resolve(node.getNodeId().getHost()),
-// node.getCapability());
-// nodeManagers.add(nm);
- }
- readLastNodeId();
- /* make sure we get all the applications */
- List<String> apps = null;
- try {
- apps = zkClient.getChildren(APPS, false);
- } catch(InterruptedException ie) {
- LOG.info("Interrupted", ie);
- throw new InterruptedIOException(ie.getMessage());
- } catch(KeeperException ke) {
- throw convertToIOException(ke);
- }
- for (String app: apps) {
- ApplicationInfo info = getAppInfo(app);
- applications.put(info.getApplicationMaster().getApplicationId(), info);
- }
- }
-
- @Override
- public Map<ApplicationId, ApplicationInfo> getStoredApplications() {
- return applications;
- }
- }
-}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java
new file mode 100644
index 0000000..5e5a07e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+package org.apache.hadoop.yarn.server.resourcemanager.recovery;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java
index 63ff844..2533d46 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java
@@ -29,7 +29,6 @@
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.event.EventHandler;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -132,13 +131,6 @@
int pullRMNodeUpdates(Collection<RMNode> updatedNodes);
/**
- * Application level metadata is stored in {@link ApplicationStore} which
- * can persist the information.
- * @return the {@link ApplicationStore} for this {@link RMApp}.
- */
- ApplicationStore getApplicationStore();
-
- /**
* The finish time of the {@link RMApp}
* @return the finish time of the application.,
*/
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index f3fe054..0317a3c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -49,7 +49,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNodeUpdateEvent.RMAppNodeUpdateType;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
@@ -81,7 +80,6 @@
private final String name;
private final ApplicationSubmissionContext submissionContext;
private final String clientTokenStr;
- private final ApplicationStore appStore;
private final Dispatcher dispatcher;
private final YarnScheduler scheduler;
private final ApplicationMasterService masterService;
@@ -213,7 +211,6 @@
public RMAppImpl(ApplicationId applicationId, RMContext rmContext,
Configuration config, String name, String user, String queue,
ApplicationSubmissionContext submissionContext, String clientTokenStr,
- ApplicationStore appStore,
YarnScheduler scheduler, ApplicationMasterService masterService,
long submitTime) {
@@ -227,7 +224,6 @@
this.queue = queue;
this.submissionContext = submissionContext;
this.clientTokenStr = clientTokenStr;
- this.appStore = appStore;
this.scheduler = scheduler;
this.masterService = masterService;
this.submitTime = submitTime;
@@ -340,11 +336,6 @@
}
}
- @Override
- public ApplicationStore getApplicationStore() {
- return this.appStore;
- }
-
private YarnApplicationState createApplicationState(RMAppState rmAppState) {
switch(rmAppState) {
case NEW:
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index eebbecb..da8a6ed 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -33,6 +33,7 @@
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
+import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -399,7 +400,7 @@
final String trackingUriWithoutScheme) {
this.readLock.lock();
try {
- URI trackingUri = trackingUriWithoutScheme == null ? null :
+ URI trackingUri = StringUtils.isEmpty(trackingUriWithoutScheme) ? null :
ProxyUriUtils.getUriFromAMUrl(trackingUriWithoutScheme);
String proxy = YarnConfiguration.getProxyHostAndPort(conf);
URI proxyUri = ProxyUriUtils.getUriFromAMUrl(proxy);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
index 7c44748..452cb9b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
@@ -36,7 +36,6 @@
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -70,14 +69,12 @@
boolean pending = true; // for app metrics
public AppSchedulingInfo(ApplicationAttemptId appAttemptId,
- String user, Queue queue, ActiveUsersManager activeUsersManager,
- ApplicationStore store) {
+ String user, Queue queue, ActiveUsersManager activeUsersManager) {
this.applicationAttemptId = appAttemptId;
this.applicationId = appAttemptId.getApplicationId();
this.queue = queue;
this.queueName = queue.getQueueName();
this.user = user;
- //this.store = store;
this.activeUsersManager = activeUsersManager;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
index 4534b27..2bac243 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
@@ -49,7 +49,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
@@ -365,7 +365,7 @@
// TODO: Fix store
FiCaSchedulerApp SchedulerApp =
new FiCaSchedulerApp(applicationAttemptId, user, queue,
- queue.getActiveUsersManager(), rmContext, null);
+ queue.getActiveUsersManager(), rmContext);
// Submit to the queue
try {
@@ -767,18 +767,7 @@
@Override
@Lock(Lock.NoLock.class)
public void recover(RMState state) throws Exception {
- // TODO: VINDOKVFIXME recovery
-// applications.clear();
-// for (Map.Entry<ApplicationId, ApplicationInfo> entry : state.getStoredApplications().entrySet()) {
-// ApplicationId appId = entry.getKey();
-// ApplicationInfo appInfo = entry.getValue();
-// SchedulerApp app = applications.get(appId);
-// app.allocate(appInfo.getContainers());
-// for (Container c: entry.getValue().getContainers()) {
-// Queue queue = queues.get(appInfo.getApplicationSubmissionContext().getQueue());
-// queue.recoverContainer(clusterResource, applications.get(appId), c);
-// }
-// }
+ // NOT IMPLEMENTED
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
index e53ca82..317f2a2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
@@ -43,7 +43,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@@ -111,11 +110,11 @@
private final RMContext rmContext;
public FiCaSchedulerApp(ApplicationAttemptId applicationAttemptId,
String user, Queue queue, ActiveUsersManager activeUsersManager,
- RMContext rmContext, ApplicationStore store) {
+ RMContext rmContext) {
this.rmContext = rmContext;
this.appSchedulingInfo =
new AppSchedulingInfo(applicationAttemptId, user, queue,
- activeUsersManager, store);
+ activeUsersManager);
this.queue = queue;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java
index acad730..52ac0a2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java
@@ -50,10 +50,10 @@
private long startTime;
private static RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
private static final Log LOG = LogFactory.getLog(AppSchedulable.class);
- private FSQueue queue;
+ private FSLeafQueue queue;
private RMContainerTokenSecretManager containerTokenSecretManager;
- public AppSchedulable(FairScheduler scheduler, FSSchedulerApp app, FSQueue queue) {
+ public AppSchedulable(FairScheduler scheduler, FSSchedulerApp app, FSLeafQueue queue) {
this.scheduler = scheduler;
this.app = app;
this.startTime = System.currentTimeMillis();
@@ -97,9 +97,6 @@
}
@Override
- public void redistributeShare() {}
-
- @Override
public Resource getResourceUsage() {
return app.getCurrentConsumption();
}
@@ -114,7 +111,7 @@
* Get metrics reference from containing queue.
*/
public QueueMetrics getMetrics() {
- return queue.getQueueSchedulable().getMetrics();
+ return queue.getMetrics();
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
new file mode 100644
index 0000000..8b3e134
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.authorize.AccessControlList;
+import org.apache.hadoop.yarn.api.records.QueueACL;
+import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
+
+public class FSLeafQueue extends FSQueue {
+ private static final Log LOG = LogFactory.getLog(
+ FSLeafQueue.class.getName());
+
+ private final List<AppSchedulable> appScheds =
+ new ArrayList<AppSchedulable>();
+
+ /** Scheduling mode for jobs inside the queue (fair or FIFO) */
+ private SchedulingMode schedulingMode;
+
+ private final FairScheduler scheduler;
+ private final QueueManager queueMgr;
+ private Resource demand = Resources.createResource(0);
+
+ // Variables used for preemption
+ private long lastTimeAtMinShare;
+ private long lastTimeAtHalfFairShare;
+
+ public FSLeafQueue(String name, QueueManager queueMgr, FairScheduler scheduler,
+ FSParentQueue parent) {
+ super(name, queueMgr, scheduler, parent);
+ this.scheduler = scheduler;
+ this.queueMgr = queueMgr;
+ this.lastTimeAtMinShare = scheduler.getClock().getTime();
+ this.lastTimeAtHalfFairShare = scheduler.getClock().getTime();
+ }
+
+ public void addApp(FSSchedulerApp app) {
+ AppSchedulable appSchedulable = new AppSchedulable(scheduler, app, this);
+ app.setAppSchedulable(appSchedulable);
+ appScheds.add(appSchedulable);
+ }
+
+ // for testing
+ void addAppSchedulable(AppSchedulable appSched) {
+ appScheds.add(appSched);
+ }
+
+ public void removeApp(FSSchedulerApp app) {
+ for (Iterator<AppSchedulable> it = appScheds.iterator(); it.hasNext();) {
+ AppSchedulable appSched = it.next();
+ if (appSched.getApp() == app) {
+ it.remove();
+ break;
+ }
+ }
+ }
+
+ public Collection<AppSchedulable> getAppSchedulables() {
+ return appScheds;
+ }
+
+ public void setSchedulingMode(SchedulingMode mode) {
+ this.schedulingMode = mode;
+ }
+
+ @Override
+ public void recomputeFairShares() {
+ if (schedulingMode == SchedulingMode.FAIR) {
+ SchedulingAlgorithms.computeFairShares(appScheds, getFairShare());
+ } else {
+ for (AppSchedulable sched: appScheds) {
+ sched.setFairShare(Resources.createResource(0));
+ }
+ }
+ }
+
+ @Override
+ public Resource getDemand() {
+ return demand;
+ }
+
+ @Override
+ public Resource getResourceUsage() {
+ Resource usage = Resources.createResource(0);
+ for (AppSchedulable app : appScheds) {
+ Resources.addTo(usage, app.getResourceUsage());
+ }
+ return usage;
+ }
+
+ @Override
+ public void updateDemand() {
+ // Compute demand by iterating through apps in the queue
+ // Limit demand to maxResources
+ Resource maxRes = queueMgr.getMaxResources(getName());
+ demand = Resources.createResource(0);
+ for (AppSchedulable sched : appScheds) {
+ sched.updateDemand();
+ Resource toAdd = sched.getDemand();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Counting resource from " + sched.getName() + " " + toAdd
+ + "; Total resource consumption for " + getName() + " now "
+ + demand);
+ }
+ demand = Resources.add(demand, toAdd);
+ if (Resources.greaterThanOrEqual(demand, maxRes)) {
+ demand = maxRes;
+ break;
+ }
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("The updated demand for " + getName() + " is " + demand
+ + "; the max is " + maxRes);
+ }
+ }
+
+ @Override
+ public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
+ LOG.debug("Node offered to queue: " + getName() + " reserved: " + reserved);
+ // If this queue is over its limit, reject
+ if (Resources.greaterThan(getResourceUsage(),
+ queueMgr.getMaxResources(getName()))) {
+ return Resources.none();
+ }
+
+ // If this node already has reserved resources for an app, first try to
+ // finish allocating resources for that app.
+ if (reserved) {
+ for (AppSchedulable sched : appScheds) {
+ if (sched.getApp().getApplicationAttemptId() ==
+ node.getReservedContainer().getApplicationAttemptId()) {
+ return sched.assignContainer(node, reserved);
+ }
+ }
+ return Resources.none(); // We should never get here
+ }
+
 + // Otherwise, choose an app to schedule based on the given policy (fair vs FIFO).
+ else {
+ Comparator<Schedulable> comparator;
+ if (schedulingMode == SchedulingMode.FIFO) {
+ comparator = new SchedulingAlgorithms.FifoComparator();
+ } else if (schedulingMode == SchedulingMode.FAIR) {
+ comparator = new SchedulingAlgorithms.FairShareComparator();
+ } else {
+ throw new RuntimeException("Unsupported queue scheduling mode " +
+ schedulingMode);
+ }
+
+ Collections.sort(appScheds, comparator);
+ for (AppSchedulable sched: appScheds) {
+ return sched.assignContainer(node, reserved);
+ }
+
+ return Resources.none();
+ }
+ }
+
+ @Override
+ public Collection<FSQueue> getChildQueues() {
+ return new ArrayList<FSQueue>(1);
+ }
+
+ @Override
+ public List<QueueUserACLInfo> getQueueUserAclInfo(UserGroupInformation user) {
+ QueueUserACLInfo userAclInfo =
+ recordFactory.newRecordInstance(QueueUserACLInfo.class);
+ List<QueueACL> operations = new ArrayList<QueueACL>();
+ for (QueueACL operation : QueueACL.values()) {
+ Map<QueueACL, AccessControlList> acls = queueMgr.getQueueAcls(getName());
+ if (acls.get(operation).isUserAllowed(user)) {
+ operations.add(operation);
+ }
+ }
+
+ userAclInfo.setQueueName(getQueueName());
+ userAclInfo.setUserAcls(operations);
+ return Collections.singletonList(userAclInfo);
+ }
+
+ public long getLastTimeAtMinShare() {
+ return lastTimeAtMinShare;
+ }
+
+ public void setLastTimeAtMinShare(long lastTimeAtMinShare) {
+ this.lastTimeAtMinShare = lastTimeAtMinShare;
+ }
+
+ public long getLastTimeAtHalfFairShare() {
+ return lastTimeAtHalfFairShare;
+ }
+
+ public void setLastTimeAtHalfFairShare(long lastTimeAtHalfFairShare) {
+ this.lastTimeAtHalfFairShare = lastTimeAtHalfFairShare;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java
new file mode 100644
index 0000000..4ae6b36
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.records.QueueACL;
+import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
+
+public class FSParentQueue extends FSQueue {
+ private static final Log LOG = LogFactory.getLog(
+ FSParentQueue.class.getName());
+
+
+ private final List<FSQueue> childQueues =
+ new ArrayList<FSQueue>();
+ private final QueueManager queueMgr;
+ private Resource demand = Resources.createResource(0);
+
+ public FSParentQueue(String name, QueueManager queueMgr, FairScheduler scheduler,
+ FSParentQueue parent) {
+ super(name, queueMgr, scheduler, parent);
+ this.queueMgr = queueMgr;
+ }
+
+ public void addChildQueue(FSQueue child) {
+ childQueues.add(child);
+ }
+
+ @Override
+ public void recomputeFairShares() {
+ SchedulingAlgorithms.computeFairShares(childQueues, getFairShare());
+ for (FSQueue childQueue : childQueues) {
+ childQueue.getMetrics().setAvailableResourcesToQueue(childQueue.getFairShare());
+ childQueue.recomputeFairShares();
+ }
+ }
+
+ @Override
+ public Resource getDemand() {
+ return demand;
+ }
+
+ @Override
+ public Resource getResourceUsage() {
+ Resource usage = Resources.createResource(0);
+ for (FSQueue child : childQueues) {
+ Resources.addTo(usage, child.getResourceUsage());
+ }
+ return usage;
+ }
+
+ @Override
+ public void updateDemand() {
+ // Compute demand by iterating through apps in the queue
+ // Limit demand to maxResources
+ Resource maxRes = queueMgr.getMaxResources(getName());
+ demand = Resources.createResource(0);
+ for (FSQueue childQueue : childQueues) {
+ childQueue.updateDemand();
+ Resource toAdd = childQueue.getDemand();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Counting resource from " + childQueue.getName() + " " +
+ toAdd + "; Total resource consumption for " + getName() +
+ " now " + demand);
+ }
+ demand = Resources.add(demand, toAdd);
+ if (Resources.greaterThanOrEqual(demand, maxRes)) {
+ demand = maxRes;
+ break;
+ }
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("The updated demand for " + getName() + " is " + demand +
+ "; the max is " + maxRes);
+ }
+ }
+
+ public boolean hasAccess(QueueACL acl, UserGroupInformation user) {
+ synchronized (this) {
+ if (getQueueAcls().get(acl).isUserAllowed(user)) {
+ return true;
+ }
+ }
+
+ if (parent != null) {
+ return parent.hasAccess(acl, user);
+ }
+
+ return false;
+ }
+
+ private synchronized QueueUserACLInfo getUserAclInfo(
+ UserGroupInformation user) {
+ QueueUserACLInfo userAclInfo =
+ recordFactory.newRecordInstance(QueueUserACLInfo.class);
+ List<QueueACL> operations = new ArrayList<QueueACL>();
+ for (QueueACL operation : QueueACL.values()) {
+ if (hasAccess(operation, user)) {
+ operations.add(operation);
+ }
+ }
+
+ userAclInfo.setQueueName(getQueueName());
+ userAclInfo.setUserAcls(operations);
+ return userAclInfo;
+ }
+
+ @Override
+ public synchronized List<QueueUserACLInfo> getQueueUserAclInfo(
+ UserGroupInformation user) {
+ List<QueueUserACLInfo> userAcls = new ArrayList<QueueUserACLInfo>();
+
+ // Add queue acls
+ userAcls.add(getUserAclInfo(user));
+
+ // Add children queue acls
+ for (FSQueue child : childQueues) {
+ userAcls.addAll(child.getQueueUserAclInfo(user));
+ }
+
+ return userAcls;
+ }
+
+ @Override
+ public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
+ throw new IllegalStateException(
+ "Parent queue should not be assigned container");
+ }
+
+ @Override
+ public Collection<FSQueue> getChildQueues() {
+ return childQueues;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java
index 79395b0..0a85cbc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java
@@ -20,65 +20,112 @@
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.security.authorize.AccessControlList;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.QueueACL;
+import org.apache.hadoop.yarn.api.records.QueueInfo;
+import org.apache.hadoop.yarn.api.records.QueueState;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
-/**
- * A queue containing several applications.
- */
-@Private
-@Unstable
-public class FSQueue {
- /** Queue name. */
- private String name;
-
- /** Applications in this specific queue; does not include children queues' jobs. */
- private Collection<FSSchedulerApp> applications =
- new ArrayList<FSSchedulerApp>();
-
- /** Scheduling mode for jobs inside the queue (fair or FIFO) */
- private SchedulingMode schedulingMode;
-
- private FairScheduler scheduler;
-
- private FSQueueSchedulable queueSchedulable;
-
- public FSQueue(FairScheduler scheduler, String name) {
+public abstract class FSQueue extends Schedulable implements Queue {
+ private final String name;
+ private final QueueManager queueMgr;
+ private final FairScheduler scheduler;
+ private final QueueMetrics metrics;
+
+ protected final FSParentQueue parent;
+ protected final RecordFactory recordFactory =
+ RecordFactoryProvider.getRecordFactory(null);
+
+ public FSQueue(String name, QueueManager queueMgr,
+ FairScheduler scheduler, FSParentQueue parent) {
this.name = name;
- this.queueSchedulable = new FSQueueSchedulable(scheduler, this);
+ this.queueMgr = queueMgr;
this.scheduler = scheduler;
+ this.metrics = QueueMetrics.forQueue(getName(), parent, true, scheduler.getConf());
+ this.parent = parent;
}
-
- public Collection<FSSchedulerApp> getApplications() {
- return applications;
- }
-
- public void addApp(FSSchedulerApp app) {
- applications.add(app);
- AppSchedulable appSchedulable = new AppSchedulable(scheduler, app, this);
- app.setAppSchedulable(appSchedulable);
- queueSchedulable.addApp(appSchedulable);
- }
-
- public void removeApp(FSSchedulerApp app) {
- applications.remove(app);
- queueSchedulable.removeApp(app);
- }
-
+
public String getName() {
return name;
}
-
- public SchedulingMode getSchedulingMode() {
- return schedulingMode;
+
+ @Override
+ public String getQueueName() {
+ return name;
+ }
+
+ @Override
+ public double getWeight() {
+ return queueMgr.getQueueWeight(getName());
+ }
+
+ @Override
+ public Resource getMinShare() {
+ return queueMgr.getMinResources(getName());
}
- public void setSchedulingMode(SchedulingMode schedulingMode) {
- this.schedulingMode = schedulingMode;
+ @Override
+ public long getStartTime() {
+ return 0;
}
- public FSQueueSchedulable getQueueSchedulable() {
- return queueSchedulable;
+ @Override
+ public Priority getPriority() {
+ Priority p = recordFactory.newRecordInstance(Priority.class);
+ p.setPriority(1);
+ return p;
}
+
+ @Override
+ public QueueInfo getQueueInfo(boolean includeChildQueues, boolean recursive) {
+ QueueInfo queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
+ queueInfo.setQueueName(getQueueName());
+ // TODO: we might change these queue metrics around a little bit
+ // to match the semantics of the fair scheduler.
+ queueInfo.setCapacity((float) getFairShare().getMemory() /
+ scheduler.getClusterCapacity().getMemory());
+ queueInfo.setCapacity((float) getResourceUsage().getMemory() /
+ scheduler.getClusterCapacity().getMemory());
+
+ ArrayList<QueueInfo> childQueueInfos = new ArrayList<QueueInfo>();
+ if (includeChildQueues) {
+ Collection<FSQueue> childQueues = getChildQueues();
+ for (FSQueue child : childQueues) {
+ childQueueInfos.add(child.getQueueInfo(recursive, recursive));
+ }
+ }
+ queueInfo.setChildQueues(childQueueInfos);
+ queueInfo.setQueueState(QueueState.RUNNING);
+ return queueInfo;
+ }
+
+ @Override
+ public Map<QueueACL, AccessControlList> getQueueAcls() {
+ Map<QueueACL, AccessControlList> acls = queueMgr.getQueueAcls(getName());
+ return new HashMap<QueueACL, AccessControlList>(acls);
+ }
+
+ @Override
+ public QueueMetrics getMetrics() {
+ return metrics;
+ }
+
+ /**
+ * Recomputes the fair shares for all queues and applications
+ * under this queue.
+ */
+ public abstract void recomputeFairShares();
+
+ /**
+ * Gets the children of this queue, if any.
+ */
+ public abstract Collection<FSQueue> getChildQueues();
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueSchedulable.java
deleted file mode 100644
index ccac112..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueSchedulable.java
+++ /dev/null
@@ -1,302 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.authorize.AccessControlList;
-import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.QueueACL;
-import org.apache.hadoop.yarn.api.records.QueueInfo;
-import org.apache.hadoop.yarn.api.records.QueueState;
-import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
-
-@Private
-@Unstable
-public class FSQueueSchedulable extends Schedulable implements Queue {
- public static final Log LOG = LogFactory.getLog(
- FSQueueSchedulable.class.getName());
-
- private FairScheduler scheduler;
- private FSQueue queue;
- private QueueManager queueMgr;
- private List<AppSchedulable> appScheds = new LinkedList<AppSchedulable>();
- private Resource demand = Resources.createResource(0);
- private QueueMetrics metrics;
- private RecordFactory recordFactory =
- RecordFactoryProvider.getRecordFactory(null);
-
- // Variables used for preemption
- long lastTimeAtMinShare;
- long lastTimeAtHalfFairShare;
-
- // Constructor for tests
- protected FSQueueSchedulable(FairScheduler scheduler, FSQueue fsQueue,
- QueueManager qMgr, QueueMetrics metrics, long minShare, long fairShare) {
- this.scheduler = scheduler;
- this.queueMgr = qMgr;
- this.queue = fsQueue;
- this.metrics = metrics;
- this.lastTimeAtMinShare = minShare;
- this.lastTimeAtHalfFairShare = fairShare;
- }
-
- public FSQueueSchedulable(FairScheduler scheduler, FSQueue queue) {
- this.scheduler = scheduler;
- this.queue = queue;
- this.queueMgr = scheduler.getQueueManager();
- this.metrics = QueueMetrics.forQueue(getName(), null, true, scheduler.getConf());
- this.lastTimeAtMinShare = scheduler.getClock().getTime();
- this.lastTimeAtHalfFairShare = scheduler.getClock().getTime();
- }
-
- public void addApp(AppSchedulable app) {
- appScheds.add(app);
- }
-
- public void removeApp(FSSchedulerApp app) {
- for (Iterator<AppSchedulable> it = appScheds.iterator(); it.hasNext();) {
- AppSchedulable appSched = it.next();
- if (appSched.getApp() == app) {
- it.remove();
- break;
- }
- }
- }
-
- /**
- * Update demand by asking apps in the queue to update
- */
- @Override
- public void updateDemand() {
- // Compute demand by iterating through apps in the queue
- // Limit demand to maxResources
- Resource maxRes = queueMgr.getMaxResources(queue.getName());
- demand = Resources.createResource(0);
- for (AppSchedulable sched: appScheds) {
- sched.updateDemand();
- Resource toAdd = sched.getDemand();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Counting resource from " + sched.getName() + " " + toAdd
- + "; Total resource consumption for " + getName() + " now "
- + demand);
- }
- demand = Resources.add(demand, toAdd);
- if (Resources.greaterThanOrEqual(demand, maxRes)) {
- demand = maxRes;
- break;
- }
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("The updated demand for " + getName() + " is " + demand
- + "; the max is " + maxRes);
- }
- }
-
- /**
- * Distribute the queue's fair share among its jobs
- */
- @Override
- public void redistributeShare() {
- if (queue.getSchedulingMode() == SchedulingMode.FAIR) {
- SchedulingAlgorithms.computeFairShares(appScheds, getFairShare());
- } else {
- for (AppSchedulable sched: appScheds) {
- sched.setFairShare(Resources.createResource(0));
- }
- }
- }
-
- @Override
- public Resource getDemand() {
- return demand;
- }
-
- @Override
- public Resource getMinShare() {
- return queueMgr.getMinResources(queue.getName());
- }
-
- @Override
- public double getWeight() {
- return queueMgr.getQueueWeight(queue.getName());
- }
-
- @Override
- public long getStartTime() {
- return 0;
- }
-
- @Override
- public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
- LOG.debug("Node offered to queue: " + getName() + " reserved: " + reserved);
- // If this queue is over its limit, reject
- if (Resources.greaterThan(getResourceUsage(),
- queueMgr.getMaxResources(queue.getName()))) {
- return Resources.none();
- }
-
- // If this node already has reserved resources for an app, first try to
- // finish allocating resources for that app.
- if (reserved) {
- for (AppSchedulable sched : appScheds) {
- if (sched.getApp().getApplicationAttemptId() ==
- node.getReservedContainer().getApplicationAttemptId()) {
- return sched.assignContainer(node, reserved);
- }
- }
- return Resources.none(); // We should never get here
- }
-
- // Otherwise, chose app to schedule based on given policy (fair vs fifo).
- else {
- SchedulingMode mode = queue.getSchedulingMode();
-
- Comparator<Schedulable> comparator;
- if (mode == SchedulingMode.FIFO) {
- comparator = new SchedulingAlgorithms.FifoComparator();
- } else if (mode == SchedulingMode.FAIR) {
- comparator = new SchedulingAlgorithms.FairShareComparator();
- } else {
- throw new RuntimeException("Unsupported queue scheduling mode " + mode);
- }
-
- Collections.sort(appScheds, comparator);
- for (AppSchedulable sched: appScheds) {
- return sched.assignContainer(node, reserved);
- }
-
- return Resources.none();
- }
-
- }
-
- @Override
- public String getName() {
- return queue.getName();
- }
-
- FSQueue getQueue() {
- return queue;
- }
-
- public Collection<AppSchedulable> getAppSchedulables() {
- return appScheds;
- }
-
- public long getLastTimeAtMinShare() {
- return lastTimeAtMinShare;
- }
-
- public void setLastTimeAtMinShare(long lastTimeAtMinShare) {
- this.lastTimeAtMinShare = lastTimeAtMinShare;
- }
-
- public long getLastTimeAtHalfFairShare() {
- return lastTimeAtHalfFairShare;
- }
-
- public void setLastTimeAtHalfFairShare(long lastTimeAtHalfFairShare) {
- this.lastTimeAtHalfFairShare = lastTimeAtHalfFairShare;
- }
-
- @Override
- public QueueMetrics getMetrics() {
- return metrics;
- }
-
- @Override
- public Resource getResourceUsage() {
- Resource usage = Resources.createResource(0);
- for (AppSchedulable app : appScheds) {
- Resources.addTo(usage, app.getResourceUsage());
- }
- return usage;
- }
-
- @Override
- public Priority getPriority() {
- Priority p = recordFactory.newRecordInstance(Priority.class);
- p.setPriority(1);
- return p;
- }
-
- @Override
- public Map<QueueACL, AccessControlList> getQueueAcls() {
- Map<QueueACL, AccessControlList> acls = queueMgr.getQueueAcls(getName());
- return new HashMap<QueueACL, AccessControlList>(acls);
- }
-
- @Override
- public QueueInfo getQueueInfo(boolean includeChildQueues, boolean recursive) {
- QueueInfo queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
- queueInfo.setQueueName(getQueueName());
- // TODO: we might change these queue metrics around a little bit
- // to match the semantics of the fair scheduler.
- queueInfo.setCapacity((float) getFairShare().getMemory() /
- scheduler.getClusterCapacity().getMemory());
- queueInfo.setCapacity((float) getResourceUsage().getMemory() /
- scheduler.getClusterCapacity().getMemory());
-
- queueInfo.setChildQueues(new ArrayList<QueueInfo>());
- queueInfo.setQueueState(QueueState.RUNNING);
- return queueInfo;
- }
-
- @Override
- public List<QueueUserACLInfo> getQueueUserAclInfo(UserGroupInformation user) {
- QueueUserACLInfo userAclInfo =
- recordFactory.newRecordInstance(QueueUserACLInfo.class);
- List<QueueACL> operations = new ArrayList<QueueACL>();
- for (QueueACL operation : QueueACL.values()) {
- Map<QueueACL, AccessControlList> acls = queueMgr.getQueueAcls(getName());
- if (acls.get(operation).isUserAllowed(user)) {
- operations.add(operation);
- }
- }
-
- userAclInfo.setQueueName(getQueueName());
- userAclInfo.setUserAcls(operations);
- return Collections.singletonList(userAclInfo);
- }
-
- @Override
- public String getQueueName() {
- return getName();
- }
-}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java
index 4e164e5..49a52bb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java
@@ -42,7 +42,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@@ -103,11 +102,11 @@
private final RMContext rmContext;
public FSSchedulerApp(ApplicationAttemptId applicationAttemptId,
String user, Queue queue, ActiveUsersManager activeUsersManager,
- RMContext rmContext, ApplicationStore store) {
+ RMContext rmContext) {
this.rmContext = rmContext;
this.appSchedulingInfo =
new AppSchedulingInfo(applicationAttemptId, user, queue,
- activeUsersManager, store);
+ activeUsersManager);
this.queue = queue;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
index f1921ad..f0b9949 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@@ -50,7 +51,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
@@ -75,6 +76,25 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
+/**
+ * A scheduler that schedules resources between a set of queues. The scheduler
+ * keeps track of the resources used by each queue, and attempts to maintain
+ * fairness by scheduling tasks at queues whose allocations are farthest below
+ * an ideal fair distribution.
+ *
+ * The fair scheduler supports hierarchical queues. All queues descend from a
+ * queue named "root". Available resources are distributed among the children
+ * of the root queue in the typical fair scheduling fashion. Then, the children
+ * distribute the resources assigned to them to their children in the same
+ * fashion. Applications may only be scheduled on leaf queues. Queues can be
+ * specified as children of other queues by placing them as sub-elements of their
+ * parents in the fair scheduler configuration file.
+ *
+ * A queue's name starts with the names of its parents, with periods as
+ * separators. So a queue named "queue1" directly under the root queue would be
+ * referred to as "root.queue1", and a queue named "queue2" under a queue
+ * named "parent1" would be referred to as "root.parent1.queue2".
+ */
@LimitedPrivate("yarn")
@Unstable
@SuppressWarnings("unchecked")
@@ -105,23 +125,22 @@
// Aggregate metrics
QueueMetrics rootMetrics;
- //Time when we last updated preemption vars
+ // Time when we last updated preemption vars
protected long lastPreemptionUpdateTime;
- //Time we last ran preemptTasksIfNecessary
+ // Time we last ran preemptTasksIfNecessary
private long lastPreemptCheckTime;
-
// This stores per-application scheduling information, indexed by
// attempt ID's for fast lookup.
- protected Map<ApplicationAttemptId, FSSchedulerApp> applications
- = new HashMap<ApplicationAttemptId, FSSchedulerApp>();
+ protected Map<ApplicationAttemptId, FSSchedulerApp> applications =
+ new HashMap<ApplicationAttemptId, FSSchedulerApp>();
// Nodes in the cluster, indexed by NodeId
- private Map<NodeId, FSSchedulerNode> nodes =
+ private Map<NodeId, FSSchedulerNode> nodes =
new ConcurrentHashMap<NodeId, FSSchedulerNode>();
// Aggregate capacity of the cluster
- private Resource clusterCapacity =
+ private Resource clusterCapacity =
RecordFactoryProvider.getRecordFactory(null).newRecordInstance(Resource.class);
// How often tasks are preempted (must be longer than a couple
@@ -131,10 +150,11 @@
protected boolean preemptionEnabled;
protected boolean sizeBasedWeight; // Give larger weights to larger jobs
protected WeightAdjuster weightAdjuster; // Can be null for no weight adjuster
- protected double nodeLocalityThreshold; // Cluster threshold for node locality
- protected double rackLocalityThreshold; // Cluster threshold for rack locality
- private FairSchedulerEventLog eventLog; // Machine-readable event log
- protected boolean assignMultiple; // Allocate multiple containers per heartbeat
+ protected double nodeLocalityThreshold; // Cluster threshold for node locality
+ protected double rackLocalityThreshold; // Cluster threshold for rack locality
+ private FairSchedulerEventLog eventLog; // Machine-readable event log
+ protected boolean assignMultiple; // Allocate multiple containers per
+ // heartbeat
protected int maxAssign; // Max containers to assign per heartbeat
public FairScheduler() {
@@ -150,16 +170,8 @@
return queueMgr;
}
- public List<FSQueueSchedulable> getQueueSchedulables() {
- List<FSQueueSchedulable> scheds = new ArrayList<FSQueueSchedulable>();
- for (FSQueue queue: queueMgr.getQueues()) {
- scheds.add(queue.getQueueSchedulable());
- }
- return scheds;
- }
-
private RMContainer getRMContainer(ContainerId containerId) {
- FSSchedulerApp application =
+ FSSchedulerApp application =
applications.get(containerId.getApplicationAttemptId());
return (application == null) ? null : application.getRMContainer(containerId);
}
@@ -183,34 +195,24 @@
}
/**
- * Recompute the internal variables used by the scheduler - per-job weights,
- * fair shares, deficits, minimum slot allocations, and amount of used and
- * required resources per job.
- */
+ * Recompute the internal variables used by the scheduler - per-job weights,
+ * fair shares, deficits, minimum slot allocations, and amount of used and
+ * required resources per job.
+ */
protected synchronized void update() {
queueMgr.reloadAllocsIfNecessary(); // Relaod alloc file
updateRunnability(); // Set job runnability based on user/queue limits
updatePreemptionVariables(); // Determine if any queues merit preemption
- // Update demands of apps and queues
- for (FSQueue queue: queueMgr.getQueues()) {
- queue.getQueueSchedulable().updateDemand();
- }
+ FSQueue rootQueue = queueMgr.getRootQueue();
- // Compute fair shares based on updated demands
- List<FSQueueSchedulable> queueScheds = getQueueSchedulables();
- SchedulingAlgorithms.computeFairShares(
- queueScheds, clusterCapacity);
+ // Recursively update demands for all queues
+ rootQueue.updateDemand();
- // Update queue metrics for this queue
- for (FSQueueSchedulable sched : queueScheds) {
- sched.getMetrics().setAvailableResourcesToQueue(sched.getFairShare());
- }
-
- // Use the computed shares to assign shares within each queue
- for (FSQueue queue: queueMgr.getQueues()) {
- queue.getQueueSchedulable().redistributeShare();
- }
+ rootQueue.setFairShare(clusterCapacity);
+ // Recursively compute fair shares for all queues
+ // and update metrics
+ rootQueue.recomputeFairShares();
// Update recorded capacity of root queue (child queues are updated
// when fair share is calculated).
@@ -225,7 +227,7 @@
private void updatePreemptionVariables() {
long now = clock.getTime();
lastPreemptionUpdateTime = now;
- for (FSQueueSchedulable sched: getQueueSchedulables()) {
+ for (FSLeafQueue sched : queueMgr.getLeafQueues()) {
if (!isStarvedForMinShare(sched)) {
sched.setLastTimeAtMinShare(now);
}
@@ -238,16 +240,16 @@
/**
* Is a queue below its min share for the given task type?
*/
- boolean isStarvedForMinShare(FSQueueSchedulable sched) {
+ boolean isStarvedForMinShare(FSLeafQueue sched) {
Resource desiredShare = Resources.min(sched.getMinShare(), sched.getDemand());
return Resources.lessThan(sched.getResourceUsage(), desiredShare);
}
/**
- * Is a queue being starved for fair share for the given task type?
- * This is defined as being below half its fair share.
+ * Is a queue being starved for fair share for the given task type? This is
+ * defined as being below half its fair share.
*/
- boolean isStarvedForFairShare(FSQueueSchedulable sched) {
+ boolean isStarvedForFairShare(FSLeafQueue sched) {
Resource desiredFairShare = Resources.max(
Resources.multiply(sched.getFairShare(), .5), sched.getDemand());
return Resources.lessThan(sched.getResourceUsage(), desiredFairShare);
@@ -255,10 +257,10 @@
/**
* Check for queues that need tasks preempted, either because they have been
- * below their guaranteed share for minSharePreemptionTimeout or they
- * have been below half their fair share for the fairSharePreemptionTimeout.
- * If such queues exist, compute how many tasks of each type need to be
- * preempted and then select the right ones using preemptTasks.
+ * below their guaranteed share for minSharePreemptionTimeout or they have
+ * been below half their fair share for the fairSharePreemptionTimeout. If
+ * such queues exist, compute how many tasks of each type need to be preempted
+ * and then select the right ones using preemptTasks.
*/
protected synchronized void preemptTasksIfNecessary() {
if (!preemptionEnabled) {
@@ -273,35 +275,37 @@
Resource resToPreempt = Resources.none();
- for (FSQueueSchedulable sched: getQueueSchedulables()) {
+ for (FSLeafQueue sched : queueMgr.getLeafQueues()) {
resToPreempt = Resources.add(resToPreempt, resToPreempt(sched, curTime));
}
if (Resources.greaterThan(resToPreempt, Resources.none())) {
- preemptResources(getQueueSchedulables(), resToPreempt);
+ preemptResources(queueMgr.getLeafQueues(), resToPreempt);
}
}
/**
- * Preempt a quantity of resources from a list of QueueSchedulables.
- * The policy for this is to pick apps from queues that are over their fair
- * share, but make sure that no queue is placed below its fair share in the
- * process. We further prioritize preemption by choosing containers with
- * lowest priority to preempt.
+ * Preempt a quantity of resources from a list of QueueSchedulables. The
+ * policy for this is to pick apps from queues that are over their fair share,
+ * but make sure that no queue is placed below its fair share in the process.
+ * We further prioritize preemption by choosing containers with lowest
+ * priority to preempt.
*/
- protected void preemptResources(List<FSQueueSchedulable> scheds, Resource toPreempt) {
+ protected void preemptResources(Collection<FSLeafQueue> scheds,
+ Resource toPreempt) {
if (scheds.isEmpty() || Resources.equals(toPreempt, Resources.none())) {
return;
}
Map<RMContainer, FSSchedulerApp> apps =
new HashMap<RMContainer, FSSchedulerApp>();
- Map<RMContainer, FSQueueSchedulable> queues = new HashMap<RMContainer, FSQueueSchedulable>();
+ Map<RMContainer, FSLeafQueue> queues =
+ new HashMap<RMContainer, FSLeafQueue>();
// Collect running containers from over-scheduled queues
List<RMContainer> runningContainers = new ArrayList<RMContainer>();
- for (FSQueueSchedulable sched: scheds) {
+ for (FSLeafQueue sched : scheds) {
if (Resources.greaterThan(sched.getResourceUsage(), sched.getFairShare())) {
- for (AppSchedulable as: sched.getAppSchedulables()) {
+ for (AppSchedulable as : sched.getAppSchedulables()) {
for (RMContainer c : as.getApp().getLiveContainers()) {
runningContainers.add(c);
apps.put(c, as.getApp());
@@ -321,12 +325,12 @@
// Scan down the sorted list of task statuses until we've killed enough
// tasks, making sure we don't kill too many from any queue
- for (RMContainer container: runningContainers) {
- FSQueueSchedulable sched = queues.get(container);
+ for (RMContainer container : runningContainers) {
+ FSLeafQueue sched = queues.get(container);
if (Resources.greaterThan(sched.getResourceUsage(), sched.getFairShare())) {
LOG.info("Preempting container (prio=" + container.getContainer().getPriority() +
"res=" + container.getContainer().getResource() +
- ") from queue " + sched.getQueue().getName());
+ ") from queue " + sched.getName());
ContainerStatus status = SchedulerUtils.createAbnormalContainerStatus(
container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER);
@@ -348,12 +352,12 @@
* If the queue has been below its min share for at least its preemption
* timeout, it should preempt the difference between its current share and
* this min share. If it has been below half its fair share for at least the
- * fairSharePreemptionTimeout, it should preempt enough tasks to get up to
- * its full fair share. If both conditions hold, we preempt the max of the
- * two amounts (this shouldn't happen unless someone sets the timeouts to
- * be identical for some reason).
+ * fairSharePreemptionTimeout, it should preempt enough tasks to get up to its
+ * full fair share. If both conditions hold, we preempt the max of the two
+ * amounts (this shouldn't happen unless someone sets the timeouts to be
+ * identical for some reason).
*/
- protected Resource resToPreempt(FSQueueSchedulable sched, long curTime) {
+ protected Resource resToPreempt(FSLeafQueue sched, long curTime) {
String queue = sched.getName();
long minShareTimeout = queueMgr.getMinSharePreemptionTimeout(queue);
long fairShareTimeout = queueMgr.getFairSharePreemptionTimeout();
@@ -362,7 +366,7 @@
if (curTime - sched.getLastTimeAtMinShare() > minShareTimeout) {
Resource target = Resources.min(sched.getMinShare(), sched.getDemand());
resDueToMinShare = Resources.max(Resources.none(),
- Resources.subtract(target, sched.getResourceUsage()));
+ Resources.subtract(target, sched.getResourceUsage()));
}
if (curTime - sched.getLastTimeAtHalfFairShare() > fairShareTimeout) {
Resource target = Resources.min(sched.getFairShare(), sched.getDemand());
@@ -380,15 +384,15 @@
}
/**
- * This updates the runnability of all apps based on whether or not
- * any users/queues have exceeded their capacity.
+ * This updates the runnability of all apps based on whether or not any
+ * users/queues have exceeded their capacity.
*/
private void updateRunnability() {
List<AppSchedulable> apps = new ArrayList<AppSchedulable>();
// Start by marking everything as not runnable
- for (FSQueue p: queueMgr.getQueues()) {
- for (AppSchedulable a: p.getQueueSchedulable().getAppSchedulables()) {
+ for (FSLeafQueue leafQueue : queueMgr.getLeafQueues()) {
+ for (AppSchedulable a : leafQueue.getAppSchedulables()) {
a.setRunnable(false);
apps.add(a);
}
@@ -400,7 +404,7 @@
Map<String, Integer> userApps = new HashMap<String, Integer>();
Map<String, Integer> queueApps = new HashMap<String, Integer>();
- for (AppSchedulable app: apps) {
+ for (AppSchedulable app : apps) {
String user = app.getApp().getUser();
String queue = app.getApp().getQueueName();
int userCount = userApps.containsKey(user) ? userApps.get(user) : 0;
@@ -473,22 +477,25 @@
}
/**
- * Add a new application to the scheduler, with a given id, queue name,
- * and user. This will accept a new app even if the user or queue is above
+ * Add a new application to the scheduler, with a given id, queue name, and
+ * user. This will accept a new app even if the user or queue is above
* configured limits, but the app will not be marked as runnable.
*/
- protected synchronized void
- addApplication(ApplicationAttemptId applicationAttemptId,
- String queueName, String user) {
+ protected synchronized void addApplication(
+ ApplicationAttemptId applicationAttemptId, String queueName, String user) {
- FSQueue queue = queueMgr.getQueue(queueName);
+ FSLeafQueue queue = queueMgr.getLeafQueue(queueName);
+ if (queue == null) {
+ // queue is not an existing or createable leaf queue
+ queue = queueMgr.getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME);
+ }
FSSchedulerApp schedulerApp =
new FSSchedulerApp(applicationAttemptId, user,
- queue.getQueueSchedulable(), new ActiveUsersManager(getRootQueueMetrics()),
- rmContext, null);
-
- // Inforce ACLs
+ queue, new ActiveUsersManager(getRootQueueMetrics()),
+ rmContext);
+
+ // Enforce ACLs
UserGroupInformation userUgi;
try {
userUgi = UserGroupInformation.getCurrentUser();
@@ -497,8 +504,8 @@
return;
}
- List<QueueUserACLInfo> info = queue.getQueueSchedulable().getQueueUserAclInfo(
- userUgi); // Always a signleton list
+ // Always a singleton list
+ List<QueueUserACLInfo> info = queue.getQueueUserAclInfo(userUgi);
if (!info.get(0).getUserAcls().contains(QueueACL.SUBMIT_APPLICATIONS)) {
LOG.info("User " + userUgi.getUserName() +
" cannot submit" + " applications to queue " + queue.getName());
@@ -506,14 +513,13 @@
}
queue.addApp(schedulerApp);
- queue.getQueueSchedulable().getMetrics().submitApp(user,
- applicationAttemptId.getAttemptId());
+ queue.getMetrics().submitApp(user, applicationAttemptId.getAttemptId());
rootMetrics.submitApp(user, applicationAttemptId.getAttemptId());
applications.put(applicationAttemptId, schedulerApp);
LOG.info("Application Submission: " + applicationAttemptId +
- ", user: " + user +
+ ", user: " + user +
", currently active: " + applications.size());
rmContext.getDispatcher().getEventHandler().handle(
@@ -540,10 +546,10 @@
SchedulerUtils.createAbnormalContainerStatus(
rmContainer.getContainerId(),
SchedulerUtils.COMPLETED_APPLICATION),
- RMContainerEventType.KILL);
+ RMContainerEventType.KILL);
}
- // Release all reserved containers
+ // Release all reserved containers
for (RMContainer rmContainer : application.getReservedContainers()) {
completedContainer(rmContainer,
SchedulerUtils.createAbnormalContainerStatus(
@@ -556,7 +562,8 @@
application.stop(rmAppAttemptFinalState);
// Inform the queue
- FSQueue queue = queueMgr.getQueue(application.getQueue().getQueueName());
+ FSLeafQueue queue = queueMgr.getLeafQueue(application.getQueue()
+ .getQueueName());
queue.removeApp(application);
// Remove from our data-structure
@@ -658,11 +665,11 @@
for (ContainerId releasedContainerId : release) {
RMContainer rmContainer = getRMContainer(releasedContainerId);
if (rmContainer == null) {
- RMAuditLogger.logFailure(application.getUser(),
- AuditConstants.RELEASE_CONTAINER,
- "Unauthorized access or invalid container", "FairScheduler",
- "Trying to release container not owned by app or with invalid id",
- application.getApplicationId(), releasedContainerId);
+ RMAuditLogger.logFailure(application.getUser(),
+ AuditConstants.RELEASE_CONTAINER,
+ "Unauthorized access or invalid container", "FairScheduler",
+ "Trying to release container not owned by app or with invalid id",
+ application.getApplicationId(), releasedContainerId);
}
completedContainer(rmContainer,
SchedulerUtils.createAbnormalContainerStatus(
@@ -675,8 +682,8 @@
if (!ask.isEmpty()) {
if (LOG.isDebugEnabled()) {
LOG.debug("allocate: pre-update" +
- " applicationAttemptId=" + appAttemptId +
- " application=" + application.getApplicationId());
+ " applicationAttemptId=" + appAttemptId +
+ " application=" + application.getApplicationId());
}
application.showRequests();
@@ -689,19 +696,17 @@
if (LOG.isDebugEnabled()) {
LOG.debug("allocate:" +
- " applicationAttemptId=" + appAttemptId +
- " #ask=" + ask.size());
+ " applicationAttemptId=" + appAttemptId +
+ " #ask=" + ask.size());
}
- return new Allocation(
- application.pullNewlyAllocatedContainers(),
+ return new Allocation(application.pullNewlyAllocatedContainers(),
application.getHeadroom());
}
}
/**
- * Process a container which has launched on a node, as reported by the
- * node.
+ * Process a container which has launched on a node, as reported by the node.
*/
private void containerLaunchedOnNode(ContainerId containerId, FSSchedulerNode node) {
// Get the application for the finished container
@@ -757,20 +762,20 @@
LOG.info("Trying to fulfill reservation for application " +
reservedApplication.getApplicationId() + " on node: " + nm);
- FSQueue queue = queueMgr.getQueue(reservedApplication.getQueueName());
- queue.getQueueSchedulable().assignContainer(node, true);
+ FSLeafQueue queue = queueMgr.getLeafQueue(reservedApplication.getQueueName());
+ queue.assignContainer(node, true);
}
-
// Otherwise, schedule at queue which is furthest below fair share
else {
int assignedContainers = 0;
while (true) {
// At most one task is scheduled each iteration of this loop
- List<FSQueueSchedulable> scheds = getQueueSchedulables();
+ List<FSLeafQueue> scheds = new ArrayList<FSLeafQueue>(
+ queueMgr.getLeafQueues());
Collections.sort(scheds, new SchedulingAlgorithms.FairShareComparator());
boolean assignedContainer = false;
- for (FSQueueSchedulable sched : scheds) {
+ for (FSLeafQueue sched : scheds) {
Resource assigned = sched.assignContainer(node, false);
if (Resources.greaterThan(assigned, Resources.none())) {
eventLog.log("ASSIGN", nm.getHostName(), assigned);
@@ -813,7 +818,7 @@
@Override
public void handle(SchedulerEvent event) {
- switch(event.getType()) {
+ switch (event.getType()) {
case NODE_ADDED:
if (!(event instanceof NodeAddedSchedulerEvent)) {
throw new RuntimeException("Unexpected event type: " + event);
@@ -832,8 +837,7 @@
if (!(event instanceof NodeUpdateSchedulerEvent)) {
throw new RuntimeException("Unexpected event type: " + event);
}
- NodeUpdateSchedulerEvent nodeUpdatedEvent =
- (NodeUpdateSchedulerEvent)event;
+ NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event;
nodeUpdate(nodeUpdatedEvent.getRMNode(),
nodeUpdatedEvent.getNewlyLaunchedContainers(),
nodeUpdatedEvent.getCompletedContainers());
@@ -842,7 +846,7 @@
if (!(event instanceof AppAddedSchedulerEvent)) {
throw new RuntimeException("Unexpected event type: " + event);
}
- AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
+ AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent)event;
String queue = appAddedEvent.getQueue();
// Potentially set queue to username if configured to do so
@@ -867,7 +871,7 @@
throw new RuntimeException("Unexpected event type: " + event);
}
ContainerExpiredSchedulerEvent containerExpiredEvent =
- (ContainerExpiredSchedulerEvent) event;
+ (ContainerExpiredSchedulerEvent)event;
ContainerId containerId = containerExpiredEvent.getContainerId();
completedContainer(getRMContainer(containerId),
SchedulerUtils.createAbnormalContainerStatus(
@@ -886,8 +890,8 @@
}
@Override
- public synchronized void
- reinitialize(Configuration conf, RMContext rmContext) throws IOException {
+ public synchronized void reinitialize(Configuration conf, RMContext rmContext)
+ throws IOException {
if (!initialized) {
this.conf = new FairSchedulerConfiguration(conf);
rootMetrics = QueueMetrics.forQueue("root", null, true, conf);
@@ -909,11 +913,10 @@
try {
queueMgr.initialize();
- }
- catch (Exception e) {
+ } catch (Exception e) {
throw new IOException("Failed to start FairScheduler", e);
}
-
+
Thread updateThread = new Thread(new UpdateThread());
updateThread.setName("FairSchedulerUpdateThread");
updateThread.setDaemon(true);
@@ -925,10 +928,9 @@
rackLocalityThreshold = this.conf.getLocalityThresholdRack();
preemptionEnabled = this.conf.getPreemptionEnabled();
try {
- queueMgr.reloadAllocs();
+ queueMgr.reloadAllocs();
- }
- catch (Exception e) {
+ } catch (Exception e) {
throw new IOException("Failed to initialize FairScheduler", e);
}
}
@@ -940,8 +942,8 @@
if (!queueMgr.exists(queueName)) {
return null;
}
- return queueMgr.getQueue(queueName).getQueueSchedulable().getQueueInfo(
- includeChildQueues, recursive);
+ return queueMgr.getQueue(queueName).getQueueInfo(includeChildQueues,
+ recursive);
}
@Override
@@ -953,12 +955,7 @@
return new ArrayList<QueueUserACLInfo>();
}
- List<QueueUserACLInfo> userAcls = new ArrayList<QueueUserACLInfo>();
-
- for (FSQueue queue : queueMgr.getQueues()) {
- userAcls.addAll(queue.getQueueSchedulable().getQueueUserAclInfo(user));
- }
- return userAcls;
+ return queueMgr.getRootQueue().getQueueUserAclInfo(user);
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java
index 2da306e..5da1d4f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java
@@ -27,6 +27,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.CopyOnWriteArrayList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
@@ -52,6 +53,7 @@
/**
* Maintains a list of queues as well as scheduling parameters for each queue,
* such as guaranteed share allocations, from the fair scheduler config file.
+ *
*/
@Private
@Unstable
@@ -59,6 +61,8 @@
public static final Log LOG = LogFactory.getLog(
QueueManager.class.getName());
+ public static final String ROOT_QUEUE = "root";
+
/** Time to wait between checks of the allocation file */
public static final long ALLOC_RELOAD_INTERVAL = 10 * 1000;
@@ -76,7 +80,10 @@
// used) or a String to specify an absolute path (if
// mapred.fairscheduler.allocation.file is used).
- private Map<String, FSQueue> queues = new HashMap<String, FSQueue>();
+ private final Collection<FSLeafQueue> leafQueues =
+ new CopyOnWriteArrayList<FSLeafQueue>();
+ private final Map<String, FSQueue> queues = new HashMap<String, FSQueue>();
+ private FSParentQueue rootQueue;
private volatile QueueManagerInfo info = new QueueManagerInfo();
@@ -87,10 +94,17 @@
public QueueManager(FairScheduler scheduler) {
this.scheduler = scheduler;
}
+
+ public FSParentQueue getRootQueue() {
+ return rootQueue;
+ }
public void initialize() throws IOException, SAXException,
AllocationConfigurationException, ParserConfigurationException {
FairSchedulerConfiguration conf = scheduler.getConf();
+ rootQueue = new FSParentQueue("root", this, scheduler, null);
+ queues.put(rootQueue.getName(), rootQueue);
+
this.allocFile = conf.getAllocationFile();
if (allocFile == null) {
// No allocation file specified in jobconf. Use the default allocation
@@ -106,21 +120,106 @@
lastSuccessfulReload = scheduler.getClock().getTime();
lastReloadAttempt = scheduler.getClock().getTime();
// Create the default queue
- getQueue(YarnConfiguration.DEFAULT_QUEUE_NAME);
+ getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME);
}
-
+
/**
- * Get a queue by name, creating it if necessary
+ * Get a queue by name, creating it if necessary. If the queue
+ * is not or can not be a leaf queue, i.e. it already exists as a parent queue,
+ * or one of the parents in its name is already a leaf queue, null is returned.
+ *
+ * The root part of the name is optional, so a queue underneath the root
+ * named "queue1" could be referred to as just "queue1", and a queue named
+ * "queue2" underneath a parent named "parent1" that is underneath the root
+ * could be referred to as just "parent1.queue2".
*/
- public FSQueue getQueue(String name) {
+ public FSLeafQueue getLeafQueue(String name) {
+ if (!name.startsWith(ROOT_QUEUE + ".")) {
+ name = ROOT_QUEUE + "." + name;
+ }
synchronized (queues) {
FSQueue queue = queues.get(name);
if (queue == null) {
- queue = new FSQueue(scheduler, name);
- queue.setSchedulingMode(info.defaultSchedulingMode);
- queues.put(name, queue);
+ FSLeafQueue leafQueue = createLeafQueue(name);
+ if (leafQueue == null) {
+ return null;
+ }
+ leafQueue.setSchedulingMode(info.defaultSchedulingMode);
+ queue = leafQueue;
+ } else if (queue instanceof FSParentQueue) {
+ return null;
}
- return queue;
+ return (FSLeafQueue)queue;
+ }
+ }
+
+ /**
+ * Creates a leaf queue and places it in the tree. Creates any
+ * parents that don't already exist.
+ *
+ * @return
+ * the created queue, if successful. null if not allowed (one of the parent
+ * queues in the queue name is already a leaf queue)
+ */
+ private FSLeafQueue createLeafQueue(String name) {
+ List<String> newQueueNames = new ArrayList<String>();
+ newQueueNames.add(name);
+ int sepIndex = name.length();
+ FSParentQueue parent = null;
+
+ // Move up the queue tree until we reach one that exists.
+ while (sepIndex != -1) {
+ sepIndex = name.lastIndexOf('.', sepIndex-1);
+ FSQueue queue;
+ String curName = null;
+ curName = name.substring(0, sepIndex);
+ queue = queues.get(curName);
+
+ if (queue == null) {
+ newQueueNames.add(curName);
+ } else {
+ if (queue instanceof FSParentQueue) {
+ parent = (FSParentQueue)queue;
+ break;
+ } else {
+ return null;
+ }
+ }
+ }
+
+ // At this point, parent refers to the deepest existing parent of the
+ // queue to create.
+ // Now that we know everything worked out, make all the queues
+ // and add them to the map.
+ FSLeafQueue leafQueue = null;
+ for (int i = newQueueNames.size()-1; i >= 0; i--) {
+ String queueName = newQueueNames.get(i);
+ if (i == 0) {
+ // First name added was the leaf queue
+ leafQueue = new FSLeafQueue(name, this, scheduler, parent);
+ parent.addChildQueue(leafQueue);
+ queues.put(leafQueue.getName(), leafQueue);
+ leafQueues.add(leafQueue);
+ } else {
+ FSParentQueue newParent = new FSParentQueue(queueName, this, scheduler, parent);
+ parent.addChildQueue(newParent);
+ queues.put(newParent.getName(), newParent);
+ parent = newParent;
+ }
+ }
+
+ return leafQueue;
+ }
+
+ /**
+ * Gets a queue by name.
+ */
+ public FSQueue getQueue(String name) {
+ if (!name.startsWith(ROOT_QUEUE + ".") && !name.equals(ROOT_QUEUE)) {
+ name = ROOT_QUEUE + "." + name;
+ }
+ synchronized (queues) {
+ return queues.get(name);
}
}
@@ -136,8 +235,8 @@
/**
* Get the queue for a given AppSchedulable.
*/
- public FSQueue getQueueForApp(AppSchedulable app) {
- return getQueue(app.getApp().getQueueName());
+ public FSLeafQueue getQueueForApp(AppSchedulable app) {
+ return getLeafQueue(app.getApp().getQueueName());
}
/**
@@ -237,54 +336,9 @@
Element element = (Element)node;
if ("queue".equals(element.getTagName()) ||
"pool".equals(element.getTagName())) {
- String queueName = element.getAttribute("name");
- Map<QueueACL, AccessControlList> acls =
- new HashMap<QueueACL, AccessControlList>();
- queueNamesInAllocFile.add(queueName);
- NodeList fields = element.getChildNodes();
- for (int j = 0; j < fields.getLength(); j++) {
- Node fieldNode = fields.item(j);
- if (!(fieldNode instanceof Element))
- continue;
- Element field = (Element) fieldNode;
- if ("minResources".equals(field.getTagName())) {
- String text = ((Text)field.getFirstChild()).getData().trim();
- int val = Integer.parseInt(text);
- minQueueResources.put(queueName, Resources.createResource(val));
- } else if ("maxResources".equals(field.getTagName())) {
- String text = ((Text)field.getFirstChild()).getData().trim();
- int val = Integer.parseInt(text);
- maxQueueResources.put(queueName, Resources.createResource(val));
- } else if ("maxRunningApps".equals(field.getTagName())) {
- String text = ((Text)field.getFirstChild()).getData().trim();
- int val = Integer.parseInt(text);
- queueMaxApps.put(queueName, val);
- } else if ("weight".equals(field.getTagName())) {
- String text = ((Text)field.getFirstChild()).getData().trim();
- double val = Double.parseDouble(text);
- queueWeights.put(queueName, val);
- } else if ("minSharePreemptionTimeout".equals(field.getTagName())) {
- String text = ((Text)field.getFirstChild()).getData().trim();
- long val = Long.parseLong(text) * 1000L;
- minSharePreemptionTimeouts.put(queueName, val);
- } else if ("schedulingMode".equals(field.getTagName())) {
- String text = ((Text)field.getFirstChild()).getData().trim();
- queueModes.put(queueName, parseSchedulingMode(text));
- } else if ("aclSubmitApps".equals(field.getTagName())) {
- String text = ((Text)field.getFirstChild()).getData().trim();
- acls.put(QueueACL.SUBMIT_APPLICATIONS, new AccessControlList(text));
- } else if ("aclAdministerApps".equals(field.getTagName())) {
- String text = ((Text)field.getFirstChild()).getData().trim();
- acls.put(QueueACL.ADMINISTER_QUEUE, new AccessControlList(text));
- }
- }
- queueAcls.put(queueName, acls);
- if (maxQueueResources.containsKey(queueName) && minQueueResources.containsKey(queueName)
- && Resources.lessThan(maxQueueResources.get(queueName),
- minQueueResources.get(queueName))) {
- LOG.warn(String.format("Queue %s has max resources %d less than min resources %d",
- queueName, maxQueueResources.get(queueName), minQueueResources.get(queueName)));
- }
+ loadQueue("root", element, minQueueResources, maxQueueResources, queueMaxApps,
+ userMaxApps, queueWeights, queueModes, minSharePreemptionTimeouts,
+ queueAcls, queueNamesInAllocFile);
} else if ("user".equals(element.getTagName())) {
String userName = element.getAttribute("name");
NodeList fields = element.getChildNodes();
@@ -331,7 +385,7 @@
queueMaxAppsDefault, defaultSchedulingMode, minSharePreemptionTimeouts,
queueAcls, fairSharePreemptionTimeout, defaultMinSharePreemptionTimeout);
for (String name: queueNamesInAllocFile) {
- FSQueue queue = getQueue(name);
+ FSLeafQueue queue = getLeafQueue(name);
if (queueModes.containsKey(name)) {
queue.setSchedulingMode(queueModes.get(name));
} else {
@@ -340,6 +394,75 @@
}
}
}
+
+ /**
+ * Loads a queue from a queue element in the configuration file
+ */
+ private void loadQueue(String parentName, Element element, Map<String, Resource> minQueueResources,
+ Map<String, Resource> maxQueueResources, Map<String, Integer> queueMaxApps,
+ Map<String, Integer> userMaxApps, Map<String, Double> queueWeights,
+ Map<String, SchedulingMode> queueModes, Map<String, Long> minSharePreemptionTimeouts,
+ Map<String, Map<QueueACL, AccessControlList>> queueAcls, List<String> queueNamesInAllocFile)
+ throws AllocationConfigurationException {
+ String queueName = parentName + "." + element.getAttribute("name");
+ Map<QueueACL, AccessControlList> acls =
+ new HashMap<QueueACL, AccessControlList>();
+ NodeList fields = element.getChildNodes();
+ boolean isLeaf = true;
+
+ for (int j = 0; j < fields.getLength(); j++) {
+ Node fieldNode = fields.item(j);
+ if (!(fieldNode instanceof Element))
+ continue;
+ Element field = (Element) fieldNode;
+ if ("minResources".equals(field.getTagName())) {
+ String text = ((Text)field.getFirstChild()).getData().trim();
+ int val = Integer.parseInt(text);
+ minQueueResources.put(queueName, Resources.createResource(val));
+ } else if ("maxResources".equals(field.getTagName())) {
+ String text = ((Text)field.getFirstChild()).getData().trim();
+ int val = Integer.parseInt(text);
+ maxQueueResources.put(queueName, Resources.createResource(val));
+ } else if ("maxRunningApps".equals(field.getTagName())) {
+ String text = ((Text)field.getFirstChild()).getData().trim();
+ int val = Integer.parseInt(text);
+ queueMaxApps.put(queueName, val);
+ } else if ("weight".equals(field.getTagName())) {
+ String text = ((Text)field.getFirstChild()).getData().trim();
+ double val = Double.parseDouble(text);
+ queueWeights.put(queueName, val);
+ } else if ("minSharePreemptionTimeout".equals(field.getTagName())) {
+ String text = ((Text)field.getFirstChild()).getData().trim();
+ long val = Long.parseLong(text) * 1000L;
+ minSharePreemptionTimeouts.put(queueName, val);
+ } else if ("schedulingMode".equals(field.getTagName())) {
+ String text = ((Text)field.getFirstChild()).getData().trim();
+ queueModes.put(queueName, parseSchedulingMode(text));
+ } else if ("aclSubmitApps".equals(field.getTagName())) {
+ String text = ((Text)field.getFirstChild()).getData().trim();
+ acls.put(QueueACL.SUBMIT_APPLICATIONS, new AccessControlList(text));
+ } else if ("aclAdministerApps".equals(field.getTagName())) {
+ String text = ((Text)field.getFirstChild()).getData().trim();
+ acls.put(QueueACL.ADMINISTER_QUEUE, new AccessControlList(text));
+ } else if ("queue".endsWith(field.getTagName()) ||
+ "pool".equals(field.getTagName())) {
+ loadQueue(queueName, field, minQueueResources, maxQueueResources, queueMaxApps,
+ userMaxApps, queueWeights, queueModes, minSharePreemptionTimeouts,
+ queueAcls, queueNamesInAllocFile);
+ isLeaf = false;
+ }
+ }
+ if (isLeaf) {
+ queueNamesInAllocFile.add(queueName);
+ }
+ queueAcls.put(queueName, acls);
+ if (maxQueueResources.containsKey(queueName) && minQueueResources.containsKey(queueName)
+ && Resources.lessThan(maxQueueResources.get(queueName),
+ minQueueResources.get(queueName))) {
+ LOG.warn(String.format("Queue %s has max resources %d less than min resources %d",
+ queueName, maxQueueResources.get(queueName), minQueueResources.get(queueName)));
+ }
+ }
private SchedulingMode parseSchedulingMode(String text)
throws AllocationConfigurationException {
@@ -384,9 +507,9 @@
/**
* Get a collection of all queues
*/
- public Collection<FSQueue> getQueues() {
+ public Collection<FSLeafQueue> getLeafQueues() {
synchronized (queues) {
- return new ArrayList<FSQueue>(queues.values());
+ return leafQueues;
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java
index c7f111aa..cbcbd46 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java
@@ -92,12 +92,6 @@
public abstract void updateDemand();
/**
- * Distribute the fair share assigned to this Schedulable among its
- * children (used in queues where the internal scheduler is fair sharing).
- */
- public abstract void redistributeShare();
-
- /**
* Assign a container on this node if possible, and return the amount of
* resources assigned. If {@code reserved} is true, it means a reservation
* already exists on this node, and the schedulable should fulfill that
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
index a69374c4..01b85b2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
@@ -56,7 +56,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
@@ -292,7 +292,7 @@
// TODO: Fix store
FiCaSchedulerApp schedulerApp =
new FiCaSchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager,
- this.rmContext, null);
+ this.rmContext);
applications.put(appAttemptId, schedulerApp);
metrics.submitApp(user, appAttemptId.getAttemptId());
LOG.info("Application Submission: " + appAttemptId.getApplicationId() +
@@ -763,13 +763,7 @@
@Override
public void recover(RMState state) {
- // TODO fix recovery
-// for (Map.Entry<ApplicationId, ApplicationInfo> entry: state.getStoredApplications().entrySet()) {
-// ApplicationId appId = entry.getKey();
-// ApplicationInfo appInfo = entry.getValue();
-// SchedulerApp app = applications.get(appId);
-// app.allocate(appInfo.getContainers());
-// }
+ // NOT IMPLEMENTED
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java
index fad5dd8..e90edae 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java
@@ -25,26 +25,27 @@
import java.util.Collection;
import java.util.HashSet;
+import java.util.concurrent.ConcurrentMap;
import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
-import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
-import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render;
import com.google.inject.Inject;
class AppsBlock extends HtmlBlock {
- final AppsList list;
+ final ConcurrentMap<ApplicationId, RMApp> apps;
- @Inject AppsBlock(AppsList list, ViewContext ctx) {
+@Inject AppsBlock(RMContext rmContext, ViewContext ctx) {
super(ctx);
- this.list = list;
+ apps = rmContext.getRMApps();
}
@Override public void render(Block html) {
@@ -63,7 +64,6 @@
th(".progress", "Progress").
th(".ui", "Tracking UI")._()._().
tbody();
- int i = 0;
Collection<RMAppState> reqAppStates = null;
String reqStateString = $(APP_STATE);
if (reqStateString != null && !reqStateString.isEmpty()) {
@@ -74,7 +74,7 @@
}
}
StringBuilder appsTableData = new StringBuilder("[\n");
- for (RMApp app : list.apps.values()) {
+ for (RMApp app : apps.values()) {
if (reqAppStates != null && !reqAppStates.contains(app.getState())) {
continue;
}
@@ -108,7 +108,6 @@
appsTableData.append(trackingURL).append("'>")
.append(appInfo.getTrackingUI()).append("</a>\"],\n");
- if (list.rendering != Render.HTML && ++i >= 20) break;
}
if(appsTableData.charAt(appsTableData.length() - 2) == ',') {
appsTableData.delete(appsTableData.length()-2, appsTableData.length()-1);
@@ -118,12 +117,5 @@
_("var appsTableData=" + appsTableData)._();
tbody._()._();
-
- if (list.rendering == Render.JS_ARRAY) {
- echo("<script type='text/javascript'>\n",
- "var appsData=");
- list.toDataTableArrays(reqAppStates, writer());
- echo("\n</script>\n");
- }
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java
deleted file mode 100644
index 415f915..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.hadoop.yarn.server.resourcemanager.webapp;
-
-import static org.apache.commons.lang.StringEscapeUtils.escapeHtml;
-import static org.apache.commons.lang.StringEscapeUtils.escapeJavaScript;
-import static org.apache.hadoop.yarn.webapp.view.Jsons._SEP;
-import static org.apache.hadoop.yarn.webapp.view.Jsons.appendLink;
-import static org.apache.hadoop.yarn.webapp.view.Jsons.appendProgressBar;
-import static org.apache.hadoop.yarn.webapp.view.Jsons.appendSortable;
-
-import java.io.PrintWriter;
-import java.util.Collection;
-import java.util.concurrent.ConcurrentMap;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
-import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
-import org.apache.hadoop.yarn.util.Times;
-import org.apache.hadoop.yarn.webapp.Controller.RequestContext;
-import org.apache.hadoop.yarn.webapp.ToJSON;
-import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render;
-
-import com.google.inject.Inject;
-import com.google.inject.servlet.RequestScoped;
-
-// So we only need to do asm.getApplications once in a request
-@RequestScoped
-class AppsList implements ToJSON {
- final RequestContext rc;
- final ConcurrentMap<ApplicationId, RMApp> apps;
- Render rendering;
-
- @Inject AppsList(RequestContext ctx, RMContext rmContext) {
- rc = ctx;
- apps = rmContext.getRMApps();
- }
-
- void toDataTableArrays(Collection<RMAppState> requiredAppStates, PrintWriter out) {
- out.append('[');
- boolean first = true;
- for (RMApp app : apps.values()) {
- if (requiredAppStates != null &&
- !requiredAppStates.contains(app.getState())) {
- continue;
- }
- AppInfo appInfo = new AppInfo(app, true);
- String startTime = Times.format(appInfo.getStartTime());
- String finishTime = Times.format(appInfo.getFinishTime());
- if (first) {
- first = false;
- } else {
- out.append(",\n");
- }
- out.append("[\"");
- appendSortable(out, appInfo.getAppIdNum());
- appendLink(out, appInfo.getAppId(), rc.prefix(), "app",
- appInfo.getAppId()).append(_SEP).
- append(escapeHtml(appInfo.getUser())).append(_SEP).
- append(escapeJavaScript(escapeHtml(appInfo.getName()))).append(_SEP).
- append(escapeHtml(appInfo.getQueue())).append(_SEP);
- appendSortable(out, appInfo.getStartTime()).
- append(startTime).append(_SEP);
- appendSortable(out, appInfo.getFinishTime()).
- append(finishTime).append(_SEP).
- append(appInfo.getState()).append(_SEP).
- append(appInfo.getFinalStatus()).append(_SEP);
- appendProgressBar(out, appInfo.getProgress()).append(_SEP);
- appendLink(out, appInfo.getTrackingUI(), rc.prefix(),
- !appInfo.isTrackingUrlReady() ?
- "#" : appInfo.getTrackingUrlPretty()).
- append("\"]");
- }
- out.append(']');
- }
-
- @Override
- public void toJSON(PrintWriter out) {
- out.print("{\"aaData\":");
- toDataTableArrays(null, out);
- out.print("}\n");
- }
-}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java
index efbe64a..9860e18 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java
@@ -25,8 +25,11 @@
import java.util.Collection;
import java.util.HashSet;
+import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
@@ -38,7 +41,6 @@
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
-import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render;
import com.google.inject.Inject;
@@ -47,15 +49,15 @@
* scheduler as part of the fair scheduler page.
*/
public class FairSchedulerAppsBlock extends HtmlBlock {
- final AppsList list;
+ final ConcurrentMap<ApplicationId, RMApp> apps;
final FairSchedulerInfo fsinfo;
- @Inject public FairSchedulerAppsBlock(AppsList list,
+ @Inject public FairSchedulerAppsBlock(RMContext rmContext,
ResourceManager rm, ViewContext ctx) {
super(ctx);
- this.list = list;
FairScheduler scheduler = (FairScheduler) rm.getResourceScheduler();
fsinfo = new FairSchedulerInfo(scheduler);
+ apps = rmContext.getRMApps();
}
@Override public void render(Block html) {
@@ -75,7 +77,6 @@
th(".progress", "Progress").
th(".ui", "Tracking UI")._()._().
tbody();
- int i = 0;
Collection<RMAppState> reqAppStates = null;
String reqStateString = $(APP_STATE);
if (reqStateString != null && !reqStateString.isEmpty()) {
@@ -85,7 +86,7 @@
reqAppStates.add(RMAppState.valueOf(stateString));
}
}
- for (RMApp app : list.apps.values()) {
+ for (RMApp app : apps.values()) {
if (reqAppStates != null && !reqAppStates.contains(app.getState())) {
continue;
}
@@ -122,15 +123,7 @@
td().
a(!appInfo.isTrackingUrlReady()?
"#" : appInfo.getTrackingUrlPretty(), appInfo.getTrackingUI())._()._();
- if (list.rendering != Render.HTML && ++i >= 20) break;
}
tbody._()._();
-
- if (list.rendering == Render.JS_ARRAY) {
- echo("<script type='text/javascript'>\n",
- "var appsData=");
- list.toDataTableArrays(reqAppStates, writer());
- echo("\n</script>\n");
- }
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java
index 753e197..a4826e8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java
@@ -93,8 +93,4 @@
public void submit() {
setTitle("Application Submission Not Allowed");
}
-
- public void json() {
- renderJSON(AppsList.class);
- }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java
index 0ad1190..f9ad782 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java
@@ -60,9 +60,8 @@
}
private String appsTableInit() {
- AppsList list = getInstance(AppsList.class);
// id, user, name, queue, starttime, finishtime, state, status, progress, ui
- StringBuilder init = tableInit()
+ return tableInit()
.append(", 'aaData': appsTableData")
.append(", bDeferRender: true")
.append(", bProcessing: true")
@@ -78,18 +77,6 @@
.append(", 'mRender': parseHadoopProgress }]")
// Sort by id upon page load
- .append(", aaSorting: [[0, 'desc']]");
-
- String rows = $("rowlimit");
- int rowLimit = rows.isEmpty() ? MAX_DISPLAY_ROWS : Integer.parseInt(rows);
- if (list.apps.size() < rowLimit) {
- list.rendering = Render.HTML;
- return init.append('}').toString();
- }
- if (list.apps.size() > MAX_FAST_ROWS) {
- tableInitProgress(init, list.apps.size() * 6);
- }
- list.rendering = Render.JS_ARRAY;
- return init.append(", aaData:appsData}").toString();
+ .append(", aaSorting: [[0, 'desc']]}").toString();
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java
index 0591683..4fe19ca 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java
@@ -23,7 +23,7 @@
import java.util.List;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
public class FairSchedulerInfo {
@@ -32,9 +32,9 @@
public FairSchedulerInfo(FairScheduler fs) {
scheduler = fs;
- Collection<FSQueue> queues = fs.getQueueManager().getQueues();
+ Collection<FSLeafQueue> queues = fs.getQueueManager().getLeafQueues();
queueInfos = new ArrayList<FairSchedulerQueueInfo>();
- for (FSQueue queue : queues) {
+ for (FSLeafQueue queue : queues) {
queueInfos.add(new FairSchedulerQueueInfo(queue, fs));
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java
index ab22408..3574942 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java
@@ -22,9 +22,8 @@
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueueSchedulable;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSSchedulerApp;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.AppSchedulable;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.QueueManager;
@@ -49,17 +48,16 @@
private String queueName;
- public FairSchedulerQueueInfo(FSQueue queue, FairScheduler scheduler) {
- Collection<FSSchedulerApp> apps = queue.getApplications();
- for (FSSchedulerApp app : apps) {
- if (app.isPending()) {
+ public FairSchedulerQueueInfo(FSLeafQueue queue, FairScheduler scheduler) {
+ Collection<AppSchedulable> apps = queue.getAppSchedulables();
+ for (AppSchedulable app : apps) {
+ if (app.getApp().isPending()) {
numPendingApps++;
} else {
numActiveApps++;
}
}
- FSQueueSchedulable schedulable = queue.getQueueSchedulable();
QueueManager manager = scheduler.getQueueManager();
queueName = queue.getName();
@@ -67,11 +65,11 @@
Resource clusterMax = scheduler.getClusterCapacity();
clusterMaxMem = clusterMax.getMemory();
- usedResources = schedulable.getResourceUsage();
+ usedResources = queue.getResourceUsage();
fractionUsed = (float)usedResources.getMemory() / clusterMaxMem;
- fairShare = schedulable.getFairShare().getMemory();
- minResources = schedulable.getMinShare();
+ fairShare = queue.getFairShare().getMemory();
+ minResources = queue.getMinShare();
minShare = minResources.getMemory();
maxResources = scheduler.getQueueManager().getMaxResources(queueName);
if (maxResources.getMemory() > clusterMaxMem) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
index 3e83e0b..b85d482 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
@@ -37,7 +37,6 @@
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@@ -93,7 +92,7 @@
rmDispatcher);
AMLivelinessMonitor amFinishingMonitor = new AMLivelinessMonitor(
rmDispatcher);
- return new RMContextImpl(new MemStore(), rmDispatcher,
+ return new RMContextImpl(rmDispatcher,
containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor,
null, null, null, null) {
@Override
@@ -366,7 +365,6 @@
YarnConfiguration.DEFAULT_QUEUE_NAME,
app.getQueue());
Assert.assertEquals("app state doesn't match", RMAppState.NEW, app.getState());
- Assert.assertNotNull("app store is null", app.getApplicationStore());
// wait for event to be processed
int timeoutSecs = 0;
@@ -413,7 +411,6 @@
Assert.assertEquals("app name doesn't match", "testApp1", app.getName());
Assert.assertEquals("app queue doesn't match", "testQueue", app.getQueue());
Assert.assertEquals("app state doesn't match", RMAppState.NEW, app.getState());
- Assert.assertNotNull("app store is null", app.getApplicationStore());
// wait for event to be processed
int timeoutSecs = 0;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java
index cc251e1..e0852c3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java
@@ -50,7 +50,7 @@
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.ipc.YarnRPC;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.service.Service.STATE;
@@ -85,7 +85,7 @@
@BeforeClass
public static void setup() throws InterruptedException, IOException {
- Store store = StoreFactory.getStore(conf);
+ RMStateStore store = StoreFactory.getStore(conf);
conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
AccessControlList adminACL = new AccessControlList("");
adminACL.addGroup(SUPER_GROUP);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
index 555c822..f7f2a0d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
@@ -182,7 +182,7 @@
private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler,
ApplicationId applicationId3, YarnConfiguration config, String queueName) {
return new RMAppImpl(applicationId3, rmContext, config, null, null,
- queueName, null, null, null, yarnScheduler, null, System
+ queueName, null, null, yarnScheduler, null, System
.currentTimeMillis());
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java
index 6f6d565..9ae8bf0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java
@@ -36,7 +36,6 @@
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
@@ -79,7 +78,7 @@
InlineDispatcher rmDispatcher = new InlineDispatcher();
rmContext =
- new RMContextImpl(new MemStore(), rmDispatcher, null, null, null,
+ new RMContextImpl(rmDispatcher, null, null, null,
mock(DelegationTokenRenewer.class), null, null, null);
scheduler = mock(YarnScheduler.class);
doAnswer(
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java
index ce64065..9ae5cba 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java
@@ -31,7 +31,7 @@
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -47,7 +47,7 @@
@Before
public void setUp() throws Exception {
Configuration conf = new YarnConfiguration();
- Store store = StoreFactory.getStore(conf);
+ RMStateStore store = StoreFactory.getStore(conf);
resourceManager = new ResourceManager(store);
resourceManager.init(conf);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java
index 81aba39..2b9c848 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java
@@ -32,7 +32,6 @@
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
@@ -194,10 +193,6 @@
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
- public ApplicationStore getApplicationStore() {
- throw new UnsupportedOperationException("Not supported yet.");
- }
- @Override
public float getProgress() {
throw new UnsupportedOperationException("Not supported yet.");
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java
index 05ae726..130f0e4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java
@@ -49,8 +49,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
index 03d394a..211eaa5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
@@ -53,9 +53,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java
index 219a28f..f866694 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java
@@ -35,7 +35,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.junit.After;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java
index 00ab954..8a3f0a2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java
@@ -36,8 +36,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.junit.Before;
import org.junit.Test;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java
index f495ada..296ca73 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java
@@ -43,9 +43,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java
index bec07af..1c4b6f9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java
@@ -39,7 +39,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.NodeEventDispatcher;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
@@ -70,7 +69,7 @@
Configuration conf = new Configuration();
// Dispatcher that processes events inline
Dispatcher dispatcher = new InlineDispatcher();
- RMContext context = new RMContextImpl(new MemStore(), dispatcher, null,
+ RMContext context = new RMContextImpl(dispatcher, null,
null, null, null, null, null, null);
dispatcher.register(SchedulerEventType.class,
new InlineDispatcher.EmptyEventHandler());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java
index 629e888..a7a52c2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java
@@ -39,7 +39,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
@@ -66,7 +65,7 @@
}
});
RMContext context =
- new RMContextImpl(new MemStore(), dispatcher, null, null, null, null,
+ new RMContextImpl(dispatcher, null, null, null, null,
null, null, null);
dispatcher.register(RMNodeEventType.class,
new ResourceManager.NodeEventDispatcher(context));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java
index e5ea2b86..2b1b892 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java
@@ -28,7 +28,6 @@
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.MockApps;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -138,11 +137,6 @@
}
@Override
- public ApplicationStore getApplicationStore() {
- throw new UnsupportedOperationException("Not supported yet.");
- }
-
- @Override
public long getFinishTime() {
return finish;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
index 4546946..0f20cb3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
@@ -40,8 +40,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
@@ -140,7 +138,7 @@
AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class);
AMLivelinessMonitor amFinishingMonitor = mock(AMLivelinessMonitor.class);
this.rmContext =
- new RMContextImpl(new MemStore(), rmDispatcher,
+ new RMContextImpl(rmDispatcher,
containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor,
null, new ApplicationTokenSecretManager(conf),
new RMContainerTokenSecretManager(conf),
@@ -171,7 +169,6 @@
// ensure max retries set to known value
conf.setInt(YarnConfiguration.RM_AM_MAX_RETRIES, maxRetries);
String clientTokenStr = "bogusstring";
- ApplicationStore appStore = mock(ApplicationStore.class);
YarnScheduler scheduler = mock(YarnScheduler.class);
ApplicationMasterService masterService =
new ApplicationMasterService(rmContext, scheduler);
@@ -183,7 +180,7 @@
RMApp application = new RMAppImpl(applicationId, rmContext,
conf, name, user,
queue, submissionContext, clientTokenStr,
- appStore, scheduler,
+ scheduler,
masterService, System.currentTimeMillis());
testAppStartState(applicationId, user, name, queue, application);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
index 572cb2c..5c766c5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
@@ -53,7 +53,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@@ -162,7 +161,7 @@
amFinishingMonitor = mock(AMLivelinessMonitor.class);
Configuration conf = new Configuration();
rmContext =
- new RMContextImpl(new MemStore(), rmDispatcher,
+ new RMContextImpl(rmDispatcher,
containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor,
null, new ApplicationTokenSecretManager(conf),
new RMContainerTokenSecretManager(conf),
@@ -639,6 +638,14 @@
"Killed by user");
}
+
+ @Test
+ public void testNoTrackingUrl() {
+ Container amContainer = allocateApplicationAttempt();
+ launchApplicationAttempt(amContainer);
+ runApplicationAttempt(amContainer, "host", 8042, "");
+ }
+
@Test
public void testUnregisterToSuccessfulFinishing() {
Container amContainer = allocateApplicationAttempt();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java
index 123b88f..ae628ef 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java
@@ -479,7 +479,7 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0_0 =
spy(new FiCaSchedulerApp(appAttemptId_0_0, user_0, queue,
- queue.getActiveUsersManager(), rmContext, null));
+ queue.getActiveUsersManager(), rmContext));
queue.submitApplication(app_0_0, user_0, A);
List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
@@ -498,7 +498,7 @@
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_0_1 =
spy(new FiCaSchedulerApp(appAttemptId_0_1, user_0, queue,
- queue.getActiveUsersManager(), rmContext, null));
+ queue.getActiveUsersManager(), rmContext));
queue.submitApplication(app_0_1, user_0, A);
List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
@@ -517,7 +517,7 @@
TestUtils.getMockApplicationAttemptId(2, 0);
FiCaSchedulerApp app_1_0 =
spy(new FiCaSchedulerApp(appAttemptId_1_0, user_1, queue,
- queue.getActiveUsersManager(), rmContext, null));
+ queue.getActiveUsersManager(), rmContext));
queue.submitApplication(app_1_0, user_1, A);
List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
index 23bbebc..b4cc6b3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
@@ -40,7 +40,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.Task;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -76,7 +76,7 @@
@Before
public void setUp() throws Exception {
- Store store = StoreFactory.getStore(new Configuration());
+ RMStateStore store = StoreFactory.getStore(new Configuration());
resourceManager = new ResourceManager(store);
CapacitySchedulerConfiguration csConf
= new CapacitySchedulerConfiguration();
@@ -251,7 +251,7 @@
CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
setupQueueConfiguration(conf);
cs.setConf(new YarnConfiguration());
- cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, null,
+ cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null,
null, new RMContainerTokenSecretManager(conf),
new ClientToAMTokenSecretManagerInRM()));
checkQueueCapacities(cs, A_CAPACITY, B_CAPACITY);
@@ -349,7 +349,7 @@
conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".a.a1.b1", 100.0f);
conf.setUserLimitFactor(CapacitySchedulerConfiguration.ROOT + ".a.a1.b1", 100.0f);
- cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, null,
+ cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null,
null, new RMContainerTokenSecretManager(conf),
new ClientToAMTokenSecretManagerInRM()));
}
@@ -361,7 +361,7 @@
setupQueueConfiguration(csConf);
CapacityScheduler cs = new CapacityScheduler();
cs.setConf(new YarnConfiguration());
- cs.reinitialize(csConf, new RMContextImpl(null, null, null, null, null,
+ cs.reinitialize(csConf, new RMContextImpl(null, null, null, null,
null, null, new RMContainerTokenSecretManager(csConf),
new ClientToAMTokenSecretManagerInRM()));
@@ -387,7 +387,7 @@
CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
setupQueueConfiguration(conf);
cs.setConf(new YarnConfiguration());
- cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, null,
+ cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null,
null, new RMContainerTokenSecretManager(conf),
new ClientToAMTokenSecretManagerInRM()));
checkQueueCapacities(cs, A_CAPACITY, B_CAPACITY);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
index 868c2bb..81c9f5d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
@@ -250,14 +250,14 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_0, user_0, B);
final ApplicationAttemptId appAttemptId_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 =
new FiCaSchedulerApp(appAttemptId_1, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_1, user_0, B); // same user
@@ -295,14 +295,14 @@
final ApplicationAttemptId appAttemptId_0 = TestUtils
.getMockApplicationAttemptId(0, 1);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_d, d, null,
- rmContext, null);
+ rmContext);
d.submitApplication(app_0, user_d, D);
// Attempt the same application again
final ApplicationAttemptId appAttemptId_1 = TestUtils
.getMockApplicationAttemptId(0, 2);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_d, d, null,
- rmContext, null);
+ rmContext);
d.submitApplication(app_1, user_d, D); // same user
}
@@ -320,7 +320,7 @@
final ApplicationAttemptId appAttemptId_0 = TestUtils
.getMockApplicationAttemptId(0, 1);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, null,
- rmContext, null);
+ rmContext);
a.submitApplication(app_0, user_0, B);
when(cs.getApplication(appAttemptId_0)).thenReturn(app_0);
@@ -335,7 +335,7 @@
final ApplicationAttemptId appAttemptId_1 = TestUtils
.getMockApplicationAttemptId(0, 2);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, null,
- rmContext, null);
+ rmContext);
a.submitApplication(app_1, user_0, B); // same user
assertEquals(1, a.getMetrics().getAppsSubmitted());
@@ -371,14 +371,14 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_0, user_0, A);
final ApplicationAttemptId appAttemptId_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 =
new FiCaSchedulerApp(appAttemptId_1, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_1, user_0, A); // same user
@@ -495,21 +495,21 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_0, user_0, A);
final ApplicationAttemptId appAttemptId_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 =
new FiCaSchedulerApp(appAttemptId_1, user_0, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_1, user_0, A); // same user
final ApplicationAttemptId appAttemptId_2 =
TestUtils.getMockApplicationAttemptId(2, 0);
FiCaSchedulerApp app_2 =
new FiCaSchedulerApp(appAttemptId_2, user_1, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_2, user_1, A);
// Setup some nodes
@@ -588,21 +588,21 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_0, user_0, A);
final ApplicationAttemptId appAttemptId_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 =
new FiCaSchedulerApp(appAttemptId_1, user_0, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_1, user_0, A); // same user
final ApplicationAttemptId appAttemptId_2 =
TestUtils.getMockApplicationAttemptId(2, 0);
FiCaSchedulerApp app_2 =
new FiCaSchedulerApp(appAttemptId_2, user_1, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_2, user_1, A);
// Setup some nodes
@@ -699,28 +699,28 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_0, user_0, A);
final ApplicationAttemptId appAttemptId_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 =
new FiCaSchedulerApp(appAttemptId_1, user_0, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_1, user_0, A); // same user
final ApplicationAttemptId appAttemptId_2 =
TestUtils.getMockApplicationAttemptId(2, 0);
FiCaSchedulerApp app_2 =
new FiCaSchedulerApp(appAttemptId_2, user_1, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_2, user_1, A);
final ApplicationAttemptId appAttemptId_3 =
TestUtils.getMockApplicationAttemptId(3, 0);
FiCaSchedulerApp app_3 =
new FiCaSchedulerApp(appAttemptId_3, user_2, a,
- a.getActiveUsersManager(), rmContext, null);
+ a.getActiveUsersManager(), rmContext);
a.submitApplication(app_3, user_2, A);
// Setup some nodes
@@ -874,14 +874,14 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_0, user_0, A);
final ApplicationAttemptId appAttemptId_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 =
new FiCaSchedulerApp(appAttemptId_1, user_1, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_1, user_1, A);
// Setup some nodes
@@ -973,14 +973,14 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_0, user_0, A);
final ApplicationAttemptId appAttemptId_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 =
new FiCaSchedulerApp(appAttemptId_1, user_1, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_1, user_1, A);
// Setup some nodes
@@ -1072,14 +1072,14 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_0, user_0, A);
final ApplicationAttemptId appAttemptId_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 =
new FiCaSchedulerApp(appAttemptId_1, user_1, a,
- mock(ActiveUsersManager.class), rmContext, null);
+ mock(ActiveUsersManager.class), rmContext);
a.submitApplication(app_1, user_1, A);
// Setup some nodes
@@ -1187,7 +1187,7 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null));
+ mock(ActiveUsersManager.class), rmContext));
a.submitApplication(app_0, user_0, A);
// Setup some nodes and racks
@@ -1327,7 +1327,7 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null));
+ mock(ActiveUsersManager.class), rmContext));
a.submitApplication(app_0, user_0, A);
// Setup some nodes and racks
@@ -1457,7 +1457,7 @@
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 =
spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a,
- mock(ActiveUsersManager.class), rmContext, null));
+ mock(ActiveUsersManager.class), rmContext));
a.submitApplication(app_0, user_0, A);
// Setup some nodes and racks
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
index e529815..c31fa7b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
@@ -43,7 +43,7 @@
CapacityScheduler capacityScheduler = new CapacityScheduler();
capacityScheduler.setConf(conf);
- capacityScheduler.reinitialize(conf, new RMContextImpl(null, null, null,
+ capacityScheduler.reinitialize(conf, new RMContextImpl(null, null,
null, null, null, null, new RMContainerTokenSecretManager(conf),
new ClientToAMTokenSecretManagerInRM()));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
index 3e20f97..df53d85 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
@@ -83,7 +83,7 @@
Configuration conf = new Configuration();
RMContext rmContext =
- new RMContextImpl(null, nullDispatcher, cae, null, null, null,
+ new RMContextImpl(nullDispatcher, cae, null, null, null,
new ApplicationTokenSecretManager(conf),
new RMContainerTokenSecretManager(conf),
new ClientToAMTokenSecretManagerInRM());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java
index 11f96f4..e75b62d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java
@@ -108,8 +108,5 @@
}
@Override
- public void redistributeShare() {}
-
- @Override
public void updateDemand() {}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java
new file mode 100644
index 0000000..cbad156
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
+import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+public class TestFSLeafQueue {
+ private FSLeafQueue schedulable = null;
+ private Resource maxResource = Resources.createResource(10);
+
+ @Before
+ public void setup() throws IOException {
+ FairScheduler scheduler = new FairScheduler();
+ Configuration conf = createConfiguration();
+ // All tests assume only one assignment per node update
+ conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false");
+ RMStateStore store = StoreFactory.getStore(conf);
+ ResourceManager resourceManager = new ResourceManager(store);
+ resourceManager.init(conf);
+ ((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start();
+ scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+ String queueName = "root.queue1";
+ QueueManager mockMgr = mock(QueueManager.class);
+ when(mockMgr.getMaxResources(queueName)).thenReturn(maxResource);
+
+ schedulable = new FSLeafQueue(queueName, mockMgr, scheduler, null);
+ }
+
+ @Test
+ public void testUpdateDemand() {
+ AppSchedulable app = mock(AppSchedulable.class);
+ Mockito.when(app.getDemand()).thenReturn(maxResource);
+
+ schedulable.addAppSchedulable(app);
+ schedulable.addAppSchedulable(app);
+
+ schedulable.updateDemand();
+
+ assertTrue("Demand is greater than max allowed ",
+ Resources.equals(schedulable.getDemand(), maxResource));
+ }
+
+ private Configuration createConfiguration() {
+ Configuration conf = new YarnConfiguration();
+ conf.setClass(YarnConfiguration.RM_SCHEDULER, FairScheduler.class,
+ ResourceScheduler.class);
+ return conf;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSQueueSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSQueueSchedulable.java
deleted file mode 100644
index 0fc7479..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSQueueSchedulable.java
+++ /dev/null
@@ -1,42 +0,0 @@
-package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
-
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
-import org.junit.Before;
-import org.junit.Test;
-import org.mockito.Mockito;
-
-public class TestFSQueueSchedulable {
- private FSQueueSchedulable schedulable = null;
- private Resource maxResource = Resources.createResource(10);
-
- @Before
- public void setup() {
- String queueName = "testFSQueue";
- FSQueue mockQueue = mock(FSQueue.class);
- when(mockQueue.getName()).thenReturn(queueName);
-
- QueueManager mockMgr = mock(QueueManager.class);
- when(mockMgr.getMaxResources(queueName)).thenReturn(maxResource);
-
- schedulable = new FSQueueSchedulable(null, mockQueue, mockMgr, null, 0, 0);
- }
-
- @Test
- public void testUpdateDemand() {
- AppSchedulable app = mock(AppSchedulable.class);
- Mockito.when(app.getDemand()).thenReturn(maxResource);
-
- schedulable.addApp(app);
- schedulable.addApp(app);
-
- schedulable.updateDemand();
-
- assertTrue("Demand is greater than max allowed ",
- Resources.equals(schedulable.getDemand(), maxResource));
- }
-}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java
index f6ed364..62a1b9b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java
@@ -53,7 +53,7 @@
ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
FSSchedulerApp schedulerApp =
- new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null, null);
+ new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null);
// Default level should be node-local
assertEquals(NodeType.NODE_LOCAL, schedulerApp.getAllowedLocalityLevel(
@@ -111,7 +111,7 @@
ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
FSSchedulerApp schedulerApp =
- new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null, null);
+ new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null);
assertEquals(NodeType.OFF_SWITCH, schedulerApp.getAllowedLocalityLevel(
prio, 10, -1.0, -1.0));
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
index 620d0fb..56d247f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
@@ -31,6 +31,10 @@
import java.util.List;
import java.util.Map;
+import javax.xml.parsers.ParserConfigurationException;
+
+import junit.framework.Assert;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.authorize.AccessControlList;
import org.apache.hadoop.yarn.Clock;
@@ -47,7 +51,7 @@
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
@@ -61,6 +65,7 @@
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import org.xml.sax.SAXException;
public class TestFairScheduler {
@@ -97,7 +102,7 @@
Configuration conf = createConfiguration();
// All tests assume only one assignment per node update
conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false");
- Store store = StoreFactory.getStore(conf);
+ RMStateStore store = StoreFactory.getStore(conf);
resourceManager = new ResourceManager(store);
resourceManager.init(conf);
((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start();
@@ -195,15 +200,64 @@
scheduler.update();
- Collection<FSQueue> queues = scheduler.getQueueManager().getQueues();
+ Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
assertEquals(3, queues.size());
- for (FSQueue p : queues) {
- if (p.getName() != "default") {
- assertEquals(5120, p.getQueueSchedulable().getFairShare().getMemory());
+ for (FSLeafQueue p : queues) {
+ if (!p.getName().equals("root.default")) {
+ assertEquals(5120, p.getFairShare().getMemory());
}
}
}
+
+ @Test
+ public void testSimpleHierarchicalFairShareCalculation() {
+ // Add one big node (only care about aggregate capacity)
+ int capacity = 10 * 24;
+ RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(capacity));
+ NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
+ scheduler.handle(nodeEvent1);
+
+ // Have two queues which want entire cluster capacity
+ createSchedulingRequest(10 * 1024, "queue1", "user1");
+ createSchedulingRequest(10 * 1024, "parent.queue2", "user1");
+ createSchedulingRequest(10 * 1024, "parent.queue3", "user1");
+
+ scheduler.update();
+
+ QueueManager queueManager = scheduler.getQueueManager();
+ Collection<FSLeafQueue> queues = queueManager.getLeafQueues();
+ assertEquals(4, queues.size());
+
+ FSLeafQueue queue1 = queueManager.getLeafQueue("queue1");
+ FSLeafQueue queue2 = queueManager.getLeafQueue("parent.queue2");
+ FSLeafQueue queue3 = queueManager.getLeafQueue("parent.queue3");
+ assertEquals(capacity / 2, queue1.getFairShare().getMemory());
+ assertEquals(capacity / 4, queue2.getFairShare().getMemory());
+ assertEquals(capacity / 4, queue3.getFairShare().getMemory());
+ }
+
+ @Test
+ public void testHierarchicalQueuesSimilarParents() {
+ QueueManager queueManager = scheduler.getQueueManager();
+ FSLeafQueue leafQueue = queueManager.getLeafQueue("parent.child");
+ Assert.assertEquals(2, queueManager.getLeafQueues().size());
+ Assert.assertNotNull(leafQueue);
+ Assert.assertEquals("root.parent.child", leafQueue.getName());
+
+ FSLeafQueue leafQueue2 = queueManager.getLeafQueue("parent");
+ Assert.assertNull(leafQueue2);
+ Assert.assertEquals(2, queueManager.getLeafQueues().size());
+
+ FSLeafQueue leafQueue3 = queueManager.getLeafQueue("parent.child.grandchild");
+ Assert.assertNull(leafQueue3);
+ Assert.assertEquals(2, queueManager.getLeafQueues().size());
+
+ FSLeafQueue leafQueue4 = queueManager.getLeafQueue("parent.sister");
+ Assert.assertNotNull(leafQueue4);
+ Assert.assertEquals("root.parent.sister", leafQueue4.getName());
+ Assert.assertEquals(3, queueManager.getLeafQueues().size());
+ }
@Test
public void testSimpleContainerAllocation() {
@@ -228,14 +282,14 @@
// Asked for less than min_allocation.
assertEquals(YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
scheduler.getQueueManager().getQueue("queue1").
- getQueueSchedulable().getResourceUsage().getMemory());
+ getResourceUsage().getMemory());
NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2,
new ArrayList<ContainerStatus>(), new ArrayList<ContainerStatus>());
scheduler.handle(updateEvent2);
assertEquals(1024, scheduler.getQueueManager().getQueue("queue1").
- getQueueSchedulable().getResourceUsage().getMemory());
+ getResourceUsage().getMemory());
}
@Test
@@ -254,7 +308,7 @@
// Make sure queue 1 is allocated app capacity
assertEquals(1024, scheduler.getQueueManager().getQueue("queue1").
- getQueueSchedulable().getResourceUsage().getMemory());
+ getResourceUsage().getMemory());
// Now queue 2 requests likewise
ApplicationAttemptId attId = createSchedulingRequest(1024, "queue2", "user1", 1);
@@ -263,7 +317,7 @@
// Make sure queue 2 is waiting with a reservation
assertEquals(0, scheduler.getQueueManager().getQueue("queue2").
- getQueueSchedulable().getResourceUsage().getMemory());
+ getResourceUsage().getMemory());
assertEquals(1024, scheduler.applications.get(attId).getCurrentReservation().getMemory());
// Now another node checks in with capacity
@@ -276,7 +330,7 @@
// Make sure this goes to queue 2
assertEquals(1024, scheduler.getQueueManager().getQueue("queue2").
- getQueueSchedulable().getResourceUsage().getMemory());
+ getResourceUsage().getMemory());
// The old reservation should still be there...
assertEquals(1024, scheduler.applications.get(attId).getCurrentReservation().getMemory());
@@ -294,17 +348,22 @@
AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(
createAppAttemptId(1, 1), "default", "user1");
scheduler.handle(appAddedEvent);
- assertEquals(1, scheduler.getQueueManager().getQueue("user1").getApplications().size());
- assertEquals(0, scheduler.getQueueManager().getQueue("default").getApplications().size());
+ assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1")
+ .getAppSchedulables().size());
+ assertEquals(0, scheduler.getQueueManager().getLeafQueue("default")
+ .getAppSchedulables().size());
conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "false");
scheduler.reinitialize(conf, resourceManager.getRMContext());
AppAddedSchedulerEvent appAddedEvent2 = new AppAddedSchedulerEvent(
createAppAttemptId(2, 1), "default", "user2");
scheduler.handle(appAddedEvent2);
- assertEquals(1, scheduler.getQueueManager().getQueue("user1").getApplications().size());
- assertEquals(1, scheduler.getQueueManager().getQueue("default").getApplications().size());
- assertEquals(0, scheduler.getQueueManager().getQueue("user2").getApplications().size());
+ assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1")
+ .getAppSchedulables().size());
+ assertEquals(1, scheduler.getQueueManager().getLeafQueue("default")
+ .getAppSchedulables().size());
+ assertEquals(0, scheduler.getQueueManager().getLeafQueue("user2")
+ .getAppSchedulables().size());
}
@Test
@@ -338,18 +397,17 @@
scheduler.update();
- Collection<FSQueue> queues = scheduler.getQueueManager().getQueues();
+ Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
assertEquals(3, queues.size());
- for (FSQueue p : queues) {
- if (p.getName().equals("queueA")) {
- assertEquals(1024, p.getQueueSchedulable().getFairShare().getMemory());
+ for (FSLeafQueue p : queues) {
+ if (p.getName().equals("root.queueA")) {
+ assertEquals(1024, p.getFairShare().getMemory());
}
- else if (p.getName().equals("queueB")) {
- assertEquals(2048, p.getQueueSchedulable().getFairShare().getMemory());
+ else if (p.getName().equals("root.queueB")) {
+ assertEquals(2048, p.getFairShare().getMemory());
}
}
-
}
/**
@@ -358,11 +416,11 @@
@Test
public void testQueueDemandCalculation() throws Exception {
ApplicationAttemptId id11 = createAppAttemptId(1, 1);
- scheduler.addApplication(id11, "queue1", "user1");
+ scheduler.addApplication(id11, "root.queue1", "user1");
ApplicationAttemptId id21 = createAppAttemptId(2, 1);
- scheduler.addApplication(id21, "queue2", "user1");
+ scheduler.addApplication(id21, "root.queue2", "user1");
ApplicationAttemptId id22 = createAppAttemptId(2, 2);
- scheduler.addApplication(id22, "queue2", "user1");
+ scheduler.addApplication(id22, "root.queue2", "user1");
int minReqSize = YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB;
@@ -388,10 +446,10 @@
scheduler.update();
- assertEquals(2 * minReqSize, scheduler.getQueueManager().getQueue("queue1")
- .getQueueSchedulable().getDemand().getMemory());
+ assertEquals(2 * minReqSize, scheduler.getQueueManager().getQueue("root.queue1")
+ .getDemand().getMemory());
assertEquals(2 * minReqSize + 2 * minReqSize + (2 * minReqSize), scheduler
- .getQueueManager().getQueue("queue2").getQueueSchedulable().getDemand()
+ .getQueueManager().getQueue("root.queue2").getDemand()
.getMemory());
}
@@ -402,10 +460,11 @@
scheduler.handle(appAddedEvent1);
// Scheduler should have two queues (the default and the one created for user1)
- assertEquals(2, scheduler.getQueueManager().getQueues().size());
+ assertEquals(2, scheduler.getQueueManager().getLeafQueues().size());
// That queue should have one app
- assertEquals(1, scheduler.getQueueManager().getQueue("user1").getApplications().size());
+ assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1")
+ .getAppSchedulables().size());
AppRemovedSchedulerEvent appRemovedEvent1 = new AppRemovedSchedulerEvent(
createAppAttemptId(1, 1), RMAppAttemptState.FINISHED);
@@ -414,7 +473,8 @@
scheduler.handle(appRemovedEvent1);
// Queue should have no apps
- assertEquals(0, scheduler.getQueueManager().getQueue("user1").getApplications().size());
+ assertEquals(0, scheduler.getQueueManager().getLeafQueue("user1")
+ .getAppSchedulables().size());
}
@Test
@@ -466,61 +526,99 @@
QueueManager queueManager = scheduler.getQueueManager();
queueManager.initialize();
- assertEquals(6, queueManager.getQueues().size()); // 5 in file + default queue
+ assertEquals(6, queueManager.getLeafQueues().size()); // 5 in file + default queue
assertEquals(Resources.createResource(0),
- queueManager.getMinResources(YarnConfiguration.DEFAULT_QUEUE_NAME));
+ queueManager.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
assertEquals(Resources.createResource(0),
- queueManager.getMinResources(YarnConfiguration.DEFAULT_QUEUE_NAME));
+ queueManager.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
assertEquals(Resources.createResource(1024),
- queueManager.getMinResources("queueA"));
+ queueManager.getMinResources("root.queueA"));
assertEquals(Resources.createResource(2048),
- queueManager.getMinResources("queueB"));
+ queueManager.getMinResources("root.queueB"));
assertEquals(Resources.createResource(0),
- queueManager.getMinResources("queueC"));
+ queueManager.getMinResources("root.queueC"));
assertEquals(Resources.createResource(0),
- queueManager.getMinResources("queueD"));
+ queueManager.getMinResources("root.queueD"));
assertEquals(Resources.createResource(0),
- queueManager.getMinResources("queueE"));
+ queueManager.getMinResources("root.queueE"));
- assertEquals(15, queueManager.getQueueMaxApps(YarnConfiguration.DEFAULT_QUEUE_NAME));
- assertEquals(15, queueManager.getQueueMaxApps("queueA"));
- assertEquals(15, queueManager.getQueueMaxApps("queueB"));
- assertEquals(15, queueManager.getQueueMaxApps("queueC"));
- assertEquals(3, queueManager.getQueueMaxApps("queueD"));
- assertEquals(15, queueManager.getQueueMaxApps("queueE"));
+ assertEquals(15, queueManager.getQueueMaxApps("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
+ assertEquals(15, queueManager.getQueueMaxApps("root.queueA"));
+ assertEquals(15, queueManager.getQueueMaxApps("root.queueB"));
+ assertEquals(15, queueManager.getQueueMaxApps("root.queueC"));
+ assertEquals(3, queueManager.getQueueMaxApps("root.queueD"));
+ assertEquals(15, queueManager.getQueueMaxApps("root.queueE"));
assertEquals(10, queueManager.getUserMaxApps("user1"));
assertEquals(5, queueManager.getUserMaxApps("user2"));
// Unspecified queues should get default ACL
- Map<QueueACL, AccessControlList> aclsA = queueManager.getQueueAcls("queueA");
+ Map<QueueACL, AccessControlList> aclsA = queueManager.getQueueAcls("root.queueA");
assertTrue(aclsA.containsKey(QueueACL.ADMINISTER_QUEUE));
assertEquals("*", aclsA.get(QueueACL.ADMINISTER_QUEUE).getAclString());
assertTrue(aclsA.containsKey(QueueACL.SUBMIT_APPLICATIONS));
assertEquals("*", aclsA.get(QueueACL.SUBMIT_APPLICATIONS).getAclString());
// Queue B ACL
- Map<QueueACL, AccessControlList> aclsB = queueManager.getQueueAcls("queueB");
+ Map<QueueACL, AccessControlList> aclsB = queueManager.getQueueAcls("root.queueB");
assertTrue(aclsB.containsKey(QueueACL.ADMINISTER_QUEUE));
assertEquals("alice,bob admins", aclsB.get(QueueACL.ADMINISTER_QUEUE).getAclString());
// Queue c ACL
- Map<QueueACL, AccessControlList> aclsC = queueManager.getQueueAcls("queueC");
+ Map<QueueACL, AccessControlList> aclsC = queueManager.getQueueAcls("root.queueC");
assertTrue(aclsC.containsKey(QueueACL.SUBMIT_APPLICATIONS));
assertEquals("alice,bob admins", aclsC.get(QueueACL.SUBMIT_APPLICATIONS).getAclString());
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout(
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root." +
YarnConfiguration.DEFAULT_QUEUE_NAME));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueA"));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueB"));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueC"));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueD"));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueA"));
- assertEquals(60000, queueManager.getMinSharePreemptionTimeout("queueE"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueA"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueB"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueC"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueD"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueA"));
+ assertEquals(60000, queueManager.getMinSharePreemptionTimeout("root.queueE"));
assertEquals(300000, queueManager.getFairSharePreemptionTimeout());
}
@Test
+ public void testHierarchicalQueueAllocationFileParsing() throws IOException, SAXException,
+ AllocationConfigurationException, ParserConfigurationException {
+ Configuration conf = createConfiguration();
+ conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
+ scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+ PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
+ out.println("<?xml version=\"1.0\"?>");
+ out.println("<allocations>");
+ out.println("<queue name=\"queueA\">");
+ out.println("<minResources>2048</minResources>");
+ out.println("</queue>");
+ out.println("<queue name=\"queueB\">");
+ out.println("<minResources>2048</minResources>");
+ out.println("<queue name=\"queueC\">");
+ out.println("<minResources>2048</minResources>");
+ out.println("</queue>");
+ out.println("<queue name=\"queueD\">");
+ out.println("<minResources>2048</minResources>");
+ out.println("</queue>");
+ out.println("</queue>");
+ out.println("</allocations>");
+ out.close();
+
+ QueueManager queueManager = scheduler.getQueueManager();
+ queueManager.initialize();
+
+ Collection<FSLeafQueue> leafQueues = queueManager.getLeafQueues();
+ Assert.assertEquals(4, leafQueues.size());
+ Assert.assertNotNull(queueManager.getLeafQueue("queueA"));
+ Assert.assertNotNull(queueManager.getLeafQueue("queueB.queueC"));
+ Assert.assertNotNull(queueManager.getLeafQueue("queueB.queueD"));
+ Assert.assertNotNull(queueManager.getLeafQueue("default"));
+ // Make sure querying for queues didn't create any new ones:
+ Assert.assertEquals(4, leafQueues.size());
+ }
+
+ @Test
public void testBackwardsCompatibleAllocationFileParsing() throws Exception {
Configuration conf = createConfiguration();
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
@@ -569,29 +667,29 @@
QueueManager queueManager = scheduler.getQueueManager();
queueManager.initialize();
- assertEquals(6, queueManager.getQueues().size()); // 5 in file + default queue
+ assertEquals(6, queueManager.getLeafQueues().size()); // 5 in file + default queue
assertEquals(Resources.createResource(0),
- queueManager.getMinResources(YarnConfiguration.DEFAULT_QUEUE_NAME));
+ queueManager.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
assertEquals(Resources.createResource(0),
- queueManager.getMinResources(YarnConfiguration.DEFAULT_QUEUE_NAME));
+ queueManager.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
assertEquals(Resources.createResource(1024),
- queueManager.getMinResources("queueA"));
+ queueManager.getMinResources("root.queueA"));
assertEquals(Resources.createResource(2048),
- queueManager.getMinResources("queueB"));
+ queueManager.getMinResources("root.queueB"));
assertEquals(Resources.createResource(0),
- queueManager.getMinResources("queueC"));
+ queueManager.getMinResources("root.queueC"));
assertEquals(Resources.createResource(0),
- queueManager.getMinResources("queueD"));
+ queueManager.getMinResources("root.queueD"));
assertEquals(Resources.createResource(0),
- queueManager.getMinResources("queueE"));
+ queueManager.getMinResources("root.queueE"));
- assertEquals(15, queueManager.getQueueMaxApps(YarnConfiguration.DEFAULT_QUEUE_NAME));
- assertEquals(15, queueManager.getQueueMaxApps("queueA"));
- assertEquals(15, queueManager.getQueueMaxApps("queueB"));
- assertEquals(15, queueManager.getQueueMaxApps("queueC"));
- assertEquals(3, queueManager.getQueueMaxApps("queueD"));
- assertEquals(15, queueManager.getQueueMaxApps("queueE"));
+ assertEquals(15, queueManager.getQueueMaxApps("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
+ assertEquals(15, queueManager.getQueueMaxApps("root.queueA"));
+ assertEquals(15, queueManager.getQueueMaxApps("root.queueB"));
+ assertEquals(15, queueManager.getQueueMaxApps("root.queueC"));
+ assertEquals(3, queueManager.getQueueMaxApps("root.queueD"));
+ assertEquals(15, queueManager.getQueueMaxApps("root.queueE"));
assertEquals(10, queueManager.getUserMaxApps("user1"));
assertEquals(5, queueManager.getUserMaxApps("user2"));
@@ -603,23 +701,23 @@
assertEquals("*", aclsA.get(QueueACL.SUBMIT_APPLICATIONS).getAclString());
// Queue B ACL
- Map<QueueACL, AccessControlList> aclsB = queueManager.getQueueAcls("queueB");
+ Map<QueueACL, AccessControlList> aclsB = queueManager.getQueueAcls("root.queueB");
assertTrue(aclsB.containsKey(QueueACL.ADMINISTER_QUEUE));
assertEquals("alice,bob admins", aclsB.get(QueueACL.ADMINISTER_QUEUE).getAclString());
// Queue c ACL
- Map<QueueACL, AccessControlList> aclsC = queueManager.getQueueAcls("queueC");
+ Map<QueueACL, AccessControlList> aclsC = queueManager.getQueueAcls("root.queueC");
assertTrue(aclsC.containsKey(QueueACL.SUBMIT_APPLICATIONS));
assertEquals("alice,bob admins", aclsC.get(QueueACL.SUBMIT_APPLICATIONS).getAclString());
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout(
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root." +
YarnConfiguration.DEFAULT_QUEUE_NAME));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueA"));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueB"));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueC"));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueD"));
- assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueA"));
- assertEquals(60000, queueManager.getMinSharePreemptionTimeout("queueE"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueA"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueB"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueC"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueD"));
+ assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueA"));
+ assertEquals(60000, queueManager.getMinSharePreemptionTimeout("root.queueE"));
assertEquals(300000, queueManager.getFairSharePreemptionTimeout());
}
@@ -659,25 +757,25 @@
// Queue B arrives and wants 1 * 1024
createSchedulingRequest(1 * 1024, "queueB", "user1");
scheduler.update();
- Collection<FSQueue> queues = scheduler.getQueueManager().getQueues();
+ Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
assertEquals(3, queues.size());
// Queue A should be above min share, B below.
- for (FSQueue p : queues) {
- if (p.getName().equals("queueA")) {
- assertEquals(false, scheduler.isStarvedForMinShare(p.getQueueSchedulable()));
+ for (FSLeafQueue p : queues) {
+ if (p.getName().equals("root.queueA")) {
+ assertEquals(false, scheduler.isStarvedForMinShare(p));
}
- else if (p.getName().equals("queueB")) {
- assertEquals(true, scheduler.isStarvedForMinShare(p.getQueueSchedulable()));
+ else if (p.getName().equals("root.queueB")) {
+ assertEquals(true, scheduler.isStarvedForMinShare(p));
}
}
// Node checks in again, should allocate for B
scheduler.handle(nodeEvent2);
// Now B should have min share ( = demand here)
- for (FSQueue p : queues) {
- if (p.getName().equals("queueB")) {
- assertEquals(false, scheduler.isStarvedForMinShare(p.getQueueSchedulable()));
+ for (FSLeafQueue p : queues) {
+ if (p.getName().equals("root.queueB")) {
+ assertEquals(false, scheduler.isStarvedForMinShare(p));
}
}
}
@@ -718,16 +816,16 @@
// Queue B arrives and wants 1 * 1024
createSchedulingRequest(1 * 1024, "queueB", "user1");
scheduler.update();
- Collection<FSQueue> queues = scheduler.getQueueManager().getQueues();
+ Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
assertEquals(3, queues.size());
// Queue A should be above fair share, B below.
- for (FSQueue p : queues) {
- if (p.getName().equals("queueA")) {
- assertEquals(false, scheduler.isStarvedForFairShare(p.getQueueSchedulable()));
+ for (FSLeafQueue p : queues) {
+ if (p.getName().equals("root.queueA")) {
+ assertEquals(false, scheduler.isStarvedForFairShare(p));
}
- else if (p.getName().equals("queueB")) {
- assertEquals(true, scheduler.isStarvedForFairShare(p.getQueueSchedulable()));
+ else if (p.getName().equals("root.queueB")) {
+ assertEquals(true, scheduler.isStarvedForFairShare(p));
}
}
@@ -735,9 +833,9 @@
scheduler.handle(nodeEvent2);
// B should not be starved for fair share, since entire demand is
// satisfied.
- for (FSQueue p : queues) {
- if (p.getName().equals("queueB")) {
- assertEquals(false, scheduler.isStarvedForFairShare(p.getQueueSchedulable()));
+ for (FSLeafQueue p : queues) {
+ if (p.getName().equals("root.queueB")) {
+ assertEquals(false, scheduler.isStarvedForFairShare(p));
}
}
}
@@ -845,7 +943,7 @@
// We should be able to claw back one container from A and B each.
// Make sure it is lowest priority container.
- scheduler.preemptResources(scheduler.getQueueSchedulables(),
+ scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
Resources.createResource(2 * 1024));
assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size());
assertEquals(1, scheduler.applications.get(app2).getLiveContainers().size());
@@ -856,7 +954,7 @@
// We should be able to claw back another container from A and B each.
// Make sure it is lowest priority container.
- scheduler.preemptResources(scheduler.getQueueSchedulables(),
+ scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
Resources.createResource(2 * 1024));
assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size());
assertEquals(0, scheduler.applications.get(app2).getLiveContainers().size());
@@ -866,7 +964,7 @@
assertEquals(0, scheduler.applications.get(app6).getLiveContainers().size());
// Now A and B are below fair share, so preemption shouldn't do anything
- scheduler.preemptResources(scheduler.getQueueSchedulables(),
+ scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
Resources.createResource(2 * 1024));
assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size());
assertEquals(0, scheduler.applications.get(app2).getLiveContainers().size());
@@ -977,10 +1075,10 @@
scheduler.update();
- FSQueueSchedulable schedC =
- scheduler.getQueueManager().getQueue("queueC").getQueueSchedulable();
- FSQueueSchedulable schedD =
- scheduler.getQueueManager().getQueue("queueD").getQueueSchedulable();
+ FSLeafQueue schedC =
+ scheduler.getQueueManager().getLeafQueue("queueC");
+ FSLeafQueue schedD =
+ scheduler.getQueueManager().getLeafQueue("queueD");
assertTrue(Resources.equals(
Resources.none(), scheduler.resToPreempt(schedC, clock.getTime())));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java
index c329d9d..78f0145 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java
@@ -27,7 +27,7 @@
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.junit.After;
@@ -50,7 +50,7 @@
// All tests assume only one assignment per node update
conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false");
- Store store = StoreFactory.getStore(conf);
+ RMStateStore store = StoreFactory.getStore(conf);
resourceManager = new ResourceManager(store);
resourceManager.init(conf);
((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java
index 90a9de1..5325119 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java
@@ -38,7 +38,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.Task;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher;
@@ -59,7 +59,7 @@
@Before
public void setUp() throws Exception {
- Store store = StoreFactory.getStore(new Configuration());
+ RMStateStore store = StoreFactory.getStore(new Configuration());
resourceManager = new ResourceManager(store);
Configuration conf = new Configuration();
conf.setClass(YarnConfiguration.RM_SCHEDULER,
@@ -91,7 +91,7 @@
@Test
public void testAppAttemptMetrics() throws Exception {
AsyncDispatcher dispatcher = new InlineDispatcher();
- RMContext rmContext = new RMContextImpl(null, dispatcher, null,
+ RMContext rmContext = new RMContextImpl(dispatcher, null,
null, null, null, null, null, null);
FifoScheduler schedular = new FifoScheduler();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java
index 67b2903..b6e5ba3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java
@@ -38,7 +38,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager.MockAsm;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -160,7 +159,7 @@
for (RMNode node : deactivatedNodes) {
deactivatedNodesMap.put(node.getHostName(), node);
}
- return new RMContextImpl(new MemStore(), null, null, null, null,
+ return new RMContextImpl(null, null, null, null,
null, null, null, null) {
@Override
public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
@@ -201,7 +200,7 @@
CapacityScheduler cs = new CapacityScheduler();
cs.setConf(new YarnConfiguration());
- cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, null,
+ cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null,
null, new RMContainerTokenSecretManager(conf),
new ClientToAMTokenSecretManagerInRM()));
return cs;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
index 9a7a978..1c4f2cf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
@@ -48,7 +48,7 @@
import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
import org.apache.hadoop.yarn.service.AbstractService;
import org.apache.hadoop.yarn.service.CompositeService;
@@ -154,7 +154,7 @@
getConfig().set(YarnConfiguration.RM_WEBAPP_ADDRESS,
MiniYARNCluster.getHostname() + ":0");
}
- Store store = StoreFactory.getStore(getConfig());
+ RMStateStore store = StoreFactory.getStore(getConfig());
resourceManager = new ResourceManager(store) {
@Override
protected void doSecureLogin() throws IOException {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
index b5da6bb..988c42d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
@@ -53,6 +53,22 @@
capacity between the running apps. queues can also be given weights to share
the cluster non-proportionally in the config file.
+ The fair scheduler supports hierarchical queues. All queues descend from a
+ queue named "root". Available resources are distributed among the children
+ of the root queue in the typical fair scheduling fashion. Then, the children
+ distribute the resources assigned to them to their children in the same
+ fashion. Applications may only be scheduled on leaf queues. Queues can be
+ specified as children of other queues by placing them as sub-elements of
+ their parents in the fair scheduler configuration file.
+
+ A queue's name starts with the names of its parents, with periods as
+ separators. So a queue named "queue1" under the root queue would be
+ referred to as "root.queue1", and a queue named "queue2" under a queue
+ named "parent1" would be referred to as "root.parent1.queue2". When
+ referring to queues, the root part of the name is optional, so queue1 could
+ be referred to as just "queue1", and queue2 could be referred to as just
+ "parent1.queue2".
+
In addition to providing fair sharing, the Fair Scheduler allows assigning
guaranteed minimum shares to queues, which is useful for ensuring that
certain users, groups or production applications always get sufficient
@@ -163,11 +179,14 @@
<?xml version="1.0"?>
<allocations>
<queue name="sample_queue">
- <minResources>100000</minResources>
- <maxResources>900000</maxResources>
+ <minResources>10000</minResources>
+ <maxResources>90000</maxResources>
<maxRunningApps>50</maxRunningApps>
<weight>2.0</weight>
<schedulingMode>fair</schedulingMode>
+ <queue name="sample_sub_queue">
+ <minResources>5000</minResources>
+ </queue>
</queue>
<user name="sample_user">
<maxRunningApps>30</maxRunningApps>