| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.hdfs.server.namenode.ha; |
| |
| import java.io.IOException; |
| import java.util.concurrent.TimeoutException; |
| |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.commons.logging.impl.Log4JLogger; |
| import org.apache.hadoop.fs.BlockLocation; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hdfs.DFSConfigKeys; |
| import org.apache.hadoop.hdfs.DFSTestUtil; |
| import org.apache.hadoop.hdfs.MiniDFSCluster; |
| import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; |
| import org.apache.hadoop.ipc.Server; |
| import org.apache.hadoop.test.GenericTestUtils; |
| import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; |
| import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; |
| import org.apache.log4j.Level; |
| import org.junit.Assert; |
| import org.junit.Test; |
| |
| import com.google.common.base.Supplier; |
| |
| |
| /** |
| * Stress-test for potential bugs when replication is changing |
| * on blocks during a failover. |
| */ |
| public class TestDNFencingWithReplication { |
| static { |
| ((Log4JLogger)FSNamesystem.auditLog).getLogger().setLevel(Level.WARN); |
| ((Log4JLogger)Server.LOG).getLogger().setLevel(Level.FATAL); |
| ((Log4JLogger)LogFactory.getLog( |
| "org.apache.hadoop.io.retry.RetryInvocationHandler")) |
| .getLogger().setLevel(Level.FATAL); |
| } |
| |
| private static final int NUM_THREADS = 20; |
| // How long should the test try to run for. In practice |
| // it runs for ~20-30s longer than this constant due to startup/ |
| // shutdown time. |
| private static final long RUNTIME = 35000; |
| private static final int BLOCK_SIZE = 1024; |
| |
| private static class ReplicationToggler extends RepeatingTestThread { |
| private final FileSystem fs; |
| private final Path path; |
| |
| public ReplicationToggler(TestContext ctx, FileSystem fs, Path p) { |
| super(ctx); |
| this.fs = fs; |
| this.path = p; |
| } |
| |
| @Override |
| public void doAnAction() throws Exception { |
| fs.setReplication(path, (short)1); |
| waitForReplicas(1); |
| fs.setReplication(path, (short)2); |
| waitForReplicas(2); |
| } |
| |
| private void waitForReplicas(final int replicas) throws Exception { |
| try { |
| GenericTestUtils.waitFor(new Supplier<Boolean>() { |
| @Override |
| public Boolean get() { |
| try { |
| BlockLocation[] blocks = fs.getFileBlockLocations(path, 0, 10); |
| Assert.assertEquals(1, blocks.length); |
| return blocks[0].getHosts().length == replicas; |
| } catch (IOException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| }, 100, 60000); |
| } catch (TimeoutException te) { |
| throw new IOException("Timed out waiting for " + replicas + " replicas " + |
| "on path " + path); |
| } |
| } |
| |
| @Override |
| public String toString() { |
| return "Toggler for " + path; |
| } |
| } |
| |
| @Test |
| public void testFencingStress() throws Exception { |
| HAStressTestHarness harness = new HAStressTestHarness(); |
| harness.conf.setInt( |
| DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000); |
| |
| final MiniDFSCluster cluster = harness.startCluster(); |
| try { |
| cluster.waitActive(); |
| cluster.transitionToActive(0); |
| |
| FileSystem fs = harness.getFailoverFs(); |
| TestContext togglers = new TestContext(); |
| for (int i = 0; i < NUM_THREADS; i++) { |
| Path p = new Path("/test-" + i); |
| DFSTestUtil.createFile(fs, p, BLOCK_SIZE*10, (short)3, (long)i); |
| togglers.addThread(new ReplicationToggler(togglers, fs, p)); |
| } |
| |
| // Start a separate thread which will make sure that replication |
| // happens quickly by triggering deletion reports and replication |
| // work calculation frequently. |
| harness.addReplicationTriggerThread(500); |
| harness.addFailoverThread(5000); |
| harness.startThreads(); |
| togglers.startThreads(); |
| |
| togglers.waitFor(RUNTIME); |
| togglers.stop(); |
| harness.stopThreads(); |
| |
| // CHeck that the files can be read without throwing |
| for (int i = 0; i < NUM_THREADS; i++) { |
| Path p = new Path("/test-" + i); |
| DFSTestUtil.readFile(fs, p); |
| } |
| } finally { |
| System.err.println("===========================\n\n\n\n"); |
| harness.shutdown(); |
| } |
| |
| } |
| } |