blob: 8647e84bcf82eb9793836cde1d64b32758c0086b [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
package org.apache.geode.internal.cache.partitioned;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.apache.geode.cache.RegionShortcut.PARTITION_PERSISTENT;
import static org.apache.geode.distributed.ConfigurationProperties.DISABLE_JMX;
import static org.apache.geode.distributed.ConfigurationProperties.ENABLE_NETWORK_PARTITION_DETECTION;
import static org.apache.geode.test.awaitility.GeodeAwaitility.getTimeout;
import static org.apache.geode.test.dunit.IgnoredException.addIgnoredException;
import static org.apache.geode.test.dunit.VM.addVMEventListener;
import static org.apache.geode.test.dunit.VM.getAllVMs;
import static org.apache.geode.test.dunit.VM.getController;
import static org.apache.geode.test.dunit.VM.getVM;
import static org.apache.geode.test.dunit.VM.removeVMEventListener;
import static org.apache.geode.test.dunit.VM.toArray;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.apache.geode.CancelException;
import org.apache.geode.cache.PartitionAttributesFactory;
import org.apache.geode.cache.Region;
import org.apache.geode.cache.RegionFactory;
import org.apache.geode.cache.persistence.PartitionOfflineException;
import org.apache.geode.distributed.internal.ClusterDistributionManager;
import org.apache.geode.distributed.internal.DistributionMessage;
import org.apache.geode.distributed.internal.DistributionMessageObserver;
import org.apache.geode.internal.cache.InitialImageOperation.RequestImageMessage;
import org.apache.geode.internal.cache.InternalCache;
import org.apache.geode.internal.cache.control.InternalResourceManager;
import org.apache.geode.internal.cache.control.InternalResourceManager.ResourceObserver;
import org.apache.geode.internal.cache.control.InternalResourceManager.ResourceObserverAdapter;
import org.apache.geode.test.dunit.AsyncInvocation;
import org.apache.geode.test.dunit.IgnoredException;
import org.apache.geode.test.dunit.VM;
import org.apache.geode.test.dunit.VMEventListener;
import org.apache.geode.test.dunit.rules.CacheRule;
import org.apache.geode.test.dunit.rules.DistributedDiskDirRule;
import org.apache.geode.test.dunit.rules.DistributedRule;
import org.apache.geode.test.junit.rules.serializable.SerializableTestName;
* Extracted from {@link PersistentPartitionedRegionRegressionTest}.
public class PersistentPartitionHangsDuringRestartRegressionTest implements Serializable {
// Timeout in milliseconds, taken from GeodeAwaitility.getTimeout(); used as the upper bound when
// the test awaits beforeBounceLatch.
private static final long TIMEOUT_MILLIS = getTimeout().getValueInMS();
/** WAIT_TO_BOUNCE latch is never counted down -- it prevents responding to RequestImageMessage */
private static final CountDownLatch WAIT_TO_BOUNCE = new CountDownLatch(99);
// Re-created with a count of one in setUp(). Counted down inside the controller VM by
// WaitToBounceWhenImageRequested when it sees an image request for a bucket region.
private static volatile CountDownLatch beforeBounceLatch;
// Re-created with a count of one in setUp().
// NOTE(review): presumably counted down from vmEventListener.afterBounceVM -- confirm.
private static volatile CountDownLatch afterBounceLatch;
// Callback object for dunit VM events. Declared transient so it is left out when this
// Serializable test instance is serialized.
private final transient VMEventListener vmEventListener = new VMEventListener() {
// NOTE(review): presumably signals afterBounceLatch once a VM has been bounced -- confirm.
public void afterBounceVM(VM vm) {
// Region name built in setUp() from the test class name and the current test method name.
private String partitionedRegionName;
// VM handles assigned in setUp(): the controller VM and the dunit VMs with index 0 and 1.
private VM vmController;
private VM vm0;
private VM vm1;
// Test fixtures.
// NOTE(review): these are presumably registered with JUnit via @Rule -- confirm.
public DistributedRule distributedRule = new DistributedRule();
// Source of the cache returned by getCache().
public CacheRule cacheRule = new CacheRule();
// Supplies the test method name used in setUp() to build a unique region name.
public SerializableTestName testName = new SerializableTestName();
public DistributedDiskDirRule diskDirRule = new DistributedDiskDirRule();
/**
 * Resets the static bounce latches, captures the VM handles used by the test and derives a
 * region name from the test class and test method names.
 */
public void setUp() {
// The latch fields are static, so each run gets fresh single-count latches.
beforeBounceLatch = new CountDownLatch(1);
afterBounceLatch = new CountDownLatch(1);
vmController = getController();
vm0 = getVM(0);
vm1 = getVM(1);
// Class name plus test method name keeps region names distinct between test methods.
String uniqueName = getClass().getSimpleName() + "-" + testName.getMethodName();
partitionedRegionName = uniqueName + "-partitionedRegion";
/**
 * Invokes a task in every VM returned by {@code toArray(getAllVMs(), getController())}.
 */
public void tearDown() {
for (VM vm : toArray(getAllVMs(), getController())) {
// NOTE(review): the task presumably resets per-VM static observers -- confirm.
vm.invoke(() -> {
* RegressionTest for bug 42226. <br>
* 1. Member A has the bucket <br>
* 2. Member B starts creating the bucket. It tells member A that it hosts the bucket <br>
* 3. Member A crashes <br>
* 4. Member B destroys the bucket and throws a partition offline exception, because it wasn't
* able to complete initialization. <br>
* 5. Member A recovers, and gets stuck waiting for member B.
* <p>
* TRAC 42226: recycled VM hangs during re-start while waiting for Partition to come online (after
* Controller VM sees unexpected PartitionOffLineException while doing ops)
public void doesNotWaitForPreviousInstanceOfOnlineServer() throws Exception {
// Step 1: vm0 creates the region (redundancy 1, one bucket) and writes a single entry.
vm0.invoke(() -> {
createPartitionedRegion(1, 0, 1, true);
// Make sure we create a bucket
createData(0, 1, "a", partitionedRegionName);
// Step 2: install the observer in vm0 that reacts to bucket image requests.
vm0.invoke(() -> {
// notify controller and then wait to bounce
DistributionMessageObserver.setInstance(new WaitToBounceWhenImageRequested());
// PartitionOfflineException is expected while vm0 is down, so it is registered as ignored for
// the duration of this try block.
try (IgnoredException ie = addIgnoredException(PartitionOfflineException.class)) {
// This should recover redundancy, which should cause vm0 to bounce/disconnect
AsyncInvocation createPRAsync = vm1.invokeAsync(() -> createPartitionedRegion(1, 0, 1, true));
// NOTE(review): await(long, TimeUnit) returns false on timeout and the result is ignored here,
// so the test carries on even if the observer never fired -- consider asserting on it.
beforeBounceLatch.await(TIMEOUT_MILLIS, MILLISECONDS);
// Make sure we get a partition offline exception
vm1.invoke(() -> {
assertThatThrownBy(() -> createData(0, 1, "a", partitionedRegionName))
// This should recreate the bucket
vm0.invoke(() -> createPartitionedRegion(1, 0, 1, true));
// Finally, verify from vm1 the same key range and value that were written in step 1.
vm1.invoke(() -> checkData(0, 1, "a", partitionedRegionName));
/**
 * Prepares a {@code PARTITION_PERSISTENT} region factory from the cache returned by
 * {@link #getCache()}. When {@code redundancy} is greater than zero, a {@code ResourceObserver}
 * with a {@code recoveryFinished} callback is created first.
 *
 * NOTE(review): how {@code recoveryDelay}, {@code numBuckets} and {@code synchronous} are applied
 * should be confirmed against the full method body.
 *
 * @throws InterruptedException declared by the signature -- presumably from waiting on
 *         {@code recoveryDone}; confirm
 */
private void createPartitionedRegion(final int redundancy, final int recoveryDelay,
final int numBuckets, final boolean synchronous) throws InterruptedException {
// NOTE(review): presumably released from recoveryFinished and awaited further down -- confirm.
CountDownLatch recoveryDone = new CountDownLatch(1);
if (redundancy > 0) {
ResourceObserver observer = new ResourceObserverAdapter() {
// NOTE(review): raw Region parameter type; Region<?, ?> would avoid the raw-type warning.
public void recoveryFinished(Region region) {
} else {
// NOTE(review): raw constructor call; new PartitionAttributesFactory<>() would avoid the
// unchecked warning.
PartitionAttributesFactory<?, ?> partitionAttributesFactory = new PartitionAttributesFactory();
RegionFactory<?, ?> regionFactory = getCache().createRegionFactory(PARTITION_PERSISTENT);
/**
 * Puts {@code value} under every integer key in the half-open range
 * {@code [startKey, endKey)} of the named region.
 *
 * @param startKey first key written (inclusive)
 * @param endKey end of the key range (exclusive)
 * @param value value stored under each key
 * @param regionName name used to look the region up in the cache
 */
private void createData(final int startKey, final int endKey, final String value,
final String regionName) {
Region<Integer, String> region = getCache().getRegion(regionName);
// endKey itself is not written
for (int i = startKey; i < endKey; i++) {
region.put(i, value);
/**
 * Walks the integer keys in the half-open range {@code [startKey, endKey)} of the named region.
 *
 * NOTE(review): presumably asserts that each key maps to {@code value} -- confirm.
 *
 * @param startKey first key visited (inclusive)
 * @param endKey end of the key range (exclusive)
 * @param value value associated with the check
 * @param regionName name used to look the region up in the cache
 */
private void checkData(final int startKey, final int endKey, final String value,
final String regionName) {
Region<Integer, String> region = getCache().getRegion(regionName);
for (int i = startKey; i < endKey; i++) {
* Prevent GEODE-6232 by disabling JMX which is not needed in this test.
// Returns the cache from cacheRule.getOrCreateCache(...), passing properties that turn JMX off
// and set network partition detection to "false".
private InternalCache getCache() {
Properties config = new Properties();
config.setProperty(DISABLE_JMX, "true");
config.setProperty(ENABLE_NETWORK_PARTITION_DETECTION, "false");
// The same properties are supplied on every call.
InternalCache cache = cacheRule.getOrCreateCache(config);
return cache;
/**
 * Message observer that watches for {@code RequestImageMessage}s whose region path contains
 * {@code "_B_"} and, when one is seen, counts down {@code beforeBounceLatch} in the controller
 * VM.
 *
 * This is a non-static inner class because it reads the enclosing test's {@code vmController}
 * field.
 */
private class WaitToBounceWhenImageRequested extends DistributionMessageObserver
implements Serializable {
public void beforeProcessMessage(ClusterDistributionManager dm, DistributionMessage message) {
// Every other message type passes through untouched.
if (message instanceof RequestImageMessage) {
RequestImageMessage requestImageMessage = (RequestImageMessage) message;
// Don't bounce until we see a bucket
if (requestImageMessage.regionPath.contains("_B_")) {
// The count-down runs inside the controller VM (vmController is getController()).
vmController.invoke(() -> beforeBounceLatch.countDown());
try {
// NOTE(review): presumably blocks on WAIT_TO_BOUNCE here so the image request is never
// answered -- confirm.
} catch (InterruptedException e) {
// NOTE(review): the interrupt status is not restored before rethrowing; consider calling
// Thread.currentThread().interrupt() first.
throw new RuntimeException(e);