blob: f3887e2f8ffff46ff35016cba42beb402d54544e [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
package org.apache.geode.internal.cache;
import static org.apache.geode.distributed.ConfigurationProperties.LOCATORS;
import static org.apache.geode.distributed.ConfigurationProperties.MCAST_PORT;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.rules.TestName;
import org.apache.geode.cache.Cache;
import org.apache.geode.cache.CacheFactory;
import org.apache.geode.cache.DiskStore;
import org.apache.geode.cache.DiskStoreFactory;
import org.apache.geode.cache.Region;
import org.apache.geode.cache.RegionFactory;
import org.apache.geode.cache.RegionShortcut;
* This is a bugtest for bug 37500.
* <p>
* TRAC #37500: A DiskAccessException brings cache server to a stall
* <pre>
* com.gemstone.gemfire.cache.DiskAccessException: Unable to get free space for creating an oplog after waiting for 20 seconds
* at com.gemstone.gemfire.internal.cache.ComplexDiskRegion.getNextDir(
* at com.gemstone.gemfire.internal.cache.Oplog.switchOpLog(
* at com.gemstone.gemfire.internal.cache.Oplog.basicModify(
* at com.gemstone.gemfire.internal.cache.Oplog.modify(
* at com.gemstone.gemfire.internal.cache.DiskRegion.put(
* at com.gemstone.gemfire.internal.cache.DiskEntry$Helper.writeToDisk(
* at com.gemstone.gemfire.internal.cache.DiskEntry$Helper.overflowToDisk(
* at com.gemstone.gemfire.internal.cache.AbstractLRURegionMap.evictEntry(
* at com.gemstone.gemfire.internal.cache.AbstractLRURegionMap.lruUpdateCallback(
* at com.gemstone.gemfire.internal.cache.AbstractRegionMap.basicPut(
* at com.gemstone.gemfire.internal.cache.LocalRegion.virtualPut(
* at com.gemstone.gemfire.internal.cache.DistributedRegion.virtualPut(
* at com.gemstone.gemfire.internal.cache.LocalRegion.basicUpdate(
* at com.gemstone.gemfire.internal.cache.AbstractUpdateOperation.doPutOrCreate(
* at com.gemstone.gemfire.internal.cache.AbstractUpdateOperation$AbstractUpdateMessage.basicOperateOnRegion(
* at com.gemstone.gemfire.internal.cache.AbstractUpdateOperation$AbstractUpdateMessage.operateOnRegion(
* at com.gemstone.gemfire.internal.cache.DistributedCacheOperation$CacheOperationMessage.basicProcess(
* at com.gemstone.gemfire.internal.cache.DistributedCacheOperation$CacheOperationMessage.process(
* at com.gemstone.gemfire.distributed.internal.DistributionMessage.scheduleAction(
* at com.gemstone.gemfire.distributed.internal.DistributionMessage.schedule(
* at com.gemstone.gemfire.distributed.internal.DistributionManager.scheduleIncomingMessage(
* at com.gemstone.gemfire.distributed.internal.DistributionManager.handleIncomingDMsg(
* at com.gemstone.gemfire.distributed.internal.DistributionManager$MyListener.messageReceived(
* at com.gemstone.gemfire.distributed.internal.membership.jgroup.JGroupMembershipManager.processMessage(
* at com.gemstone.gemfire.distributed.internal.membership.jgroup.JGroupMembershipManager.handleOrDeferMessage(
* at com.gemstone.gemfire.distributed.internal.membership.jgroup.JGroupMembershipManager$MyDCReceiver.messageReceived(
* at
* at com.gemstone.gemfire.internal.tcp.TCPConduit.messageReceived(
* at com.gemstone.gemfire.internal.tcp.Connection.dispatchMessage(
* at com.gemstone.gemfire.internal.tcp.Connection.processNIOBuffer(
* at com.gemstone.gemfire.internal.tcp.Connection.runNioReader(
* at
* at
* </pre>
public class DiskAccessExceptionDisablesServerRegressionTest {
// Oplog size limit in bytes; setUp passes it to DiskStoreFactoryImpl.setMaxOplogSizeInBytes.
private static final int MAX_OPLOG_SIZE = 1000;
// The two entry keys that testBug37500 writes to repeatedly.
private static final String KEY1 = "KEY1";
private static final String KEY2 = "KEY2";
// Cache created in setUp with LOCATORS set to "" and MCAST_PORT set to "0".
private Cache cache;
// Region under test; assigned in setUp from a RegionFactory.
private Region<String, byte[]> region;
// Observer instance created in setUp (see MyCacheObserver).
private MyCacheObserver observer;
// Supplies the folder that setUp uses as the single disk-store directory.
// NOTE(review): presumably a JUnit @Rule (org.junit.Rule is imported) — confirm the annotation.
public TemporaryFolder temporaryFolder = new TemporaryFolder();
// Supplies the running test's method name, which setUp folds into a unique name.
// NOTE(review): presumably also a JUnit @Rule — confirm the annotation.
public TestName testName = new TestName();
// Builds the fixture: an observer, a cache, a disk store backed by one small directory, and the
// region under test.
// NOTE(review): presumably annotated with @Before (org.junit.Before is imported) — confirm.
public void setUp() throws Exception {
// One name, derived from the test class and method, is reused for the folder, the disk store
// and the region.
String uniqueName = getClass().getSimpleName() + "_" + testName.getMethodName();
File temporaryDirectory = temporaryFolder.newFolder(uniqueName);
// NOTE(review): nothing visible here registers the observer with the cache — confirm where it
// is installed.
observer = new MyCacheObserver();
cache = new CacheFactory().set(LOCATORS, "").set(MCAST_PORT, "0").create();
DiskStoreFactory dsf = cache.createDiskStoreFactory();
// A single disk directory with a size of 2000; the unit is set to BYTES two lines below.
dsf.setDiskDirsAndSizes(new File[] {temporaryDirectory}, new int[] {2000});
// Cast to the implementation class to set the oplog limit and the directory size unit in bytes.
((DiskStoreFactoryImpl) dsf).setMaxOplogSizeInBytes(MAX_OPLOG_SIZE);
((DiskStoreFactoryImpl) dsf).setDiskDirSizesUnit((DiskDirSizesUnit.BYTES));
// The disk store shares the unique name; the local variable is not used again in the visible code.
DiskStore diskStore = dsf.create(uniqueName);
// NOTE(review): the initializer of regionFactory is not visible in this view — confirm how the
// factory is obtained and how it is tied to the disk store created above.
RegionFactory<String, byte[]> regionFactory =
region = regionFactory.create(uniqueName);
// Per-test cleanup hook.
// NOTE(review): presumably annotated with @After (org.junit.After is imported) — confirm, and
// confirm which resources created in setUp it releases.
public void tearDown() {
* This test does the following: <br>
* 1. Create a disk-region with following configurations:
* <ul>
* <li>dirSize = 2000 bytes
* <li>maxOplogSize = 1000 bytes (MAX_OPLOG_SIZE)
* <li>rolling = true
* <li>syncMode = true
* <li>approx size on disk for operations = 440 bytes
* </ul>
* <p>
* 2. Make the Roller go into the WAIT state via the CacheObserverAdapter.beforeGoingToCompact callback
* <p>
* 3. Put 440 bytes; it will go in oplog1
* <p>
* 4. Put another 440 bytes; it will go in oplog1
* <p>
* 5. Put 440 bytes; switching will be caused and it will go in oplog2. The Roller will remain blocked
* (step 2)
* <p>
* 6. Put 440 bytes; it will go in oplog2, and oplog2 will now be full
* <p>
* 7. Notify the Roller and put 440 bytes; this will try further switching. The put will fail with an
* exception due to bug 37500. The put thread takes an entry-level lock for entry2 (the one with
* KEY2) and tries to write to disk, but there is no free space left, so it goes into a wait,
* expecting the Roller to free up the space. The Roller, which has now been notified to run, tries to
* roll entry2, for which it seeks the entry-level lock that has been acquired by the put thread. So the
* put thread eventually comes out of the wait with a DiskAccessException
* <p>
* Another scenario for this bug: once the disk space was getting exhausted, the entry
* operation threads which had already taken a lock on the Entry got stuck trying to acquire the Oplog
* lock. The switching thread had acquired the Oplog.lock and was waiting for the roller thread to
* free disk space. Since the roller needed to acquire the Entry lock to roll, it was unable to do so
* because of the entry operation threads. This would cause the entry operation threads to get a
* DiskAccessException after completing the stipulated wait. The Roller was able to free space
* only when it had rolled all the relevant entries, which could happen only when the entry
* operation threads released the entry lock after getting the DiskAccessException.
// Issues five puts against the region. Per the scenario described for this test, each operation
// is expected to take roughly 440 bytes on disk, so the sequence fills two oplogs and then
// attempts a further switch.
// NOTE(review): the scenario also mentions notifying the roller before the last put; no
// observer.notifyRoller() call or assertion is visible in this view — confirm.
public void testBug37500() throws Exception {
// Create KEY1 with a 420-byte value (about 440 bytes on disk); expected to go in oplog1.
region.put(KEY1, new byte[420]);
// Create KEY2 with a 420-byte value (about 440 bytes on disk); expected to go in oplog1.
region.put(KEY2, new byte[420]);
// Update KEY1 (about 440 bytes on disk); expected to cause a switch and go in oplog2. The
// value size is raised to 432 because the key won't be written to disk for an UPDATE.
region.put(KEY1, new byte[432]);
// Update KEY1 again (about 440 bytes on disk); expected to go in oplog2.
region.put(KEY1, new byte[432]);
// Update KEY2 (about 440 bytes on disk); expected to attempt a further switch.
region.put(KEY2, new byte[432]);
// Cache observer that coordinates two callbacks through a shared monitor:
// beforeGoingToCompact loops until proceedForRolling is set, and beforeSwitchingOplog sets it
// once notifyRoller() has been called.
private static class MyCacheObserver extends CacheObserverAdapter {
// Monitor object that both callbacks synchronize on.
private final Object notification = new Object();
* Flag to decide whether we want to allow roller to run
private volatile boolean notifyRoller = false;
// Set by beforeSwitchingOplog; beforeGoingToCompact keeps looping while this is false.
private volatile boolean proceedForRolling = false;
// Arms the observer: the next beforeSwitchingOplog callback will set proceedForRolling.
void notifyRoller() {
notifyRoller = true;
// Holds the calling thread in a loop, under the notification monitor, until proceedForRolling
// becomes true.
public void beforeGoingToCompact() {
synchronized (notification) {
while (!proceedForRolling) {
// NOTE(review): the blocking call inside this try is not visible in this view (presumably
// notification.wait()) — confirm.
try {
} catch (InterruptedException e) {
// An interrupt is surfaced as an unchecked exception with the original as its cause.
throw new RuntimeException(e);
// Once notifyRoller() has been called, sets proceedForRolling under the notification monitor.
// NOTE(review): no notify/notifyAll call is visible after the flag is set — confirm the
// waiting thread is woken.
public void beforeSwitchingOplog() {
if (notifyRoller) {
synchronized (notification) {
proceedForRolling = true;