blob: e675f63f825a7b041ce8ecfc6e9ac83ee3f87c79 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.cache.persistence;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.file.OpenOption;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.cache.CacheAtomicityMode;
import org.apache.ignite.cache.CacheRebalanceMode;
import org.apache.ignite.cache.CacheWriteSynchronizationMode;
import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.ConnectorConfiguration;
import org.apache.ignite.configuration.DataRegionConfiguration;
import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.configuration.WALMode;
import org.apache.ignite.failure.AbstractFailureHandler;
import org.apache.ignite.failure.FailureContext;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.IgniteInterruptedCheckedException;
import org.apache.ignite.internal.pagemem.PageIdUtils;
import org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord;
import org.apache.ignite.internal.pagemem.wal.record.RolloverType;
import org.apache.ignite.internal.processors.cache.IgniteInternalCache;
import org.apache.ignite.internal.processors.cache.persistence.file.FileIO;
import org.apache.ignite.internal.processors.cache.persistence.file.FileIODecorator;
import org.apache.ignite.internal.processors.cache.persistence.file.FileIOFactory;
import org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager;
import org.apache.ignite.internal.processors.cache.persistence.file.RandomAccessFileIOFactory;
import org.apache.ignite.internal.processors.cache.persistence.metastorage.MetaStorage;
import org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryEx;
import org.apache.ignite.internal.processors.cache.persistence.tree.io.PageIO;
import org.apache.ignite.internal.processors.cache.persistence.tree.io.PagePartitionMetaIO;
import org.apache.ignite.internal.util.typedef.X;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.testframework.GridTestUtils;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.Test;
import static org.apache.ignite.IgniteSystemProperties.IGNITE_PDS_SKIP_CRC;
import static org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager.DFLT_STORE_DIR;
import static org.apache.ignite.internal.processors.cache.persistence.metastorage.MetaStorage.METASTORAGE_CACHE_ID;
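/**
 * Tests that a node with persistence enabled reports a failure to its failure handler when the page store,
 * meta storage, cache partition meta, checkpoint files or WAL are corrupted or cannot be written.
 */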
public class IgnitePdsCorruptedStoreTest extends GridCommonAbstractTest {
/** Name of the first cache configured on every node started by this test. */
private static final String CACHE_NAME1 = "cache1";
/** Name of the second cache configured on every node started by this test. */
private static final String CACHE_NAME2 = "cache2";
/** Failure handler created by the most recent {@link #getConfiguration(String)} call. */
private DummyFailureHandler failureHnd;
/** Failing FileIO factory created by the most recent {@link #getConfiguration(String)} call. */
private FailingFileIOFactory failingFileIOFactory;
/** {@inheritDoc} */
@Override protected void beforeTest() throws Exception {
// Clean the persistence directory first, then run the parent's per-test setup.
cleanPersistenceDir();
super.beforeTest();
}
/** {@inheritDoc} */
@Override protected void afterTest() throws Exception {
super.afterTest();
// Stop the nodes before cleaning the persistence directory they were started with.
stopAllGrids();
cleanPersistenceDir();
}
/**
 * {@inheritDoc}
 * <p>
 * On top of the parent configuration this enables persistence for the default data region (100 MB max),
 * uses {@link WALMode#FSYNC}, and registers both test caches. Every call replaces
 * {@link #failingFileIOFactory} and {@link #failureHnd} with fresh instances that are wired into the
 * returned configuration.
 */
@Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
    IgniteConfiguration nodeCfg = super.getConfiguration(igniteInstanceName);

    // Fresh hooks per node start; the test methods reach them through the fields.
    failingFileIOFactory = new FailingFileIOFactory();
    failureHnd = new DummyFailureHandler();

    DataRegionConfiguration dfltRegionCfg = new DataRegionConfiguration()
        .setMaxSize(100 * 1024 * 1024)
        .setPersistenceEnabled(true);

    DataStorageConfiguration storageCfg = new DataStorageConfiguration()
        .setDefaultDataRegionConfiguration(dfltRegionCfg)
        .setWalMode(WALMode.FSYNC)
        .setFileIOFactory(failingFileIOFactory);

    nodeCfg.setConnectorConfiguration(new ConnectorConfiguration());
    nodeCfg.setConsistentId(igniteInstanceName);
    nodeCfg.setDataStorageConfiguration(storageCfg);
    nodeCfg.setCacheConfiguration(cacheConfiguration(CACHE_NAME1), cacheConfiguration(CACHE_NAME2));
    nodeCfg.setFailureHandler(failureHnd);

    return nodeCfg;
}
/**
 * Resolves a path relative to the default Ignite work directory.
 *
 * @param file Path of the file or folder, relative to the work directory.
 * @return File or folder in work directory.
 * @throws IgniteCheckedException If failed to resolve file name.
 */
private File file(String file) throws IgniteCheckedException {
    String workDir = U.defaultWorkDirectory();

    return U.resolveWorkDirectory(workDir, file, false);
}
/**
 * Builds the configuration shared by both test caches: transactional, fully synchronous writes,
 * synchronous rebalancing, 32 partitions and 2 backups.
 *
 * @param name Cache name.
 * @return Cache configuration with the given name.
 */
private CacheConfiguration cacheConfiguration(String name) {
    CacheConfiguration cacheCfg = new CacheConfiguration();

    cacheCfg.setName(name);
    cacheCfg.setBackups(2);
    cacheCfg.setAffinity(new RendezvousAffinityFunction(false, 32));
    cacheCfg.setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL);
    cacheCfg.setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC);
    cacheCfg.setRebalanceMode(CacheRebalanceMode.SYNC);

    return cacheCfg;
}
/**
 * Deletes a partition file of the first cache while the cluster is down and restarts the nodes.
 * The test passes either when starting node 0 fails with a {@link StorageException} or
 * {@link IOException} in the cause chain, or when the failure handler reports a
 * {@link StorageException}.
 *
 * @throws Exception If test failed.
 */
@Test
public void testNodeInvalidatedWhenPersistenceIsCorrupted() throws Exception {
    Ignite node0 = startGrid(0);

    startGrid(1);

    node0.cluster().active(true);

    awaitPartitionMapExchange();

    IgniteCache<Integer, String> cache = node0.cache(CACHE_NAME1);

    for (int key = 0; key < 100; key++)
        cache.put(key, String.valueOf(key));

    forceCheckpoint();

    // One more update after the forced checkpoint.
    cache.put(2, "test");

    String nodeFolder = node0.name().replaceAll("\\.", "_");

    stopAllGrids();

    String partFilePath = String.format("db/%s/cache-%s/part-2.bin", nodeFolder, CACHE_NAME1);

    U.delete(file(partFilePath));

    startGrid(1);

    try {
        startGrid(0);
    }
    catch (IgniteCheckedException startErr) {
        // Anything other than a storage or I/O problem is a genuine test failure.
        if (!X.hasCause(startErr, StorageException.class, IOException.class))
            throw startErr;

        return; // Success;
    }

    waitFailure(StorageException.class);
}
/**
 * Test node invalidation when page CRC is wrong and page not found in wal.
 * <p>
 * The node is first started and activated with {@code IGNITE_PDS_SKIP_CRC} set to {@code "true"}, then
 * stopped. The property is switched to {@code "false"}, the {@code wal} directory under the store
 * directory is deleted, and the node is started again. The failure handler is expected to report a
 * {@link StorageException}.
 * <p>
 * The system property is JVM-wide, so its previous value is restored when the test finishes.
 *
 * @throws Exception In case of fail
 */
@Test
public void testWrongPageCRC() throws Exception {
    // Remember the value seen on entry so this test does not leak the property into other tests.
    String prevSkipCrc = System.getProperty(IGNITE_PDS_SKIP_CRC);

    try {
        System.setProperty(IGNITE_PDS_SKIP_CRC, "true");

        IgniteEx ignite = startGrid(0);

        ignite.cluster().active(true);
        ignite.cluster().active(false);

        stopGrid(0);

        System.setProperty(IGNITE_PDS_SKIP_CRC, "false");

        File dbDir = U.resolveWorkDirectory(U.defaultWorkDirectory(), DFLT_STORE_DIR, false);
        File walDir = new File(dbDir, "wal");

        U.delete(walDir);

        try {
            startGrid(0);

            // NOTE(review): `ignite` is the instance stopped above, not the one just started;
            // presumably this call fails and is swallowed below - confirm whether the restarted
            // node was meant to be activated here.
            ignite.cluster().active(true);
        }
        catch (Exception ignored) {
            // Start/activation errors are tolerated: the outcome is checked via the failure handler.
        }

        waitFailure(StorageException.class);
    }
    finally {
        if (prevSkipCrc != null)
            System.setProperty(IGNITE_PDS_SKIP_CRC, prevSkipCrc);
        else
            System.clearProperty(IGNITE_PDS_SKIP_CRC);
    }
}
/**
 * Test node invalidation when meta storage is corrupted: the tree root in the meta storage partition
 * meta page is overwritten, the node is restarted, and the failure handler is expected to report a
 * {@link StorageException}.
 *
 * @throws Exception If failed.
 */
@Test
public void testMetaStorageCorruption() throws Exception {
    IgniteEx node = startGrid(0);

    node.cluster().active(true);

    MetaStorage metaStorage = node.context().cache().context().database().metaStorage();
    PageMemoryEx metaPageMem = (PageMemoryEx)metaStorage.pageMemory();

    corruptTreeRoot(node, metaPageMem, METASTORAGE_CACHE_ID, MetaStorage.METASTORE_PARTITION);

    stopGrid(0);

    try {
        startGrid(0);

        // NOTE(review): `node` is the instance stopped above, not the restarted one - confirm intent.
        node.cluster().active(true);
    }
    catch (Exception ignored) {
        // No-op.
    }

    waitFailure(StorageException.class);
}
/**
 * Test node invalidation when cache meta is corrupted: the tree root in the meta page of the partition
 * that owns key {@code 1} is overwritten, the node is restarted, and the failure handler is expected
 * to report a {@link StorageException}.
 *
 * @throws Exception If failed.
 */
@Test
public void testCacheMetaCorruption() throws Exception {
    IgniteEx node = startGrid(0);

    node.cluster().active(true);

    IgniteInternalCache cache = node.cachex(CACHE_NAME1);

    cache.put(1, 1);

    int partId = cache.affinity().partition(1);
    int grpId = cache.context().group().groupId();
    PageMemoryEx cachePageMem = (PageMemoryEx)cache.context().dataRegion().pageMemory();

    corruptTreeRoot(node, cachePageMem, grpId, partId);

    node.cluster().active(false);

    stopGrid(0);

    try {
        startGrid(0);

        // NOTE(review): `node` and `cache` belong to the instance stopped above, not the restarted
        // one - confirm intent.
        node.cluster().active(true);

        cache.put(1, 1);
    }
    catch (Exception ignored) {
        // No-op.
    }

    waitFailure(StorageException.class);
}
/**
 * Overwrites the tree root stored in a partition meta page with a page id built from zeros. The change
 * is made under the checkpoint read lock and the page write lock, and the page is unlocked as dirty.
 *
 * @param ignite Ignite.
 * @param pageMem Page memory that holds the partition meta page.
 * @param grpId Group id.
 * @param partId Partition id.
 * @throws IgniteCheckedException If the meta page could not be resolved or acquired.
 */
private void corruptTreeRoot(IgniteEx ignite, PageMemoryEx pageMem, int grpId, int partId)
    throws IgniteCheckedException {
    ignite.context().cache().context().database().checkpointReadLock();

    try {
        long partMetaId = pageMem.partitionMetaPageId(grpId, partId);
        long partMetaPage = pageMem.acquirePage(grpId, partMetaId);

        try {
            long pageAddr = pageMem.writeLock(grpId, partMetaId, partMetaPage);

            try {
                PagePartitionMetaIO io = PageIO.getPageIO(pageAddr);

                // Corrupt tree root
                io.setTreeRoot(pageAddr, PageIdUtils.pageId(0, (byte)0, 0));
            }
            catch (Exception e) {
                // Fail the test, but keep the original exception as the cause for diagnostics.
                throw new AssertionError("Failed to change page: " + e.getMessage(), e);
            }
            finally {
                pageMem.writeUnlock(grpId, partMetaId, partMetaPage, null, true);
            }
        }
        finally {
            pageMem.releasePage(grpId, partMetaId, partMetaPage);
        }
    }
    finally {
        ignite.context().cache().context().database().checkpointReadUnlock();
    }
}
/**
 * Test node invalidation when meta store is read only: once the cluster is deactivated, any attempt to
 * open the meta storage partition file with {@link StandardOpenOption#WRITE} fails with an
 * {@link IOException}. Re-activation runs in a separate thread and the failure handler is expected to
 * report an {@link IOException}.
 *
 * @throws Exception If failed.
 */
@Test
public void testReadOnlyMetaStore() throws Exception {
    IgniteEx node = startGrid(0);

    // Stays empty until the meta store file is known, so nothing is rejected before that point.
    AtomicReference<File> blockedFile = new AtomicReference<>();

    failingFileIOFactory.createClosure((file, options) -> {
        boolean openedForWrite = Arrays.asList(options).contains(StandardOpenOption.WRITE);

        if (openedForWrite && file.equals(blockedFile.get()))
            throw new IOException("File is readonly.");

        return null;
    });

    node.cluster().active(true);

    IgniteInternalCache<Integer, Integer> cache = node.cachex(CACHE_NAME1);

    cache.put(1, 1);

    node.cluster().active(false);

    FilePageStoreManager pageStoreMgr = (FilePageStoreManager)node.context().cache().context().pageStore();

    File metaStoreDir = new File(pageStoreMgr.workDir(), MetaStorage.METASTORAGE_CACHE_NAME.toLowerCase());
    String metaPartFileName = String.format(FilePageStoreManager.PART_FILE_TEMPLATE, 0);

    blockedFile.set(new File(metaStoreDir, metaPartFileName));

    IgniteInternalFuture fut = GridTestUtils.runAsync(new Runnable() {
        @Override public void run() {
            try {
                node.cluster().active(true);
            }
            catch (Exception ignored) {
                // No-op.
            }
        }
    });

    waitFailure(IOException.class);

    fut.cancel();
}
/**
 * Test node invalidation due to checkpoint error: the {@link FileIO} created for a file whose name
 * contains {@code "-END.bin"} throws an {@link IOException} on {@code close()}. After a forced
 * checkpoint the failure handler is expected to report an {@link IOException}.
 *
 * @throws Exception If failed.
 */
@Test
public void testCheckpointFailure() throws Exception {
    IgniteEx ignite = startGrid(0);

    ignite.cluster().active(true);

    forceCheckpoint(); // Trigger empty checkpoint to make sure initial checkpoint on node start will finish.

    ignite.cache(CACHE_NAME1).put(0, 0); // Mark some pages as dirty.

    AtomicBoolean failOnClose = new AtomicBoolean(true);
    AtomicReference<FileIO> failingIo = new AtomicReference<>();

    failingFileIOFactory.createClosure((file, options) -> {
        // Returning null makes the factory fall back to its delegate.
        if (!file.getName().contains("-END.bin"))
            return null;

        FileIO delegate;

        try {
            delegate = failingFileIOFactory.delegateFactory().create(file, options);
        }
        catch (IOException ignored) {
            return null;
        }

        FileIO dec = new FileIODecorator(delegate) {
            @Override public void close() throws IOException {
                if (failOnClose.get())
                    throw new IOException("Checkpoint failed");
                else
                    super.close();
            }
        };

        failingIo.set(dec);

        return dec;
    });

    try {
        try {
            forceCheckpoint(ignite);
        }
        catch (Exception ignored) {
            // No-op.
        }

        waitFailure(IOException.class);
    }
    finally {
        failOnClose.set(false);

        // The closure may never have seen a matching file; guard against an NPE here so it
        // cannot mask the assertion failure raised above.
        FileIO io = failingIo.get();

        if (io != null)
            io.close(); // Release file for any test outcome.
    }
}
/**
 * Test node invalidation due to error on WAL write header: every {@code write(ByteBuffer)} to a file
 * whose name ends with {@code ".wal"} fails with an {@link IOException}. A record is then logged with
 * {@link RolloverType#NEXT_SEGMENT} and the failure handler is expected to report a
 * {@link StorageException}.
 *
 * @throws Exception If failed.
 */
@Test
public void testWalFsyncWriteHeaderFailure() throws Exception {
    IgniteEx node = startGrid(0);

    node.cluster().active(true);

    node.cache(CACHE_NAME1).put(0, 0);

    failingFileIOFactory.createClosure((file, options) -> {
        FileIO realIo = failingFileIOFactory.delegateFactory().create(file, options);

        // Files other than WAL segments are served by the real implementation.
        if (!file.getName().endsWith(".wal"))
            return realIo;

        return new FileIODecorator(realIo) {
            @Override public int write(ByteBuffer srcBuf) throws IOException {
                throw new IOException("No space left on device");
            }
        };
    });

    node.context().cache().context().database().checkpointReadLock();

    try {
        CheckpointRecord rec = new CheckpointRecord(null);

        node.context().cache().context().wal().log(rec, RolloverType.NEXT_SEGMENT);
    }
    catch (StorageException expected) {
        // No-op.
    }
    finally {
        node.context().cache().context().database().checkpointReadUnlock();
    }

    waitFailure(StorageException.class);
}
/**
 * Waits up to 5 seconds for the current failure handler to report a failure, then asserts that the
 * reported error has the expected type in its cause chain.
 *
 * @param expError Expected error.
 * @throws IgniteInterruptedCheckedException If the wait was interrupted.
 */
private void waitFailure(Class<? extends Throwable> expError) throws IgniteInterruptedCheckedException {
    boolean failed = GridTestUtils.waitForCondition(() -> failureHnd.failure(), 5_000L);

    assertTrue(failed);

    Throwable reportedErr = failureHnd.error();

    assertTrue(X.hasCause(reportedErr, expError));
}
/**
 * Dummy failure handler that only records that a failure happened and which error caused it.
 * <p>
 * Both fields are volatile and {@link #error} is written before {@link #failure}, so a thread that
 * observes {@code failure() == true} also observes the recorded error.
 */
public static class DummyFailureHandler extends AbstractFailureHandler {
    /** Failure flag; raised only after {@link #error} has been stored. */
    private volatile boolean failure;

    /** Error taken from the handled failure context. */
    private volatile Throwable error;

    /**
     * @return {@code true} once a failure has been handled.
     */
    public boolean failure() {
        return failure;
    }

    /**
     * @return Error of the handled failure, or {@code null} if nothing has been handled yet.
     */
    public Throwable error() {
        return error;
    }

    /** {@inheritDoc} */
    @Override protected boolean handle(Ignite ignite, FailureContext failureCtx) {
        // Store the error BEFORE raising the flag: callers poll failure() and then read error(),
        // so the opposite order lets them see the flag with the error still unset.
        error = failureCtx.error();
        failure = true;

        return true;
    }
}
/**
 * Create File I/O which can fail according to implemented closure.
 * <p>
 * When a closure is installed it is consulted first: it may return a custom {@link FileIO}, throw an
 * {@link IOException}, or return {@code null} to fall back to the delegate factory.
 */
private static class FailingFileIOFactory implements FileIOFactory {
    /** Delegate factory. */
    private final FileIOFactory delegateFactory = new RandomAccessFileIOFactory();

    /** Create FileIO closure; {@code null} until {@link #createClosure} is called. */
    private volatile IgniteBiClosureX<File, OpenOption[], FileIO> createClo;

    /** {@inheritDoc} */
    @Override public FileIO create(File file, OpenOption... openOption) throws IOException {
        // Read the volatile field once so the null check and the call see the same closure.
        IgniteBiClosureX<File, OpenOption[], FileIO> clo = createClo;

        FileIO fileIO = clo != null ? clo.apply(file, openOption) : null;

        return fileIO != null ? fileIO : delegateFactory.create(file, openOption);
    }

    /**
     * @param createClo FileIO create closure.
     */
    public void createClosure(IgniteBiClosureX<File, OpenOption[], FileIO> createClo) {
        this.createClo = createClo;
    }

    /**
     * @return Delegate factory used when the closure is absent or returns {@code null}.
     */
    public FileIOFactory delegateFactory() {
        return delegateFactory;
    }
}
/**
 * Serializable two-argument closure whose body is allowed to throw {@link IOException}.
 *
 * @param <E1> Type of the first argument.
 * @param <E2> Type of the second argument.
 * @param <R> Result type.
 */
@FunctionalInterface
private interface IgniteBiClosureX<E1, E2, R> extends Serializable {
    /**
     * @param first First argument.
     * @param second Second argument.
     * @return Closure result.
     * @throws IOException If the closure fails.
     */
    R apply(E1 first, E2 second) throws IOException;
}
}