Adds static "named" constructors and updates GC (#3805)
* Adds static named constructors to ensure that in-use scan candidates are not removed from the GC candidate list (see the first sketch below).
* Fixes a possible race condition with InUse candidates
This change writes the gcCandidates twice when performing a major
compaction to ensure that valid candidates are not removed before the
tablet mutation has completed (see the second sketch below).
Fixes: #3802
* Renames a test method for clarity
Renamed `assertRemoved` to `assertFileDeleted` to convey that the assertion
verifies the candidate's HDFS file has been deleted by the GC
diff --git a/core/src/main/java/org/apache/accumulo/core/gc/Reference.java b/core/src/main/java/org/apache/accumulo/core/gc/Reference.java
index cdffbd7..4c67bfd 100644
--- a/core/src/main/java/org/apache/accumulo/core/gc/Reference.java
+++ b/core/src/main/java/org/apache/accumulo/core/gc/Reference.java
@@ -32,6 +32,11 @@
boolean isDirectory();
/**
+ * Returns true only if the reference is a scan.
+ */
+ boolean isScan();
+
+ /**
* Get the {@link TableId} of the reference.
*/
TableId getTableId();
@@ -42,6 +47,8 @@
* {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily}
* A directory will be read from the "srv:dir" column family:
* {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily}
+ * A scan will be read from the Tablet "scan" column family:
+ * {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily}
*/
String getMetadataEntry();
}
diff --git a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java
index b9a6589..5491020 100644
--- a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java
+++ b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceDirectory.java
@@ -28,7 +28,7 @@
private final String tabletDir; // t-0003
public ReferenceDirectory(TableId tableId, String dirName) {
- super(tableId, dirName);
+ super(tableId, dirName, false);
MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(dirName);
this.tabletDir = dirName;
}
diff --git a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java
index 7f796e8..b9eece9 100644
--- a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java
@@ -29,13 +29,23 @@
public class ReferenceFile implements Reference, Comparable<ReferenceFile> {
// parts of an absolute URI, like "hdfs://1.2.3.4/accumulo/tables/2a/t-0003"
public final TableId tableId; // 2a
+ public final boolean isScan;
// the exact string that is stored in the metadata
protected final String metadataEntry;
- public ReferenceFile(TableId tableId, String metadataEntry) {
+ protected ReferenceFile(TableId tableId, String metadataEntry, boolean isScan) {
this.tableId = Objects.requireNonNull(tableId);
this.metadataEntry = Objects.requireNonNull(metadataEntry);
+ this.isScan = isScan;
+ }
+
+ public static ReferenceFile forFile(TableId tableId, String metadataEntry) {
+ return new ReferenceFile(tableId, metadataEntry, false);
+ }
+
+ public static ReferenceFile forScan(TableId tableId, String metadataEntry) {
+ return new ReferenceFile(tableId, metadataEntry, true);
}
@Override
@@ -44,6 +54,11 @@
}
@Override
+ public boolean isScan() {
+ return isScan;
+ }
+
+ @Override
public TableId getTableId() {
return tableId;
}
diff --git a/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java b/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java
index 2dbc170..aff8dd5 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/gc/AllVolumesDirectory.java
@@ -32,7 +32,7 @@
public class AllVolumesDirectory extends ReferenceFile {
public AllVolumesDirectory(TableId tableId, String dirName) {
- super(tableId, getDeleteTabletOnAllVolumesUri(tableId, dirName));
+ super(tableId, getDeleteTabletOnAllVolumesUri(tableId, dirName), false);
}
private static String getDeleteTabletOnAllVolumesUri(TableId tableId, String dirName) {
diff --git a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
index 65fa86b..223a9cf 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
@@ -216,7 +216,9 @@
if (level == DataLevel.ROOT) {
if (type == GcCandidateType.INUSE) {
- // Deletion of INUSE candidates is not supported in 2.1.x.
+ // Since there is only a single root tablet, supporting INUSE candidate deletions would add
+ // additional code complexity without any substantial benefit.
+ // Therefore, deletion of root INUSE candidates is not supported.
return;
}
mutateRootGcCandidates(rgcc -> rgcc.remove(candidates.stream()));
diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java
index 6073822..5d25027 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java
@@ -180,6 +180,8 @@
TServerInstance tServerInstance, Location lastLocation, ServiceLock zooLock,
Optional<ExternalCompactionId> ecid) {
+ // Write candidates before the mutation to ensure that a process failure after a mutation would
+ // not affect candidate creation
context.getAmple().putGcCandidates(extent.tableId(), datafilesToDelete);
TabletMutator tablet = context.getAmple().mutateTablet(extent);
@@ -204,6 +206,8 @@
tablet.putZooLock(zooLock);
tablet.mutate();
+ // Write candidates again to avoid a possible race condition when removing InUse candidates
+ context.getAmple().putGcCandidates(extent.tableId(), datafilesToDelete);
}
/**
diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java
index 34d0381..60b16e5 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java
@@ -332,8 +332,8 @@
if (key.getColumnFamily().equals(DataFileColumnFamily.NAME)) {
StoredTabletFile stf = new StoredTabletFile(key.getColumnQualifierData().toString());
- bw.addMutation(
- ample.createDeleteMutation(new ReferenceFile(tableId, stf.getMetaUpdateDelete())));
+ bw.addMutation(ample
+ .createDeleteMutation(ReferenceFile.forFile(tableId, stf.getMetaUpdateDelete())));
}
if (ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) {
diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
index 6a1b9e5..c03d449 100644
--- a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
+++ b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
@@ -189,36 +189,40 @@
// there is a lot going on in this "one line" so see below for more info
var tabletReferences = tabletStream.flatMap(tm -> {
+ var tableId = tm.getTableId();
// verify that dir and prev row entries present for to check for complete row scan
- log.trace("tablet metadata table id: {}, end row:{}, dir:{}, saw: {}, prev row: {}",
- tm.getTableId(), tm.getEndRow(), tm.getDirName(), tm.sawPrevEndRow(), tm.getPrevEndRow());
+ log.trace("tablet metadata table id: {}, end row:{}, dir:{}, saw: {}, prev row: {}", tableId,
+ tm.getEndRow(), tm.getDirName(), tm.sawPrevEndRow(), tm.getPrevEndRow());
if (tm.getDirName() == null || tm.getDirName().isEmpty() || !tm.sawPrevEndRow()) {
- throw new IllegalStateException("possible incomplete metadata scan for table id: "
- + tm.getTableId() + ", end row: " + tm.getEndRow() + ", dir: " + tm.getDirName()
- + ", saw prev row: " + tm.sawPrevEndRow());
+ throw new IllegalStateException("possible incomplete metadata scan for table id: " + tableId
+ + ", end row: " + tm.getEndRow() + ", dir: " + tm.getDirName() + ", saw prev row: "
+ + tm.sawPrevEndRow());
}
// combine all the entries read from file and scan columns in the metadata table
- Stream<StoredTabletFile> fileStream = tm.getFiles().stream();
+ Stream<StoredTabletFile> stfStream = tm.getFiles().stream();
+ // map the files to Reference objects
+ var fileStream = stfStream.map(f -> ReferenceFile.forFile(tableId, f.getMetaUpdateDelete()));
+
// scans are normally empty, so only introduce a layer of indirection when needed
final var tmScans = tm.getScans();
if (!tmScans.isEmpty()) {
- fileStream = Stream.concat(fileStream, tmScans.stream());
+ var scanStream =
+ tmScans.stream().map(s -> ReferenceFile.forScan(tableId, s.getMetaUpdateDelete()));
+ fileStream = Stream.concat(fileStream, scanStream);
}
- // map the files to Reference objects
- var stream = fileStream.map(f -> new ReferenceFile(tm.getTableId(), f.getMetaUpdateDelete()));
- // if dirName is populated then we have a tablet directory aka srv:dir
+ // if dirName is populated, then we have a tablet directory aka srv:dir
if (tm.getDirName() != null) {
// add the tablet directory to the stream
- var tabletDir = new ReferenceDirectory(tm.getTableId(), tm.getDirName());
- stream = Stream.concat(stream, Stream.of(tabletDir));
+ var tabletDir = new ReferenceDirectory(tableId, tm.getDirName());
+ fileStream = Stream.concat(fileStream, Stream.of(tabletDir));
}
- return stream;
+ return fileStream;
});
var scanServerRefs = context.getAmple().getScanServerFileReferences()
- .map(sfr -> new ReferenceFile(sfr.getTableId(), sfr.getPathStr()));
+ .map(sfr -> ReferenceFile.forScan(sfr.getTableId(), sfr.getPathStr()));
return Stream.concat(tabletReferences, scanServerRefs);
}
diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
index cc77197..6800b9a 100644
--- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
+++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
@@ -144,7 +144,7 @@
private void removeCandidatesInUse(GarbageCollectionEnvironment gce,
SortedMap<String,GcCandidate> candidateMap) throws InterruptedException {
- List<GcCandidate> inUseCandidates = new ArrayList<>();
+ List<GcCandidate> candidateEntriesToBeDeleted = new ArrayList<>();
Set<TableId> tableIdsBefore = gce.getCandidateTableIDs();
Set<TableId> tableIdsSeen = new HashSet<>();
Iterator<Reference> iter = gce.getReferences().iterator();
@@ -163,8 +163,7 @@
GcCandidate gcTemp = candidateMap.remove(dir);
if (gcTemp != null) {
log.debug("Directory Candidate was still in use by dir ref: {}", dir);
- // Intentionally not adding dir candidates to inUseCandidates as they are only added once.
- // If dir candidates are deleted, due to being in use, nothing will add them again.
+ // Do not add dir candidates to candidateEntriesToBeDeleted as they are only created once.
}
} else {
String reference = ref.getMetadataEntry();
@@ -183,15 +182,18 @@
GcCandidate gcTemp = candidateMap.remove(relativePath);
if (gcTemp != null) {
log.debug("File Candidate was still in use: {}", relativePath);
- inUseCandidates.add(gcTemp);
+ // Prevent deletion of candidates that are still in use by scans, because they won't be
+ // recreated once the scan is finished.
+ if (!ref.isScan()) {
+ candidateEntriesToBeDeleted.add(gcTemp);
+ }
}
String dir = relativePath.substring(0, relativePath.lastIndexOf('/'));
GcCandidate gcT = candidateMap.remove(dir);
if (gcT != null) {
log.debug("Directory Candidate was still in use by file ref: {}", relativePath);
- // Intentionally not adding dir candidates to inUseCandidates as they are only added once.
- // If dir candidates are deleted, due to being in use, nothing will add them again.
+ // Do not add dir candidates to candidateEntriesToBeDeleted as they are only created once.
}
}
}
@@ -199,7 +201,7 @@
ensureAllTablesChecked(Collections.unmodifiableSet(tableIdsBefore),
Collections.unmodifiableSet(tableIdsSeen), Collections.unmodifiableSet(tableIdsAfter));
if (gce.canRemoveInUseCandidates()) {
- gce.deleteGcCandidates(inUseCandidates, GcCandidateType.INUSE);
+ gce.deleteGcCandidates(candidateEntriesToBeDeleted, GcCandidateType.INUSE);
}
}
diff --git a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
index 0536bda..8e9a2d1 100644
--- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
+++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
@@ -60,7 +60,7 @@
Map<String,Reference> references = new TreeMap<>();
HashSet<TableId> tableIds = new HashSet<>();
- ArrayList<GcCandidate> deletes = new ArrayList<>();
+ ArrayList<GcCandidate> fileDeletions = new ArrayList<>();
ArrayList<TableId> tablesDirsToDelete = new ArrayList<>();
TreeMap<String,Status> filesToReplicate = new TreeMap<>();
boolean deleteInUseRefs = false;
@@ -121,6 +121,9 @@
public void deleteGcCandidates(Collection<GcCandidate> refCandidates, GcCandidateType type) {
// Mimic ServerAmpleImpl behavior for root InUse Candidates
if (type.equals(GcCandidateType.INUSE) && this.level.equals(Ample.DataLevel.ROOT)) {
+ // Since there is only a single root tablet, supporting INUSE candidate deletions would add
+ // additional code complexity without any substantial benefit.
+ // Therefore, deletion of root INUSE candidates is not supported.
return;
}
refCandidates.forEach(gcCandidate -> deletedCandidates.put(gcCandidate, type));
@@ -136,7 +139,7 @@
@Override
public void deleteConfirmedCandidates(SortedMap<String,GcCandidate> candidateMap) {
- deletes.addAll(candidateMap.values());
+ fileDeletions.addAll(candidateMap.values());
this.candidates.removeAll(candidateMap.values());
}
@@ -147,7 +150,7 @@
public void addFileReference(String tableId, String endRow, String file) {
TableId tid = TableId.of(tableId);
- references.put(tableId + ":" + endRow + ":" + file, new ReferenceFile(tid, file));
+ references.put(tableId + ":" + endRow + ":" + file, ReferenceFile.forFile(tid, file));
tableIds.add(tid);
}
@@ -167,6 +170,17 @@
removeLastTableIdRef(TableId.of(tableId));
}
+ public void addScanReference(String tableId, String endRow, String scan) {
+ TableId tid = TableId.of(tableId);
+ references.put(tableId + ":" + endRow + ":scan:" + scan, ReferenceFile.forScan(tid, scan));
+ tableIds.add(tid);
+ }
+
+ public void removeScanReference(String tableId, String endRow, String scan) {
+ references.remove(tableId + ":" + endRow + ":scan:" + scan);
+ removeLastTableIdRef(TableId.of(tableId));
+ }
+
/*
* this is to be called from removeDirReference or removeFileReference.
*
@@ -216,12 +230,12 @@
}
}
- private void assertRemoved(TestGCE gce, GcCandidate... candidates) {
+ private void assertFileDeleted(TestGCE gce, GcCandidate... candidates) {
for (GcCandidate candidate : candidates) {
- assertTrue(gce.deletes.remove(candidate));
+ assertTrue(gce.fileDeletions.remove(candidate));
}
- assertEquals(0, gce.deletes.size(), "Deletes not empty: " + gce.deletes);
+ assertEquals(0, gce.fileDeletions.size(), "Deletes not empty: " + gce.fileDeletions);
}
private void assertNoCandidatesRemoved(TestGCE gce) {
@@ -257,7 +271,7 @@
GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
gca.collect(gce);
- assertRemoved(gce, candidate);
+ assertFileDeleted(gce, candidate);
}
@Test
@@ -276,29 +290,29 @@
GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
// Remove the reference to this flush file, run the GC which should not trim it from the
// candidates, and assert that it's gone
gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
gca.collect(gce);
- assertRemoved(gce, candOne);
+ assertFileDeleted(gce, candOne);
// Removing a reference to a file that wasn't in the candidates should do nothing
gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf");
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
// Remove the reference to a file in the candidates should cause it to be removed
gce.removeFileReference("4", null, "hdfs://foo:6000/accumulo/tables/4/t0/F001.rf");
gca.collect(gce);
- assertRemoved(gce, candTwo);
+ assertFileDeleted(gce, candTwo);
// Adding more candidates which do not have references should be removed
var candThree = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf");
var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
gca.collect(gce);
- assertRemoved(gce, candThree, candFour);
+ assertFileDeleted(gce, candThree, candFour);
}
@@ -353,29 +367,29 @@
GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
gca.collect(gce);
- assertRemoved(gce, toBeRemoved);
+ assertFileDeleted(gce, toBeRemoved);
// Remove the reference to this flush file, run the GC which should not trim it from the
// candidates, and assert that it's gone
gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
gca.collect(gce);
- assertRemoved(gce, candOne);
+ assertFileDeleted(gce, candOne);
// Removing a reference to a file that wasn't in the candidates should do nothing
gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf");
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
// Remove the reference to a file in the candidates should cause it to be removed
gce.removeFileReference("4", null, "hdfs://foo:6000/accumulo/tables/4/t0/F001.rf");
gca.collect(gce);
- assertRemoved(gce, candTwo);
+ assertFileDeleted(gce, candTwo);
// Adding more candidates which do no have references should be removed
var candThree = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf");
var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
gca.collect(gce);
- assertRemoved(gce, candThree, candFour);
+ assertFileDeleted(gce, candThree, candFour);
}
/**
@@ -397,7 +411,7 @@
GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
gca.collect(gce);
- assertRemoved(gce, candidate);
+ assertFileDeleted(gce, candidate);
}
@Test
@@ -418,7 +432,7 @@
// All candidates currently have references
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
List<String[]> refsToRemove = new ArrayList<>();
refsToRemove.add(new String[] {"4", "/t0/F000.rf"});
@@ -430,28 +444,28 @@
for (int i = 0; i < 2; i++) {
gce.removeFileReference(refsToRemove.get(i)[0], null, refsToRemove.get(i)[1]);
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
}
gce.removeFileReference(refsToRemove.get(2)[0], null, refsToRemove.get(2)[1]);
gca.collect(gce);
- assertRemoved(gce, candOne);
+ assertFileDeleted(gce, candOne);
gce.removeFileReference("4", null, "/t0/F001.rf");
gca.collect(gce);
- assertRemoved(gce, candThree);
+ assertFileDeleted(gce, candThree);
// add absolute candidate for file that already has a relative candidate
var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf");
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
gce.removeFileReference("4", null, "/t0/F002.rf");
gca.collect(gce);
- assertRemoved(gce, candFour);
+ assertFileDeleted(gce, candFour);
gca.collect(gce);
- assertRemoved(gce, candTwo);
+ assertFileDeleted(gce, candTwo);
}
@Test
@@ -472,7 +486,7 @@
// Nothing should be removed because all candidates exist within a blip
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
// Remove the first blip
gce.blips.remove("/4/b-0");
@@ -480,18 +494,18 @@
// And we should lose all files in that blip and the blip directory itself -- relative and
// absolute
gca.collect(gce);
- assertRemoved(gce, new GcCandidate("/4/b-0", 0L), new GcCandidate("/4/b-0/F002.rf", 1L),
+ assertFileDeleted(gce, new GcCandidate("/4/b-0", 0L), new GcCandidate("/4/b-0/F002.rf", 1L),
new GcCandidate("hdfs://foo.com:6000/accumulo/tables/4/b-0/F001.rf", 2L));
gce.blips.remove("hdfs://foo.com:6000/accumulo/tables/5/b-0");
// Same as above, we should lose relative and absolute for a relative or absolute blip
gca.collect(gce);
- assertRemoved(gce, new GcCandidate("/5/b-0", 3L), new GcCandidate("/5/b-0/F002.rf", 4L),
+ assertFileDeleted(gce, new GcCandidate("/5/b-0", 3L), new GcCandidate("/5/b-0/F002.rf", 4L),
new GcCandidate("hdfs://foo.com:6000/accumulo/tables/5/b-0/F001.rf", 5L));
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
}
@Test
@@ -528,21 +542,21 @@
// A directory reference does not preclude a candidate file beneath that directory from deletion
gca.collect(gce);
- assertRemoved(gce, new GcCandidate("/4/t-0/F002.rf", 1L));
+ assertFileDeleted(gce, new GcCandidate("/4/t-0/F002.rf", 1L));
// Removing the dir reference for a table will delete all tablet directories
gce.removeDirReference("5", null);
gca.collect(gce);
- assertRemoved(gce, new GcCandidate("hdfs://foo.com:6000/accumulo/tables/5/t-0", 2L));
+ assertFileDeleted(gce, new GcCandidate("hdfs://foo.com:6000/accumulo/tables/5/t-0", 2L));
gce.removeDirReference("4", null);
gca.collect(gce);
- assertRemoved(gce, new GcCandidate("/4/t-0", 0L));
+ assertFileDeleted(gce, new GcCandidate("/4/t-0", 0L));
gce.removeDirReference("6", null);
gce.removeDirReference("7", null);
gca.collect(gce);
- assertRemoved(gce, new GcCandidate("/6/t-0", 3L),
+ assertFileDeleted(gce, new GcCandidate("/6/t-0", 3L),
new GcCandidate("hdfs://foo:6000/accumulo/tables/7/t-0/", 4L));
gce.removeFileReference("8", "m", "/t-0/F00.rf");
@@ -552,13 +566,13 @@
gce.removeFileReference("e", "m", "../c/t-0/F00.rf");
gce.removeFileReference("f", "m", "../d/t-0/F00.rf");
gca.collect(gce);
- assertRemoved(gce, new GcCandidate("/8/t-0", 5L),
+ assertFileDeleted(gce, new GcCandidate("/8/t-0", 5L),
new GcCandidate("hdfs://foo:6000/accumulo/tables/9/t-0", 6L), new GcCandidate("/a/t-0", 7L),
new GcCandidate("hdfs://foo:6000/accumulo/tables/b/t-0", 8L), new GcCandidate("/c/t-0", 9L),
new GcCandidate("hdfs://foo:6000/accumulo/tables/d/t-0", 10L));
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
}
@Test
@@ -596,23 +610,23 @@
// A directory reference does not preclude a candidate file beneath that directory from deletion
gca.collect(gce);
- assertRemoved(gce, candidates.get(2));
+ assertFileDeleted(gce, candidates.get(2));
// Removing the dir reference for a table will delete all tablet directories
gce.removeDirReference("5", null);
// but we need to add a file ref
gce.addFileReference("8", "m", "/t-0/F00.rf");
gca.collect(gce);
- assertRemoved(gce, candidates.get(3));
+ assertFileDeleted(gce, candidates.get(3));
gce.removeDirReference("4", null);
gca.collect(gce);
- assertRemoved(gce, candidates.get(1));
+ assertFileDeleted(gce, candidates.get(1));
gce.removeDirReference("6", null);
gce.removeDirReference("7", null);
gca.collect(gce);
- assertRemoved(gce, candidates.get(4), candidates.get(5));
+ assertFileDeleted(gce, candidates.get(4), candidates.get(5));
gce.removeFileReference("8", "m", "/t-0/F00.rf");
gce.removeFileReference("9", "m", "/t-0/F00.rf");
@@ -621,11 +635,11 @@
gce.removeFileReference("e", "m", "../c/t-0/F00.rf");
gce.removeFileReference("f", "m", "../d/t-0/F00.rf");
gca.collect(gce);
- assertRemoved(gce, candidates.get(6), candidates.get(7), candidates.get(8), candidates.get(9),
- candidates.get(10), candidates.get(11));
+ assertFileDeleted(gce, candidates.get(6), candidates.get(7), candidates.get(8),
+ candidates.get(9), candidates.get(10), candidates.get(11));
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
}
private void badRefTest(String ref) {
@@ -696,8 +710,8 @@
gce.addCandidate("hdfs://foo.com:6000/user/foo/tables/a/t-0/t-1/F00.rf");
gca.collect(gce);
- System.out.println(gce.deletes);
- assertRemoved(gce);
+ System.out.println(gce.fileDeletions);
+ assertFileDeleted(gce);
}
@Test
@@ -709,17 +723,17 @@
gce.addCandidate("/1636/default_tablet");
gce.addDirReference("1636", null, "default_tablet");
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
gce.candidates.clear();
var tempCandidate = gce.addCandidate("/1636/default_tablet/someFile");
gca.collect(gce);
- assertRemoved(gce, tempCandidate);
+ assertFileDeleted(gce, tempCandidate);
gce.addFileReference("1636", null, "/default_tablet/someFile");
gce.addCandidate("/1636/default_tablet/someFile");
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
// have an indirect file reference
gce = new TestGCE();
@@ -728,19 +742,19 @@
gce.addDirReference("1636", null, "default_tablet");
gce.addCandidate("/9/default_tablet/someFile");
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
// have an indirect file reference and a directory candidate
gce.candidates.clear();
gce.addCandidate("/9/default_tablet");
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
gce.candidates.clear();
gce.addCandidate("/9/default_tablet");
gce.addCandidate("/9/default_tablet/someFile");
long blipCount = gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
assertEquals(0, blipCount);
gce = new TestGCE();
@@ -748,7 +762,7 @@
gce.blips.add("/1636/b-0001");
gce.addCandidate("/1636/b-0001/I0000");
blipCount = gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
assertEquals(1, blipCount);
gce = new TestGCE();
@@ -762,7 +776,7 @@
gce.addCandidate("/1000/b-1002/I0007");
var candidate = gce.addCandidate("/1000/t-0003/I0008");
blipCount = gca.collect(gce);
- assertRemoved(gce, candidate);
+ assertFileDeleted(gce, candidate);
assertEquals(5, blipCount);
}
@@ -810,7 +824,7 @@
// No refs to A000002.rf, and a closed, finished repl for A000001.rf should not preclude
// it from being deleted
- assertEquals(2, gce.deletes.size());
+ assertEquals(2, gce.fileDeletions.size());
}
@Test
@@ -829,8 +843,8 @@
gca.collect(gce);
// We need to replicate that one file still, should not delete it.
- assertEquals(1, gce.deletes.size());
- assertEquals(candidate, gce.deletes.get(0));
+ assertEquals(1, gce.fileDeletions.size());
+ assertEquals(candidate, gce.fileDeletions.get(0));
}
@Test
@@ -851,8 +865,8 @@
gca.collect(gce);
// We need to replicate that one file still, should not delete it.
- assertEquals(1, gce.deletes.size());
- assertEquals(candidate, gce.deletes.get(0));
+ assertEquals(1, gce.fileDeletions.size());
+ assertEquals(candidate, gce.fileDeletions.get(0));
}
@Test
@@ -861,7 +875,7 @@
TestGCE gce = new TestGCE();
- assertEquals(0, gce.deletes.size());
+ assertEquals(0, gce.fileDeletions.size());
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/1/t-00001/A000001.rf");
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/2/t-00002/A000002.rf");
@@ -873,9 +887,9 @@
gca.collect(gce);
// We need to replicate that one file still, should not delete it.
- assertEquals(1, gce.deletes.size());
+ assertEquals(1, gce.fileDeletions.size());
assertEquals(new GcCandidate("hdfs://foo.com:6000/accumulo/tables/2/t-00002/A000002.rf", 1L),
- gce.deletes.get(0));
+ gce.fileDeletions.get(0));
}
@Test
@@ -915,13 +929,13 @@
gce.deleteInUseRefs = false;
// All candidates currently have references
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
assertNoCandidatesRemoved(gce);
// Enable InUseRefs to be removed if the file ref is found.
gce.deleteInUseRefs = true;
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
assertCandidateRemoved(gce, GcCandidateType.INUSE, candidate);
var cand1 = gce.addCandidate("/9/t0/F003.rf");
@@ -932,7 +946,7 @@
gca.collect(gce);
assertNoCandidatesRemoved(gce);
// File references did not exist, so candidates are processed
- assertRemoved(gce, cand1, cand2);
+ assertFileDeleted(gce, cand1, cand2);
}
@Test
@@ -958,14 +972,14 @@
gce.deleteInUseRefs = false;
// No InUse Candidates should be removed.
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
assertNoCandidatesRemoved(gce);
gce.deleteInUseRefs = true;
// Due to the gce Datalevel of ROOT, InUse candidate deletion is not supported regardless of
// property setting.
gca.collect(gce);
- assertRemoved(gce);
+ assertFileDeleted(gce);
assertNoCandidatesRemoved(gce);
gce.removeFileReference("+r", null, "/t0/F000.rf");
@@ -975,7 +989,7 @@
// With file references deleted, the GC should now process the candidates
gca.collect(gce);
- assertRemoved(gce, toBeRemoved);
+ assertFileDeleted(gce, toBeRemoved);
assertNoCandidatesRemoved(gce);
}
@@ -993,7 +1007,7 @@
gce.addDirReference("6", null, "t-0");
gca.collect(gce);
- assertRemoved(gce, candTwo);
+ assertFileDeleted(gce, candTwo);
assertNoCandidatesRemoved(gce);
assertEquals(1, gce.candidates.size());
@@ -1001,7 +1015,7 @@
gce.removeDirReference("6", null);
gca.collect(gce);
- assertRemoved(gce, candOne);
+ assertFileDeleted(gce, candOne);
assertNoCandidatesRemoved(gce);
assertEquals(0, gce.candidates.size());
@@ -1019,12 +1033,38 @@
gca.collect(gce);
assertCandidateRemoved(gce, GcCandidateType.INUSE, removedCandidate);
- assertRemoved(gce);
+ assertFileDeleted(gce);
// Check and make sure the InUse directory candidates are not removed.
assertEquals(1, gce.candidates.size());
assertTrue(gce.candidates.contains(candidate));
}
+ @Test
+ public void testInUseScanReferenceCandidates() throws Exception {
+ TestGCE gce = new TestGCE();
+
+ // InUse Scan Refs should not be removed.
+ var scanCandidate = gce.addCandidate("/4/t0/F010.rf");
+ var candOne = gce.addCandidate("/4/t0/F000.rf");
+ var candTwo = gce.addCandidate("/6/t0/F123.rf");
+ gce.addScanReference("4", null, "/t0/F010.rf");
+ gce.addFileReference("4", null, "/t0/F000.rf");
+
+ GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+ gce.deleteInUseRefs = true;
+
+ gca.collect(gce);
+ assertFileDeleted(gce, candTwo);
+ assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne);
+ assertEquals(Set.of(scanCandidate), gce.candidates);
+
+ gce.removeScanReference("4", null, "/t0/F010.rf");
+ gca.collect(gce);
+ assertFileDeleted(gce, scanCandidate);
+ assertNoCandidatesRemoved(gce);
+ assertEquals(0, gce.candidates.size());
+ }
+
// below are tests for potential failure conditions of the GC process. Some of these cases were
// observed on clusters. Some were hypothesis based on observations. The result was that
// candidate entries were not removed when they should have been and therefore files were
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index 482bbd2..b9914be 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@ -703,7 +703,7 @@
Key key = entry.getKey();
if (key.compareColumnFamily(DataFileColumnFamily.NAME) == 0) {
var stf = new StoredTabletFile(key.getColumnQualifierData().toString());
- datafilesAndDirs.add(new ReferenceFile(stf.getTableId(), stf.getMetaUpdateDelete()));
+ datafilesAndDirs.add(ReferenceFile.forFile(stf.getTableId(), stf.getMetaUpdateDelete()));
if (datafilesAndDirs.size() > 1000) {
ample.putGcFileAndDirCandidates(extent.tableId(), datafilesAndDirs);
datafilesAndDirs.clear();
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java
index 7cf276d..1ed199a 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer1/CleanUpBulkImport.java
@@ -62,7 +62,7 @@
ample.removeBulkLoadInProgressFlag(
"/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());
ample.putGcFileAndDirCandidates(tableId,
- Collections.singleton(new ReferenceFile(tableId, bulkDir.toString())));
+ Collections.singleton(ReferenceFile.forFile(tableId, bulkDir.toString())));
log.debug("removing the metadata table markers for loaded files");
ample.removeBulkLoadEntries(tableId, tid, null, null);
log.debug("releasing HDFS reservations for " + source + " and " + error);
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java
index f681055..12bbacf 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/CleanUpBulkImport.java
@@ -59,7 +59,7 @@
ample.removeBulkLoadInProgressFlag(
"/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());
ample.putGcFileAndDirCandidates(info.tableId,
- Collections.singleton(new ReferenceFile(info.tableId, bulkDir.toString())));
+ Collections.singleton(ReferenceFile.forFile(info.tableId, bulkDir.toString())));
if (info.tableState == TableState.ONLINE) {
Text firstSplit = info.firstSplit == null ? null : new Text(info.firstSplit);
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java
index 54c3775..6bf9e32 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader9to10.java
@@ -601,7 +601,7 @@
var tableId = TableId.of(pathNoVolume.getParent().getName());
// except bulk directories don't get an all volume prefix
if (pathNoVolume.getName().startsWith(Constants.BULK_PREFIX)) {
- return new ReferenceFile(tableId, olddelete.toString());
+ return ReferenceFile.forFile(tableId, olddelete.toString());
} else {
return new AllVolumesDirectory(tableId, tabletDir);
}
@@ -610,7 +610,7 @@
if (pathNoVolume.depth() == 4) {
Path tabletDirPath = pathNoVolume.getParent();
var tableId = TableId.of(tabletDirPath.getParent().getName());
- return new ReferenceFile(tableId, olddelete.toString());
+ return ReferenceFile.forFile(tableId, olddelete.toString());
} else {
throw new IllegalStateException("Invalid delete marker: " + olddelete);
}
diff --git a/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java b/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java
index a1ad441..290208b 100644
--- a/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java
+++ b/server/manager/src/test/java/org/apache/accumulo/manager/upgrade/Upgrader9to10Test.java
@@ -85,13 +85,13 @@
resolved = Upgrader9to10.resolveRelativeDelete("/5a/" + BULK_PREFIX + "0005", VOL_PROP);
assertEquals(new Path(VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005"), resolved);
- ref1 = new ReferenceFile(tableId5a, VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005");
+ ref1 = ReferenceFile.forFile(tableId5a, VOL_PROP + "/tables/5a/" + BULK_PREFIX + "0005");
var ref2 = Upgrader9to10.switchToAllVolumes(resolved);
compareReferences(ref1, ref2);
resolved = Upgrader9to10.resolveRelativeDelete("/5a/t-0005/F0009.rf", VOL_PROP);
assertEquals(new Path(VOL_PROP + "/tables/5a/t-0005/F0009.rf"), resolved);
- ref1 = new ReferenceFile(tableId5a, VOL_PROP + "/tables/5a/t-0005/F0009.rf");
+ ref1 = ReferenceFile.forFile(tableId5a, VOL_PROP + "/tables/5a/t-0005/F0009.rf");
ref2 = Upgrader9to10.switchToAllVolumes(resolved);
compareReferences(ref1, ref2);
}
@@ -123,14 +123,15 @@
resolved = Upgrader9to10.resolveRelativeDelete(
"hdfs://localhost:9000/accumulo/tables/5a/" + BULK_PREFIX + "0005", VOL_PROP);
- ref1 = new ReferenceFile(tableId5a,
+ ref1 = ReferenceFile.forFile(tableId5a,
"hdfs://localhost:9000/accumulo/tables/5a/" + BULK_PREFIX + "0005");
var ref2 = Upgrader9to10.switchToAllVolumes(resolved);
compareReferences(ref1, ref2);
resolved = Upgrader9to10.resolveRelativeDelete(
"hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf", VOL_PROP);
- ref1 = new ReferenceFile(tableId5a, "hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf");
+ ref1 = ReferenceFile.forFile(tableId5a,
+ "hdfs://localhost:9000/accumulo/tables/5a/t-0005/C0009.rf");
ref2 = Upgrader9to10.switchToAllVolumes(resolved);
compareReferences(ref1, ref2);
}
diff --git a/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java b/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java
index eb1cafe..c35238d 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/GarbageCollectorIT.java
@@ -452,7 +452,7 @@
String longpath = "aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee"
+ "ffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjj";
var path = String.format("file:/%020d/%s", i, longpath);
- Mutation delFlag = ample.createDeleteMutation(new ReferenceFile(TableId.of("1"), path));
+ Mutation delFlag = ample.createDeleteMutation(ReferenceFile.forFile(TableId.of("1"), path));
bw.addMutation(delFlag);
}
}