Fix issue in TGW when shutting down all tservers (#3866)
Fixes an issue where the root table was being
unloaded due to an EventCoordinator-initiated
run of the TGW while the metadata table was still
hosted.
Fixes #3861
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index 29fe40c..da6d4a4 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@ -298,6 +298,15 @@
IOException {
TableMgmtStats tableMgmtStats = new TableMgmtStats();
+ final boolean shuttingDownAllTabletServers =
+ manager.serversToShutdown.equals(currentTServers.keySet());
+ if (shuttingDownAllTabletServers && !isFullScan) {
+ // If we are shutting down all of the TabletServers, then don't process any events
+ // from the EventCoordinator.
+ LOG.debug("Partial scan requested, but aborted due to shutdown of all TabletServers");
+ return tableMgmtStats;
+ }
+
int unloaded = 0;
Map<TableId,MergeInfo> currentMerges = new HashMap<>();
@@ -393,8 +402,8 @@
}
// if we are shutting down all the tabletservers, we have to do it in order
- if (isFullScan && (goal == TabletGoalState.SUSPENDED && state == TabletState.HOSTED)
- && manager.serversToShutdown.equals(currentTServers.keySet())) {
+ if (shuttingDownAllTabletServers
+ && (goal == TabletGoalState.SUSPENDED && state == TabletState.HOSTED)) {
if (dependentWatcher != null) {
// If the dependentWatcher is for the user tables, check to see
// that user tables exist.
@@ -505,6 +514,8 @@
TServerConnection client =
manager.tserverSet.getConnection(location.getServerInstance());
if (client != null) {
+ LOG.debug("Requesting tserver {} unload tablet {}", location.getServerInstance(),
+ tm.getExtent());
client.unloadTablet(manager.managerLock, tm.getExtent(), goal.howUnload(),
manager.getSteadyTime());
tableMgmtStats.totalUnloaded++;
diff --git a/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java b/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java
index 5e60e7c..0f82311 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java
@@ -26,7 +26,6 @@
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
-import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@@ -85,20 +84,13 @@
import org.apache.hadoop.io.Text;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.google.common.collect.Iterables;
import com.google.common.net.HostAndPort;
-@Disabled // ELASTICITY_TODO
public class ManagerAssignmentIT extends SharedMiniClusterBase {
- @Override
- protected Duration defaultTimeout() {
- return Duration.ofMinutes(2);
- }
-
@BeforeAll
public static void beforeAll() throws Exception {
SharedMiniClusterBase.startMiniClusterWithConfig((cfg, core) -> {
@@ -114,13 +106,8 @@
@BeforeEach
public void before() throws Exception {
try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) {
- if (client.instanceOperations().getTabletServers().size() == 0) {
- // There are a couple of tests in this class that kill tservers without
- // clearing the list of processes for them. Calling stopAllServers in this
- // case should clear out the list of processes. Then start the tablet servers.
- getCluster().getClusterControl().stopAllServers(ServerType.TABLET_SERVER);
- getCluster().getClusterControl().start(ServerType.TABLET_SERVER);
- }
+ Wait.waitFor(() -> countTabletsWithLocation(client, RootTable.ID) > 0);
+ Wait.waitFor(() -> countTabletsWithLocation(client, MetadataTable.ID) > 0);
}
}
@@ -494,10 +481,6 @@
@Test
public void testShutdownOnlyTServerWithUserTable() throws Exception {
- // 2 TabletServers started for this test, shut them down so we only have 1.
- getCluster().getClusterControl().stopAllServers(ServerType.TABLET_SERVER);
- getCluster().getClusterControl().start(ServerType.TABLET_SERVER, Collections.emptyMap(), 1);
-
String tableName = getUniqueNames(1)[0];
try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) {
@@ -580,16 +563,18 @@
Wait.waitFor(() -> client.instanceOperations().getTabletServers().size() == 0);
+ // restart the tablet server for the other tests. Need to call stopAllServers
+ // to clear out the process list because we shutdown the TabletServer outside
+ // of MAC control.
+ getCluster().getClusterControl().stopAllServers(ServerType.TABLET_SERVER);
+ getCluster().getClusterControl().start(ServerType.TABLET_SERVER);
+ Wait.waitFor(() -> client.instanceOperations().getTabletServers().size() == 1, 60_000);
}
}
@Test
public void testShutdownOnlyTServerWithoutUserTable() throws Exception {
- // 2 TabletServers started for this test, shut them down so we only have 1.
- getCluster().getClusterControl().stopAllServers(ServerType.TABLET_SERVER);
- getCluster().getClusterControl().start(ServerType.TABLET_SERVER, Collections.emptyMap(), 1);
-
try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) {
Wait.waitFor(() -> client.instanceOperations().getTabletServers().size() == 1,
@@ -625,9 +610,14 @@
}
});
-
Wait.waitFor(() -> client.instanceOperations().getTabletServers().size() == 0);
+ // restart the tablet server for the other tests. Need to call stopAllServers
+ // to clear out the process list because we shutdown the TabletServer outside
+ // of MAC control.
+ getCluster().getClusterControl().stopAllServers(ServerType.TABLET_SERVER);
+ getCluster().getClusterControl().start(ServerType.TABLET_SERVER);
+ Wait.waitFor(() -> client.instanceOperations().getTabletServers().size() == 1, 60_000);
}
}
diff --git a/test/src/main/java/org/apache/accumulo/test/functional/ShutdownIT.java b/test/src/main/java/org/apache/accumulo/test/functional/ShutdownIT.java
index 07f389a..60ab7dd 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/ShutdownIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/ShutdownIT.java
@@ -33,7 +33,6 @@
import org.apache.accumulo.test.TestIngest;
import org.apache.accumulo.test.TestRandomDeletes;
import org.apache.accumulo.test.VerifyIngest;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
public class ShutdownIT extends ConfigurableMacBase {
@@ -107,7 +106,6 @@
}
@Test
- @Disabled // ELASTICITY_TODO
public void adminStop() throws Exception {
try (AccumuloClient c = Accumulo.newClient().from(getClientProperties()).build()) {
runAdminStopTest(c, cluster);
diff --git a/test/src/main/java/org/apache/accumulo/test/functional/SslWithClientAuthIT.java b/test/src/main/java/org/apache/accumulo/test/functional/SslWithClientAuthIT.java
index d8709e4..6e422eb 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/SslWithClientAuthIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/SslWithClientAuthIT.java
@@ -24,7 +24,6 @@
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
import org.apache.hadoop.conf.Configuration;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
/**
@@ -61,7 +60,6 @@
@Override
@Test
- @Disabled // ELASTICITY_TODO
public void adminStop() throws Exception {
super.adminStop();
}