Fix region migration code (#12472)
diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/balancer/RouteBalancer.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/balancer/RouteBalancer.java
index 7356e42..276f415 100644
--- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/balancer/RouteBalancer.java
+++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/balancer/RouteBalancer.java
@@ -300,7 +300,6 @@
long broadcastTime = System.currentTimeMillis();
Map<TConsensusGroupId, TRegionReplicaSet> tmpPriorityMap = getRegionPriorityMap();
- LOGGER.info("region map: {}", tmpPriorityMap);
AsyncClientHandler<TRegionRouteReq, TSStatus> clientHandler =
new AsyncClientHandler<>(
DataNodeRequestType.UPDATE_REGION_ROUTE_MAP,
diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java
index b8dd607..a556f37 100644
--- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java
+++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java
@@ -95,8 +95,7 @@
TSStatus status = handler.createNewRegionPeer(consensusGroupId, destDataNode);
setKillPoint(state);
if (status.getCode() != SUCCESS_STATUS.getStatusCode()) {
- rollback(env, handler);
- return Flow.NO_MORE_STATE;
+ return warnAndRollBackAndNoMoreState(env, handler, "CREATE_NEW_REGION_PEER fail");
}
setNextState(AddRegionPeerState.DO_ADD_REGION_PEER);
break;
@@ -109,10 +108,8 @@
this.getProcId(), destDataNode, consensusGroupId, coordinator);
setKillPoint(state);
if (tsStatus.getCode() != SUCCESS_STATUS.getStatusCode()) {
- throw new ProcedureException(
- String.format(
- "[pid%d][AddRegion] failed to submit task to DataNode, procedure failed",
- getProcId()));
+ return warnAndRollBackAndNoMoreState(
+ env, handler, "submit DO_ADD_REGION_PEER task fail");
}
}
TRegionMigrateResult result = handler.waitTaskFinish(this.getProcId(), coordinator);
@@ -121,24 +118,17 @@
// coordinator crashed and lost its task table
case FAIL:
// maybe some DataNode crash
- LOGGER.warn(
- "[pid{}][AddRegion] {} result is {}, procedure failed. Will try to reset peer list automatically...",
- getProcId(),
- state,
- result.getTaskStatus());
- rollback(env, handler);
- return Flow.NO_MORE_STATE;
+ return warnAndRollBackAndNoMoreState(
+ env, handler, String.format("%s result is %s", state, result.getTaskStatus()));
case PROCESSING:
// should never happen
- LOGGER.error("should never happen");
- throw new UnsupportedOperationException("should never happen");
+ return warnAndRollBackAndNoMoreState(env, handler, "should never return PROCESSING");
case SUCCESS:
setNextState(UPDATE_REGION_LOCATION_CACHE);
break outerSwitch;
default:
- String msg = String.format("status %s is unsupported", result.getTaskStatus());
- LOGGER.error(msg);
- throw new UnsupportedOperationException(msg);
+ return warnAndRollBackAndNoMoreState(
+ env, handler, String.format("status %s is unsupported", result.getTaskStatus()));
}
case UPDATE_REGION_LOCATION_CACHE:
handler.updateRegionCache(consensusGroupId, destDataNode, RegionStatus.Running);
@@ -164,8 +154,20 @@
return Flow.HAS_MORE_STATE;
}
- private void rollback(ConfigNodeProcedureEnv env, RegionMaintainHandler handler)
+ private Flow warnAndRollBackAndNoMoreState(
+ ConfigNodeProcedureEnv env, RegionMaintainHandler handler, String reason)
throws ProcedureException {
+ return warnAndRollBackAndNoMoreState(env, handler, reason, null);
+ }
+
+ private Flow warnAndRollBackAndNoMoreState(
+ ConfigNodeProcedureEnv env, RegionMaintainHandler handler, String reason, Exception e)
+ throws ProcedureException {
+ if (e != null) {
+ LOGGER.warn("[pid{}][AddRegion] Start to roll back, because: {}", getRootProcId(), reason, e);
+ } else {
+ LOGGER.warn("[pid{}][AddRegion] Start to roll back, because: {}", getRootProcId(), reason);
+ }
handler.removeRegionLocation(consensusGroupId, destDataNode);
List<TDataNodeLocation> correctDataNodeLocations =
@@ -228,6 +230,7 @@
correctStr);
}
});
+ return Flow.NO_MORE_STATE;
}
@Override