stash
diff --git a/iotdb-core/ainode/ainode/core/inference/pool_scheduler/basic_pool_scheduler.py b/iotdb-core/ainode/ainode/core/inference/pool_scheduler/basic_pool_scheduler.py index 8994a9e..0343c2a 100644 --- a/iotdb-core/ainode/ainode/core/inference/pool_scheduler/basic_pool_scheduler.py +++ b/iotdb-core/ainode/ainode/core/inference/pool_scheduler/basic_pool_scheduler.py
@@ -51,7 +51,7 @@ Schedule a scaling action for the given model_id. """ if model_id not in self._request_pool_map: - pool_num = estimate_pool_size(self.DEFAULT_DEVICE, model_id) + pool_num = 2 if pool_num <= 0: raise InferenceModelInternalError( f"Not enough memory to run model {model_id}."
diff --git a/iotdb-core/ainode/ainode/core/manager/inference_manager.py b/iotdb-core/ainode/ainode/core/manager/inference_manager.py index 2d4e208..a189274 100644 --- a/iotdb-core/ainode/ainode/core/manager/inference_manager.py +++ b/iotdb-core/ainode/ainode/core/manager/inference_manager.py
@@ -242,9 +242,7 @@ predict_length, ) - if model_id in self.ACCELERATE_MODEL_ID and "cuda" in str( - self.DEFAULT_DEVICE - ): + if model_id in self.ACCELERATE_MODEL_ID: # TODO: Logic in this branch shall handle all LTSM inferences # TODO: TSBlock -> Tensor codes should be unified data = full_data[1][0]