refactor(llm): check graph conn before updating embedding (#205)
The vertex embedding is updated every hour.
**When a new user starts the program for the first time** and the graph
database connection has not been configured yet, **many errors occur,
which hurts the user experience.**
Therefore, **before updating the vertex embedding, the task now checks whether
the configured graph connection is reachable.**
When the connection is unreachable, **the effect is as follows:** the update is skipped and a warning is logged.

---------
Co-authored-by: imbajin <jin@apache.org>
diff --git a/.gitignore b/.gitignore
index 77c4168..afe736e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -131,6 +131,8 @@
# Environments
.env
.venv
+.vscode
+.cursor
env/
venv/
ENV/
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
index 435e558..bad48ad 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
@@ -18,8 +18,10 @@
# pylint: disable=E1101
import asyncio
+
import gradio as gr
+from hugegraph_llm.config import huge_settings
from hugegraph_llm.config import prompt
from hugegraph_llm.utils.graph_index_utils import (
get_graph_index_info,
@@ -29,8 +31,10 @@
extract_graph,
import_graph_data,
)
-from hugegraph_llm.utils.vector_index_utils import clean_vector_index, build_vector_index, get_vector_index_info
+from hugegraph_llm.utils.hugegraph_utils import check_graph_db_connection
from hugegraph_llm.utils.log import log
+from hugegraph_llm.utils.vector_index_utils import clean_vector_index, build_vector_index, get_vector_index_info
+
def store_prompt(doc, schema, example_prompt):
# update env variables: doc, schema and example_prompt
@@ -148,15 +152,35 @@
return input_text, input_schema, info_extract_template
-async def timely_update_vid_embedding():
+async def timely_update_vid_embedding(interval_seconds: int = 3600):
+    """
+    Periodically update vertex embeddings while the graph server is reachable.
+
+    A cycle is skipped with a warning when the connection check fails.
+    :param interval_seconds: Time interval between updates in seconds (default: 3600s -> 1h)
+    """
     while True:
         try:
-            await asyncio.to_thread(update_vid_embedding)
-            log.info("rebuild_vid_index timely executed successfully.")
+            # Get the latest configuration values on each iteration
+            config = {
+                "ip": huge_settings.graph_ip,
+                "port": huge_settings.graph_port,
+                "name": huge_settings.graph_name,
+                "user": huge_settings.graph_user,
+                "pwd": huge_settings.graph_pwd,
+                "graph_space": huge_settings.graph_space
+            }
+            if await asyncio.to_thread(check_graph_db_connection, **config):
+                await asyncio.to_thread(update_vid_embedding)
+                log.info("update_vid_embedding executed successfully")
+            else:
+                log.warning("HugeGraph server connection failed, so skipping update_vid_embedding, "
+                            "please check graph configuration and connectivity")
         except asyncio.CancelledError as ce:
             log.info("Periodic task has been cancelled due to: %s", ce)
             break
+        # TODO: Add Gradio Warning here
+        # pylint: disable=W0718
         except Exception as e:
-            log.error("Failed to execute rebuild_vid_index: %s", e, exc_info=True)
-            raise Exception("Failed to execute rebuild_vid_index") from e
-        await asyncio.sleep(3600)
+            log.warning("Failed to execute update_vid_embedding: %s", e, exc_info=True)
+        await asyncio.sleep(interval_seconds)
diff --git a/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
index 4b90943..0d5d107 100644
--- a/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
+++ b/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
@@ -19,6 +19,8 @@
import os
import shutil
from datetime import datetime
+import requests
+from requests.auth import HTTPBasicAuth
from hugegraph_llm.config import huge_settings, resource_path
from hugegraph_llm.utils.log import log
@@ -168,3 +170,23 @@
except Exception as e: # pylint: disable=W0718
log.error("Failed to manage backup retention: %s", e, exc_info=True)
raise Exception("Failed to manage backup retention") from e
+
+
+# TODO: In the path demo/rag_demo/configs_block.py,
+#  there is a function test_api_connection that is similar to this function,
+#  but it is not straightforward to reuse
+def check_graph_db_connection(ip: str, port: str, name: str, user: str, pwd: str, graph_space: str) -> bool:
+    """Return True only if the graph's schema endpoint answers HTTP 200; request errors return False."""
+    try:
+        if graph_space and graph_space.strip():
+            test_url = f"http://{ip}:{port}/graphspaces/{graph_space}/graphs/{name}/schema"
+        else:
+            test_url = f"http://{ip}:{port}/graphs/{name}/schema"
+        response = requests.get(test_url, timeout=(1.0, 5.0), auth=HTTPBasicAuth(user, pwd))
+        return response.status_code == 200
+    except requests.exceptions.RequestException as e:  # also covers Timeout, which is a subclass
+        log.warning("GraphDB connection error: %s", str(e))
+        return False
+    except Exception as e:  # pylint: disable=W0718
+        log.error("Unexpected connection error: %s", e, exc_info=True)
+        raise Exception("Failed to check graph database connection") from e