hugegraph-llm/src/hugegraph_llm/state/ai_state.py - incubator-hugegraph-ai - Git at Google

 #  Licensed to the Apache Software Foundation (ASF) under one or more
 #  contributor license agreements.  See the NOTICE file distributed with
 #  this work for additional information regarding copyright ownership.
 #  The ASF licenses this file to You under the Apache License, Version 2.0
 #  (the "License"); you may not use this file except in compliance with
 #  the License.  You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 #  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

 from typing import AsyncGenerator, Union, List, Optional, Any, Dict
 from PyCGraph import GParam, CStatus

 from hugegraph_llm.utils.log import log


 class WkFlowInput(GParam):
     """Input parameters of a workflow, grouped by the node or flow that reads them.

     Every field is declared with a class-level default: ``None`` for the
     optional values and ``False`` for the two recall flags. ``reset`` restores
     an instance to exactly those defaults.
     """

     texts: Optional[Union[str, List[str]]] = None  # texts input used by ChunkSplit Node
     language: Optional[str] = None  # language configuration used by ChunkSplit Node
     split_type: Optional[str] = None  # split type used by ChunkSplit Node
     example_prompt: Optional[str] = None  # needed by graph information extract
     schema: Optional[str] = None  # Schema information required by SchemaNode
     data_json: Optional[Dict[str, Any]] = None
     extract_type: Optional[str] = None
     query_examples: Optional[Any] = None
     few_shot_schema: Optional[Any] = None
     # Fields related to PromptGenerate
     source_text: Optional[str] = None  # Original text
     scenario: Optional[str] = None  # Scenario description
     example_name: Optional[str] = None  # Example name
     # Fields for Text2Gremlin
     example_num: Optional[int] = None
     requested_outputs: Optional[List[str]] = None

     # RAG Flow related fields
     query: Optional[str] = None  # User query for RAG
     vector_search: Optional[bool] = None  # Enable vector search
     graph_search: Optional[bool] = None  # Enable graph search
     raw_answer: Optional[bool] = None  # Return raw answer
     vector_only_answer: Optional[bool] = None  # Vector only answer mode
     graph_only_answer: Optional[bool] = None  # Graph only answer mode
     graph_vector_answer: Optional[bool] = None  # Combined graph and vector answer
     graph_ratio: Optional[float] = None  # Graph ratio for merging
     rerank_method: Optional[str] = None  # Reranking method
     near_neighbor_first: Optional[bool] = None  # Near neighbor first flag
     custom_related_information: Optional[str] = None  # Custom related information
     answer_prompt: Optional[str] = None  # Answer generation prompt
     keywords_extract_prompt: Optional[str] = None  # Keywords extraction prompt
     gremlin_tmpl_num: Optional[int] = None  # Gremlin template number
     gremlin_prompt: Optional[str] = None  # Gremlin generation prompt
     max_graph_items: Optional[int] = None  # Maximum graph items
     topk_return_results: Optional[int] = None  # Top-k return results
     vector_dis_threshold: Optional[float] = None  # Vector distance threshold
     topk_per_keyword: Optional[int] = None  # Top-k per keyword
     max_keywords: Optional[int] = None
     max_items: Optional[int] = None

     # Semantic query related fields
     semantic_by: Optional[str] = None  # Semantic query method
     topk_per_query: Optional[int] = None  # Top-k per query

     # Graph query related fields
     max_deep: Optional[int] = None  # Maximum depth for graph traversal
     max_v_prop_len: Optional[int] = None  # Maximum vertex property length
     max_e_prop_len: Optional[int] = None  # Maximum edge property length
     prop_to_match: Optional[str] = None  # Property to match

     stream: Optional[bool] = None  # used to recognize stream mode

     # used for rag_recall api
     is_graph_rag_recall: bool = False
     is_vector_only: bool = False

     # used for building the text2gremlin index
     examples: Optional[List[Dict[str, str]]] = None

     def reset(self, _: CStatus) -> None:
         """Restore every declared field to its class-level default.

         The field names come from the class annotations instead of a second
         hand-written list, so a field added to the declarations above is
         reset automatically and cannot be forgotten here.

         Args:
             _: Status object supplied by the caller; it is not inspected.
         """
         for field_name in WkFlowInput.__annotations__:
             # Read the default from the class, never from the instance, so a
             # value set on this instance is not written back to itself.
             setattr(self, field_name, getattr(WkFlowInput, field_name))


 class WkFlowState(GParam):
     """State and result fields of a workflow.

     Every field is declared with a class-level default of ``None``.
     ``setup`` clears them on an instance, ``to_json`` exports the ones that
     are set, and ``assign_from_json`` loads values back from a dictionary.
     """

     schema: Optional[str] = None  # schema message
     simple_schema: Optional[str] = None
     chunks: Optional[List[str]] = None
     edges: Optional[List[Any]] = None
     vertices: Optional[List[Any]] = None
     triples: Optional[List[Any]] = None
     call_count: Optional[int] = None

     keywords: Optional[List[str]] = None
     vector_result: Optional[Any] = None
     graph_result: Optional[Any] = None
     keywords_embeddings: Optional[Any] = None

     generated_extract_prompt: Optional[str] = None
     # Fields for Text2Gremlin results
     match_result: Optional[List[dict]] = None
     result: Optional[str] = None
     raw_result: Optional[str] = None
     template_exec_res: Optional[Any] = None
     raw_exec_res: Optional[Any] = None

     match_vids: Optional[Any] = None

     raw_answer: Optional[str] = None
     vector_only_answer: Optional[str] = None
     graph_only_answer: Optional[str] = None
     graph_vector_answer: Optional[str] = None

     merged_result: Optional[Any] = None

     vertex_num: Optional[int] = None
     edge_num: Optional[int] = None
     note: Optional[str] = None
     removed_vid_vector_num: Optional[int] = None
     added_vid_vector_num: Optional[int] = None
     raw_texts: Optional[List] = None
     query_examples: Optional[List] = None
     few_shot_schema: Optional[Dict] = None
     source_text: Optional[str] = None
     scenario: Optional[str] = None
     example_name: Optional[str] = None

     graph_ratio: Optional[float] = None
     query: Optional[str] = None
     vector_search: Optional[bool] = None
     graph_search: Optional[bool] = None
     max_graph_items: Optional[int] = None
     stream_generator: Optional[AsyncGenerator] = None

     graph_result_flag: Optional[int] = None
     vertex_degree_list: Optional[List] = None
     knowledge_with_degree: Optional[Dict] = None
     graph_context_head: Optional[str] = None

     embed_dim: Optional[int] = None
     is_graph_rag_recall: Optional[bool] = None

     def setup(self) -> CStatus:
         """Set every declared field on this instance to its class-level default.

         The field names come from the class annotations instead of a second
         hand-written list, so a newly declared field is cleared automatically.

         Returns:
             CStatus: A default-constructed status object.
         """
         for field_name in WkFlowState.__annotations__:
             # All declared defaults are None; reading them from the class
             # keeps this method correct if a non-None default is added later.
             setattr(self, field_name, getattr(WkFlowState, field_name))
         return CStatus()

     def to_json(self) -> Dict[str, Any]:
         """Return the public instance attributes whose value is not ``None``.

         Only entries of the instance ``__dict__`` are considered, so methods
         and untouched class-level defaults are never exported. Values are
         returned as stored; no serialization or copying is performed.

         Returns:
             dict: Attribute name -> value for every attribute that does not
             start with ``_`` and is not ``None``.
         """
         return {
             name: value
             for name, value in self.__dict__.items()
             if not name.startswith("_") and value is not None
         }

     def assign_from_json(self, data_json: dict):
         """Copy the entries of ``data_json`` onto matching attributes.

         A key is assigned only when it names an existing attribute that is
         neither private (leading ``_``) nor a method; previously such keys
         passed the ``hasattr`` check and silently replaced the method or
         internal attribute on the instance. Every rejected key is logged.

         Args:
             data_json: Mapping of attribute name to the value to assign.
         """
         for key, value in data_json.items():
             if not hasattr(self, key):
                 log.warning(
                     "key %s should be a member of WkFlowState & type %s", key, type(value)
                 )
                 continue
             # Declared fields hold plain data at class level, so a callable
             # class attribute means the key names a method, not a field.
             if key.startswith("_") or callable(getattr(type(self), key, None)):
                 log.warning(
                     "key %s is a method or private attribute of WkFlowState and is skipped",
                     key,
                 )
                 continue
             setattr(self, key, value)
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from typing import AsyncGenerator, Union, List, Optional, Any, Dict
	from PyCGraph import GParam, CStatus

	from hugegraph_llm.utils.log import log


	class WkFlowInput(GParam):
	    """Input parameters of a workflow, grouped by the node or flow that reads them.

	    Every field is declared with a class-level default: ``None`` for the
	    optional values and ``False`` for the two recall flags. ``reset`` restores
	    an instance to exactly those defaults.
	    """

	    texts: Optional[Union[str, List[str]]] = None  # texts input used by ChunkSplit Node
	    language: Optional[str] = None  # language configuration used by ChunkSplit Node
	    split_type: Optional[str] = None  # split type used by ChunkSplit Node
	    example_prompt: Optional[str] = None  # needed by graph information extract
	    schema: Optional[str] = None  # Schema information required by SchemaNode
	    data_json: Optional[Dict[str, Any]] = None
	    extract_type: Optional[str] = None
	    query_examples: Optional[Any] = None
	    few_shot_schema: Optional[Any] = None
	    # Fields related to PromptGenerate
	    source_text: Optional[str] = None  # Original text
	    scenario: Optional[str] = None  # Scenario description
	    example_name: Optional[str] = None  # Example name
	    # Fields for Text2Gremlin
	    example_num: Optional[int] = None
	    requested_outputs: Optional[List[str]] = None

	    # RAG Flow related fields
	    query: Optional[str] = None  # User query for RAG
	    vector_search: Optional[bool] = None  # Enable vector search
	    graph_search: Optional[bool] = None  # Enable graph search
	    raw_answer: Optional[bool] = None  # Return raw answer
	    vector_only_answer: Optional[bool] = None  # Vector only answer mode
	    graph_only_answer: Optional[bool] = None  # Graph only answer mode
	    graph_vector_answer: Optional[bool] = None  # Combined graph and vector answer
	    graph_ratio: Optional[float] = None  # Graph ratio for merging
	    rerank_method: Optional[str] = None  # Reranking method
	    near_neighbor_first: Optional[bool] = None  # Near neighbor first flag
	    custom_related_information: Optional[str] = None  # Custom related information
	    answer_prompt: Optional[str] = None  # Answer generation prompt
	    keywords_extract_prompt: Optional[str] = None  # Keywords extraction prompt
	    gremlin_tmpl_num: Optional[int] = None  # Gremlin template number
	    gremlin_prompt: Optional[str] = None  # Gremlin generation prompt
	    max_graph_items: Optional[int] = None  # Maximum graph items
	    topk_return_results: Optional[int] = None  # Top-k return results
	    vector_dis_threshold: Optional[float] = None  # Vector distance threshold
	    topk_per_keyword: Optional[int] = None  # Top-k per keyword
	    max_keywords: Optional[int] = None
	    max_items: Optional[int] = None

	    # Semantic query related fields
	    semantic_by: Optional[str] = None  # Semantic query method
	    topk_per_query: Optional[int] = None  # Top-k per query

	    # Graph query related fields
	    max_deep: Optional[int] = None  # Maximum depth for graph traversal
	    max_v_prop_len: Optional[int] = None  # Maximum vertex property length
	    max_e_prop_len: Optional[int] = None  # Maximum edge property length
	    prop_to_match: Optional[str] = None  # Property to match

	    stream: Optional[bool] = None  # used to recognize stream mode

	    # used for rag_recall api
	    is_graph_rag_recall: bool = False
	    is_vector_only: bool = False

	    # used for building the text2gremlin index
	    examples: Optional[List[Dict[str, str]]] = None

	    def reset(self, _: CStatus) -> None:
	        """Restore every declared field to its class-level default.

	        The field names come from the class annotations instead of a second
	        hand-written list, so a field added to the declarations above is
	        reset automatically and cannot be forgotten here.

	        Args:
	            _: Status object supplied by the caller; it is not inspected.
	        """
	        for field_name in WkFlowInput.__annotations__:
	            # Read the default from the class, never from the instance, so a
	            # value set on this instance is not written back to itself.
	            setattr(self, field_name, getattr(WkFlowInput, field_name))


	class WkFlowState(GParam):
	    """State and result fields of a workflow.

	    Every field is declared with a class-level default of ``None``.
	    ``setup`` clears them on an instance, ``to_json`` exports the ones that
	    are set, and ``assign_from_json`` loads values back from a dictionary.
	    """

	    schema: Optional[str] = None  # schema message
	    simple_schema: Optional[str] = None
	    chunks: Optional[List[str]] = None
	    edges: Optional[List[Any]] = None
	    vertices: Optional[List[Any]] = None
	    triples: Optional[List[Any]] = None
	    call_count: Optional[int] = None

	    keywords: Optional[List[str]] = None
	    vector_result: Optional[Any] = None
	    graph_result: Optional[Any] = None
	    keywords_embeddings: Optional[Any] = None

	    generated_extract_prompt: Optional[str] = None
	    # Fields for Text2Gremlin results
	    match_result: Optional[List[dict]] = None
	    result: Optional[str] = None
	    raw_result: Optional[str] = None
	    template_exec_res: Optional[Any] = None
	    raw_exec_res: Optional[Any] = None

	    match_vids: Optional[Any] = None

	    raw_answer: Optional[str] = None
	    vector_only_answer: Optional[str] = None
	    graph_only_answer: Optional[str] = None
	    graph_vector_answer: Optional[str] = None

	    merged_result: Optional[Any] = None

	    vertex_num: Optional[int] = None
	    edge_num: Optional[int] = None
	    note: Optional[str] = None
	    removed_vid_vector_num: Optional[int] = None
	    added_vid_vector_num: Optional[int] = None
	    raw_texts: Optional[List] = None
	    query_examples: Optional[List] = None
	    few_shot_schema: Optional[Dict] = None
	    source_text: Optional[str] = None
	    scenario: Optional[str] = None
	    example_name: Optional[str] = None

	    graph_ratio: Optional[float] = None
	    query: Optional[str] = None
	    vector_search: Optional[bool] = None
	    graph_search: Optional[bool] = None
	    max_graph_items: Optional[int] = None
	    stream_generator: Optional[AsyncGenerator] = None

	    graph_result_flag: Optional[int] = None
	    vertex_degree_list: Optional[List] = None
	    knowledge_with_degree: Optional[Dict] = None
	    graph_context_head: Optional[str] = None

	    embed_dim: Optional[int] = None
	    is_graph_rag_recall: Optional[bool] = None

	    def setup(self) -> CStatus:
	        """Set every declared field on this instance to its class-level default.

	        The field names come from the class annotations instead of a second
	        hand-written list, so a newly declared field is cleared automatically.

	        Returns:
	            CStatus: A default-constructed status object.
	        """
	        for field_name in WkFlowState.__annotations__:
	            # All declared defaults are None; reading them from the class
	            # keeps this method correct if a non-None default is added later.
	            setattr(self, field_name, getattr(WkFlowState, field_name))
	        return CStatus()

	    def to_json(self) -> Dict[str, Any]:
	        """Return the public instance attributes whose value is not ``None``.

	        Only entries of the instance ``__dict__`` are considered, so methods
	        and untouched class-level defaults are never exported. Values are
	        returned as stored; no serialization or copying is performed.

	        Returns:
	            dict: Attribute name -> value for every attribute that does not
	            start with ``_`` and is not ``None``.
	        """
	        return {
	            name: value
	            for name, value in self.__dict__.items()
	            if not name.startswith("_") and value is not None
	        }

	    def assign_from_json(self, data_json: dict):
	        """Copy the entries of ``data_json`` onto matching attributes.

	        A key is assigned only when it names an existing attribute that is
	        neither private (leading ``_``) nor a method; previously such keys
	        passed the ``hasattr`` check and silently replaced the method or
	        internal attribute on the instance. Every rejected key is logged.

	        Args:
	            data_json: Mapping of attribute name to the value to assign.
	        """
	        for key, value in data_json.items():
	            if not hasattr(self, key):
	                log.warning(
	                    "key %s should be a member of WkFlowState & type %s", key, type(value)
	                )
	                continue
	            # Declared fields hold plain data at class level, so a callable
	            # class attribute means the key names a method, not a field.
	            if key.startswith("_") or callable(getattr(type(self), key, None)):
	                log.warning(
	                    "key %s is a method or private attribute of WkFlowState and is skipped",
	                    key,
	                )
	                continue
	            setattr(self, key, value)