blob: 429aba955564d01b3fd9f9c7e58c49144de2d9b7 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import AsyncGenerator, Union, List, Optional, Any, Dict
from PyCGraph import GParam, CStatus
from hugegraph_llm.utils.log import log
class WkFlowInput(GParam):
    """Input parameter block for the workflows defined around this module.

    Every field is declared with a class-level default (``None`` for the
    optional fields, ``False`` for the two recall flags). ``reset`` writes
    those defaults back onto the instance.
    """

    texts: Optional[Union[str, List[str]]] = None  # texts input used by ChunkSplit Node
    language: Optional[str] = None  # language configuration used by ChunkSplit Node
    split_type: Optional[str] = None  # split type used by ChunkSplit Node
    example_prompt: Optional[str] = None  # needed by graph information extraction
    schema: Optional[str] = None  # schema information required by SchemaNode
    data_json: Optional[Dict[str, Any]] = None
    extract_type: Optional[str] = None
    query_examples: Optional[Any] = None
    few_shot_schema: Optional[Any] = None

    # Fields related to PromptGenerate
    source_text: Optional[str] = None  # original text
    scenario: Optional[str] = None  # scenario description
    example_name: Optional[str] = None  # example name

    # Fields for Text2Gremlin
    example_num: Optional[int] = None
    requested_outputs: Optional[List[str]] = None

    # RAG flow related fields
    query: Optional[str] = None  # user query for RAG
    vector_search: Optional[bool] = None  # enable vector search
    graph_search: Optional[bool] = None  # enable graph search
    raw_answer: Optional[bool] = None  # return raw answer
    vector_only_answer: Optional[bool] = None  # vector-only answer mode
    graph_only_answer: Optional[bool] = None  # graph-only answer mode
    graph_vector_answer: Optional[bool] = None  # combined graph and vector answer
    graph_ratio: Optional[float] = None  # graph ratio for merging
    rerank_method: Optional[str] = None  # reranking method
    near_neighbor_first: Optional[bool] = None  # near-neighbor-first flag
    custom_related_information: Optional[str] = None  # custom related information
    answer_prompt: Optional[str] = None  # answer generation prompt
    keywords_extract_prompt: Optional[str] = None  # keywords extraction prompt
    gremlin_tmpl_num: Optional[int] = None  # Gremlin template number
    gremlin_prompt: Optional[str] = None  # Gremlin generation prompt
    max_graph_items: Optional[int] = None  # maximum graph items
    topk_return_results: Optional[int] = None  # top-k return results
    vector_dis_threshold: Optional[float] = None  # vector distance threshold
    topk_per_keyword: Optional[int] = None  # top-k per keyword
    max_keywords: Optional[int] = None
    max_items: Optional[int] = None

    # Semantic query related fields
    semantic_by: Optional[str] = None  # semantic query method
    topk_per_query: Optional[int] = None  # top-k per query

    # Graph query related fields
    max_deep: Optional[int] = None  # maximum depth for graph traversal
    max_v_prop_len: Optional[int] = None  # maximum vertex property length
    max_e_prop_len: Optional[int] = None  # maximum edge property length
    prop_to_match: Optional[str] = None  # property to match

    stream: Optional[bool] = None  # used to recognize stream mode

    # Used by the rag_recall API
    is_graph_rag_recall: bool = False
    is_vector_only: bool = False

    # Used to build the text2gremlin index
    examples: Optional[List[Dict[str, str]]] = None

    def reset(self, _: CStatus) -> None:
        """Restore every declared field to its class-level default.

        The status argument is accepted only to match the hook signature; it
        is not read.
        NOTE(review): presumably PyCGraph calls this between pipeline runs --
        confirm against the PyCGraph ``GParam`` contract.
        """
        # The annotated declarations above are the single source of truth:
        # each one carries its default as a class attribute, so copying that
        # value onto the instance yields None for the optional fields and
        # False for is_graph_rag_recall / is_vector_only.
        for field_name in WkFlowInput.__annotations__:
            setattr(self, field_name, getattr(WkFlowInput, field_name))
class WkFlowState(GParam):
    """Mutable state block that workflow steps fill in while a flow runs.

    All fields are optional and default to ``None``. ``setup`` materialises
    them on the instance, ``to_json`` exports the ones that have a value, and
    ``assign_from_json`` writes a dictionary of results back onto the state.
    """

    schema: Optional[str] = None  # schema message
    simple_schema: Optional[str] = None
    chunks: Optional[List[str]] = None
    edges: Optional[List[Any]] = None
    vertices: Optional[List[Any]] = None
    triples: Optional[List[Any]] = None
    call_count: Optional[int] = None
    keywords: Optional[List[str]] = None
    vector_result: Optional[Any] = None
    graph_result: Optional[Any] = None
    keywords_embeddings: Optional[Any] = None
    generated_extract_prompt: Optional[str] = None

    # Fields for Text2Gremlin results
    match_result: Optional[List[dict]] = None
    result: Optional[str] = None
    raw_result: Optional[str] = None
    template_exec_res: Optional[Any] = None
    raw_exec_res: Optional[Any] = None
    match_vids: Optional[Any] = None

    raw_answer: Optional[str] = None
    vector_only_answer: Optional[str] = None
    graph_only_answer: Optional[str] = None
    graph_vector_answer: Optional[str] = None
    merged_result: Optional[Any] = None

    vertex_num: Optional[int] = None
    edge_num: Optional[int] = None
    note: Optional[str] = None
    removed_vid_vector_num: Optional[int] = None
    added_vid_vector_num: Optional[int] = None
    raw_texts: Optional[List] = None
    query_examples: Optional[List] = None
    few_shot_schema: Optional[Dict] = None
    source_text: Optional[str] = None
    scenario: Optional[str] = None
    example_name: Optional[str] = None

    graph_ratio: Optional[float] = None
    query: Optional[str] = None
    vector_search: Optional[bool] = None
    graph_search: Optional[bool] = None
    max_graph_items: Optional[int] = None
    stream_generator: Optional[AsyncGenerator] = None

    graph_result_flag: Optional[int] = None
    vertex_degree_list: Optional[List] = None
    knowledge_with_degree: Optional[Dict] = None
    graph_context_head: Optional[str] = None

    embed_dim: Optional[int] = None
    is_graph_rag_recall: Optional[bool] = None

    def setup(self) -> CStatus:
        """Set every declared field to ``None`` on the instance.

        Writing the fields as instance attributes (rather than relying on the
        class-level defaults) is what makes them visible to ``to_json``, which
        reads ``self.__dict__``.
        NOTE(review): presumably PyCGraph calls this before a pipeline run --
        confirm against the PyCGraph ``GParam`` contract.

        Returns:
            A default-constructed ``CStatus``.
        """
        # Drive the reset from the annotated declarations so a newly added
        # field cannot be forgotten here.
        for field_name in WkFlowState.__annotations__:
            setattr(self, field_name, None)
        return CStatus()

    def to_json(self):
        """
        Automatically returns a JSON-formatted dictionary of all non-None instance members,
        eliminating the need to manually maintain the member list.

        Returns:
            dict: A dictionary containing non-None instance members and their values.
        """
        # Only instance attributes are exported: methods and class-level
        # defaults never appear in the instance __dict__. Underscore-prefixed
        # names and None values are filtered out.
        return {
            name: value
            for name, value in self.__dict__.items()
            if not name.startswith("_") and value is not None
        }

    def assign_from_json(self, data_json: dict):
        """
        Assigns each key in the input json object as a member variable of WkFlowState.

        A key is applied only when it names an existing data attribute. Keys
        that are unknown, are not strings, start with an underscore, or name a
        method of the class are skipped with a warning, so a payload can never
        replace ``to_json``/``setup`` or tamper with ``__dict__``/``__class__``.

        Args:
            data_json: Mapping of attribute name to the value to store.
        """
        state_cls = type(self)
        for k, v in data_json.items():
            # Mirror to_json, which never exports underscore-prefixed names.
            if not isinstance(k, str) or k.startswith("_"):
                log.warning(
                    "key %s is not an assignable member of WkFlowState & type %s",
                    k,
                    type(v),
                )
                continue
            # Look the name up on the class: declared fields default to None
            # there, whereas methods are callables that must not be clobbered
            # by data.
            if callable(getattr(state_cls, k, None)):
                log.warning(
                    "key %s is a method of WkFlowState and cannot be overwritten & type %s",
                    k,
                    type(v),
                )
                continue
            if hasattr(self, k):
                setattr(self, k, v)
            else:
                log.warning(
                    "key %s should be a member of WkFlowState & type %s", k, type(v)
                )