blob: b57cbfbe310b7d726bb197915a28cb062d419099 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from PyCGraph import GPipeline
from hugegraph_llm.flows.common import BaseFlow
from hugegraph_llm.nodes.document_node.chunk_split import ChunkSplitNode
from hugegraph_llm.nodes.index_node.build_vector_index import BuildVectorIndexNode
from hugegraph_llm.state.ai_state import WkFlowInput
from hugegraph_llm.state.ai_state import WkFlowState
# pylint: disable=arguments-differ,keyword-arg-before-vararg
class BuildVectorIndexFlow(BaseFlow):
    """Flow that chains a ChunkSplitNode into a BuildVectorIndexNode.

    ``build_flow`` assembles a ``GPipeline`` holding both nodes plus the
    ``wkflow_input`` / ``wkflow_state`` parameters, and ``post_deal``
    serialises the resulting state to a JSON string.
    """

    def __init__(self):
        """Create the flow; no instance state is set up here."""
        # NOTE(review): BaseFlow.__init__ is not invoked — confirm that this
        # is intended.

    def prepare(self, prepared_input: WkFlowInput, texts, **kwargs):
        """Fill ``prepared_input`` with the texts and fixed split settings.

        Args:
            prepared_input: Workflow input object to populate in place.
            texts: Stored as-is on ``prepared_input.texts``.
            **kwargs: Accepted but not used by this method.
        """
        # Assigned in this order: texts, language, split_type.
        settings = (
            ("texts", texts),
            ("language", "zh"),
            ("split_type", "paragraph"),
        )
        for attr_name, attr_value in settings:
            setattr(prepared_input, attr_name, attr_value)

    def build_flow(self, texts, **kwargs):
        """Assemble and return the chunk-split -> vector-index pipeline.

        Args:
            texts: Forwarded to ``prepare`` to populate the workflow input.
            **kwargs: Accepted but not used (not forwarded to ``prepare``).

        Returns:
            The ``GPipeline`` with both parameters created and both nodes
            registered.
        """
        graph = GPipeline()

        # Workflow input is populated before being attached to the pipeline.
        flow_input = WkFlowInput()
        self.prepare(flow_input, texts)
        graph.createGParam(flow_input, "wkflow_input")
        graph.createGParam(WkFlowState(), "wkflow_state")

        splitter = ChunkSplitNode()
        indexer = BuildVectorIndexNode()
        # (element, set passed as second argument, registered name); the
        # indexer is registered with the splitter in that set.
        stages = (
            (splitter, set(), "chunk_split"),
            (indexer, {splitter}, "build_vector"),
        )
        for element, upstream, label in stages:
            graph.registerGElement(element, upstream, label)
        return graph

    def post_deal(self, pipeline=None, **kwargs):
        """Return the pipeline's ``wkflow_state`` as a JSON string.

        Args:
            pipeline: Pipeline to read the ``wkflow_state`` parameter from.
                The default of ``None`` is not handled; a real pipeline must
                be supplied.
            **kwargs: Accepted but not used by this method.

        Returns:
            ``json.dumps`` of the state's ``to_json()`` result, with non-ASCII
            characters kept verbatim and a 2-space indent.
        """
        state = pipeline.getGParamWithNoEmpty("wkflow_state")
        return json.dumps(state.to_json(), ensure_ascii=False, indent=2)