| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| import json |
| |
| from PyCGraph import GPipeline |
| |
| from hugegraph_llm.flows.common import BaseFlow |
| from hugegraph_llm.nodes.document_node.chunk_split import ChunkSplitNode |
| from hugegraph_llm.nodes.index_node.build_vector_index import BuildVectorIndexNode |
| from hugegraph_llm.state.ai_state import WkFlowInput |
| from hugegraph_llm.state.ai_state import WkFlowState |
| |
| |
| # pylint: disable=arguments-differ,keyword-arg-before-vararg |
| class BuildVectorIndexFlow(BaseFlow): |
| def __init__(self): |
| pass |
| |
| def prepare(self, prepared_input: WkFlowInput, texts, **kwargs): |
| prepared_input.texts = texts |
| prepared_input.language = "zh" |
| prepared_input.split_type = "paragraph" |
| |
| def build_flow(self, texts, **kwargs): |
| pipeline = GPipeline() |
| # prepare for workflow input |
| prepared_input = WkFlowInput() |
| self.prepare(prepared_input, texts) |
| |
| pipeline.createGParam(prepared_input, "wkflow_input") |
| pipeline.createGParam(WkFlowState(), "wkflow_state") |
| |
| chunk_split_node = ChunkSplitNode() |
| build_vector_node = BuildVectorIndexNode() |
| pipeline.registerGElement(chunk_split_node, set(), "chunk_split") |
| pipeline.registerGElement(build_vector_node, {chunk_split_node}, "build_vector") |
| |
| return pipeline |
| |
| def post_deal(self, pipeline=None, **kwargs): |
| res = pipeline.getGParamWithNoEmpty("wkflow_state").to_json() |
| return json.dumps(res, ensure_ascii=False, indent=2) |