blob: f3d1667864511ef843e3d003c53635e59955d4a4 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from PyCGraph import GPipeline
from hugegraph_llm.flows.common import BaseFlow
from hugegraph_llm.nodes.document_node.chunk_split import ChunkSplitNode
from hugegraph_llm.nodes.hugegraph_node.schema import SchemaNode
from hugegraph_llm.nodes.llm_node.extract_info import ExtractNode
from hugegraph_llm.state.ai_state import WkFlowInput, WkFlowState
from hugegraph_llm.utils.log import log
# pylint: disable=arguments-differ,keyword-arg-before-vararg
class GraphExtractFlow(BaseFlow):
    """Flow that extracts graph data (vertices/edges) from raw texts.

    Wires three nodes into a PyCGraph pipeline: schema resolution
    (SchemaNode) and document chunk splitting (ChunkSplitNode) run first
    with no dependencies, then LLM-based extraction (ExtractNode) consumes
    both of their results.
    """

    def __init__(self):
        # No per-instance state: all data is shared between nodes through
        # the "wkflow_input"/"wkflow_state" pipeline params created in
        # build_flow().
        pass

    def prepare(
        self,
        prepared_input: WkFlowInput,
        schema,
        texts,
        example_prompt,
        extract_type,
        language="zh",
        **kwargs,
    ):
        """Populate the shared input param consumed by the pipeline nodes.

        Args:
            prepared_input: mutable input holder shared via the pipeline.
            schema: graph schema used by SchemaNode / ExtractNode.
            texts: raw documents to split and extract from.
            example_prompt: few-shot example prompt for the extraction LLM.
            extract_type: kind of extraction to perform.
            language: prompt language code (defaults to "zh").
            **kwargs: accepted for signature compatibility; unused here.
        """
        prepared_input.texts = texts
        prepared_input.language = language
        # Graph extraction splits at document granularity.
        prepared_input.split_type = "document"
        prepared_input.example_prompt = example_prompt
        prepared_input.schema = schema
        prepared_input.extract_type = extract_type

    def build_flow(
        self, schema, texts, example_prompt, extract_type, language="zh", **kwargs
    ):
        """Assemble and return the graph-extraction GPipeline.

        Returns:
            GPipeline: configured (but not yet executed) pipeline.
        """
        pipeline = GPipeline()
        prepared_input = WkFlowInput()
        # Forward **kwargs so extra options reach prepare() instead of
        # being dropped silently (prepare() already accepts **kwargs).
        self.prepare(
            prepared_input,
            schema,
            texts,
            example_prompt,
            extract_type,
            language,
            **kwargs,
        )
        pipeline.createGParam(prepared_input, "wkflow_input")
        pipeline.createGParam(WkFlowState(), "wkflow_state")

        schema_node = SchemaNode()
        chunk_split_node = ChunkSplitNode()
        graph_extract_node = ExtractNode()
        # schema_node and chunk_split have no predecessors; graph_extract
        # waits on both before running.
        pipeline.registerGElement(schema_node, set(), "schema_node")
        pipeline.registerGElement(chunk_split_node, set(), "chunk_split")
        pipeline.registerGElement(
            graph_extract_node, {schema_node, chunk_split_node}, "graph_extract"
        )
        return pipeline

    def post_deal(self, pipeline=None, **kwargs):
        """Serialize the extraction result from the pipeline state to JSON.

        Args:
            pipeline: the executed GPipeline (required despite the default).
            **kwargs: accepted for signature compatibility; unused here.

        Returns:
            str: JSON document with "vertices" and "edges" keys; a
            "warning" key is appended when both lists are empty, which
            usually means the schema does not match the document.

        Raises:
            ValueError: if no pipeline is provided (previously this path
                crashed with an opaque AttributeError on None).
        """
        if pipeline is None:
            raise ValueError("post_deal() requires an executed pipeline")
        res = pipeline.getGParamWithNoEmpty("wkflow_state").to_json()
        vertices = res.get("vertices", [])
        edges = res.get("edges", [])
        payload = {"vertices": vertices, "edges": edges}
        if not vertices and not edges:
            log.info("Please check the schema.(The schema may not match the Doc)")
            payload["warning"] = "The schema may not match the Doc"
        return json.dumps(payload, ensure_ascii=False, indent=2)