blob: 10ed2dce47901fde7446fa841506a81ae34a2408 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any, Optional, Dict
from hugegraph_llm.utils.log import log
class CheckSchema:
def __init__(self, data):
self.result = None
self.data = data
def run(self, context: Optional[Dict[str, Any]] = None) -> Any: # pylint: disable=too-many-branches
if context is None:
context = {}
schema = self.data or context.get("schema")
if not isinstance(schema, dict):
raise ValueError("Input data is not a dictionary.")
if "vertexlabels" not in schema or "edgelabels" not in schema:
raise ValueError("Input data does not contain 'vertexlabels' or 'edgelabels'.")
if not isinstance(schema["vertexlabels"], list) or not isinstance(schema["edgelabels"],
list):
raise ValueError("'vertexlabels' or 'edgelabels' in input data is not a list.")
for vertex in schema["vertexlabels"]:
if not isinstance(vertex, dict):
raise ValueError("Vertex in input data is not a dictionary.")
if "name" not in vertex:
raise ValueError("Vertex in input data does not contain 'name'.")
if not isinstance(vertex["name"], str):
raise ValueError("'name' in vertex is not of correct type.")
if "properties" not in vertex:
raise ValueError("Vertex in input data does not contain 'properties'.")
properties = vertex["properties"]
if not isinstance(properties, list):
raise ValueError("'properties' in vertex is not of correct type.")
if len(properties) == 0:
raise ValueError("'properties' in vertex is empty.")
primary_keys = vertex.get("primary_keys", properties[:1])
if not isinstance(primary_keys, list):
raise ValueError("'primary_keys' in vertex is not of correct type.")
new_primary_keys = []
for key in primary_keys:
if key not in properties:
log.waring("Primary key '%s' not found in properties has been auto removed.",
key)
else:
new_primary_keys.append(key)
if len(new_primary_keys) == 0:
raise ValueError(f"primary keys of vertexLabel {vertex['vertex_label']} is empty.")
vertex["primary_keys"] = new_primary_keys
nullable_keys = vertex.get("nullable_keys", properties[1:])
if not isinstance(nullable_keys, list):
raise ValueError("'nullable_keys' in vertex is not of correct type.")
new_nullable_keys = []
for key in nullable_keys:
if key not in properties:
log.warning("Nullable key '%s' not found in properties has been auto removed.",
key)
else:
new_nullable_keys.append(key)
vertex["nullable_keys"] = new_nullable_keys
for edge in schema["edgelabels"]:
if not isinstance(edge, dict):
raise ValueError("Edge in input data is not a dictionary.")
if (
"name" not in edge
or "source_label" not in edge
or "target_label" not in edge
):
raise ValueError(
"Edge in input data does not contain "
"'name', 'source_label', 'target_label'."
)
if (
not isinstance(edge["name"], str)
or not isinstance(edge["source_label"], str)
or not isinstance(edge["target_label"], str)
):
raise ValueError(
"'name', 'source_label', 'target_label' "
"in edge is not of correct type."
)
context.update({"schema": schema})
return context