blob: 12128f352b9e2b270b242abd2811f1a117ef0eb7 [file] [log] [blame]
from django.contrib.postgres.fields import ArrayField
from django.core.validators import MinLengthValidator
from django.db import models
from django.utils.translation import gettext as _
from django.utils.translation import gettext_lazy

from trackingserver_base.models import GenericAttribute, TimeStampedModel
from trackingserver_projects.models import Project
class CodeVersion(TimeStampedModel):
    """A version of code.

    A code version is uniquely identified by a hash together with the type of
    its version info. This model declares no fields of its own.
    """
class DAGTemplate(TimeStampedModel):
    """Template for a DAG.

    This model has become a bit large, and it is quite likely we will want to
    split it up at some point in the future. For now, it contains:

    1. Name
    2. DAG hash
    3. Code hash

    To find the actual nodes and code artifacts, look up the associated
    NodeTemplates and their associated code artifacts. Any code we know about
    is stored with the DAGTemplate.
    """

    class TemplateType(models.TextChoices):
        # Labels are lazy so they are translated when rendered rather than
        # once, at import time, in whatever language is active at startup.
        HAMILTON = "HAMILTON", gettext_lazy("Hamilton")

    # Owning project; SET_NULL keeps the template row if the project is deleted.
    project = models.ForeignKey(Project, on_delete=models.SET_NULL, null=True)
    # DAG templates have a name
    name = models.CharField(max_length=255)
    # The type of the template
    template_type = models.CharField(max_length=255, choices=TemplateType.choices)
    # Any configuration information required to create it
    config = models.JSONField(null=True)
    # DAG hash -- this hashes the DAG + config so we only store it once.
    # It is computed on the user side -- a Merkle hash of the nodes.
    # max_length plus MinLengthValidator pin the value to exactly 64 characters
    # whenever validators run. The hash is not unique by itself
    # (unique=False); uniqueness is enforced via Meta.unique_together.
    dag_hash = models.CharField(max_length=64, validators=[MinLengthValidator(64)], unique=False)
    is_active = models.BooleanField(default=True)
    tags = models.JSONField(null=True)

    class CodeLogStore(models.TextChoices):
        s3 = "s3", gettext_lazy("S3")
        local = "local", gettext_lazy("Local")
        none = "none", gettext_lazy("None")

    class VersionInfo(models.TextChoices):
        GIT = "git", gettext_lazy("Git")
        AD_HOC = "ad_hoc", gettext_lazy("Ad hoc")

    code_hash = models.CharField()
    code_version_info_type = models.CharField(choices=VersionInfo.choices)
    code_version_info = models.JSONField(null=True)
    code_version_info_schema = models.IntegerField(null=True)
    code_log_store = models.CharField(choices=CodeLogStore.choices)
    # Blob store location
    code_log_url = models.CharField(default=None, null=True)  # We allow you not to store code
    # We will handle versions on the server side in case we want to store specific information
    code_log_schema_version = models.IntegerField(null=True)

    class Meta:
        # A template is identified by the combination of these four fields.
        unique_together = ("dag_hash", "code_hash", "name", "project")
class DAGTemplateAttribute(GenericAttribute):
    """Generic attribute attached to a DAGTemplate.

    In Hamilton's case this is used for, e.g., all available functions.
    """

    dag_template = models.ForeignKey(
        DAGTemplate,
        null=True,
        on_delete=models.SET_NULL,
    )

    class Meta:
        # One attribute per name for any given DAG template.
        unique_together = (("name", "dag_template"),)
class CodeArtifact(TimeStampedModel):
    """Represents a code artifact.

    A code artifact is meant to be uniquely identified by the code hash and
    the artifact name.

    NOTE(review): no uniqueness constraint is declared on this model itself --
    confirm where that invariant is enforced.
    """

    # Template this artifact belongs to; SET_NULL keeps the artifact row if the
    # template is deleted.
    dag_template = models.ForeignKey(DAGTemplate, on_delete=models.SET_NULL, null=True)

    class ArtifactType(models.TextChoices):
        # Labels are lazy so they are translated when rendered rather than
        # once, at import time, in whatever language is active at startup.
        PYTHON_FUNCTION = "p_function", gettext_lazy("Function")
        PYTHON_CLASS = "p_class", gettext_lazy("Class")
        PYTHON_MODULE = "p_module", gettext_lazy("Module")
        UNKNOWN = "p_unknown", gettext_lazy("Unknown")

    # The name of the code artifact
    # Note this must be unique within a project version so we can access this
    name = models.CharField()
    # The type of the code artifact (one of ArtifactType)
    type = models.CharField(max_length=15, choices=ArtifactType.choices)
    # The path to the code artifact
    path = models.CharField()
    # The start line of the code artifact
    start = models.IntegerField()
    # The end line of the code artifact
    end = models.IntegerField()
    # The URL to the code artifact
    url = models.CharField(max_length=255)
class NodeTemplateCodeArtifactRelation(TimeStampedModel):
    """Link between a NodeTemplate and a CodeArtifact.

    A link can be flagged as primary; the constraint below allows at most one
    primary link per node template.
    """

    is_primary = models.BooleanField(default=False)
    node_template = models.ForeignKey(
        "NodeTemplate",
        null=True,
        on_delete=models.SET_NULL,
    )
    code_artifact = models.ForeignKey(
        "CodeArtifact",
        null=True,
        on_delete=models.SET_NULL,
    )

    class Meta:
        constraints = [
            # Partial unique constraint: only rows with is_primary=True take part.
            models.UniqueConstraint(
                name="unique_primary",
                fields=("node_template", "is_primary"),
                condition=models.Q(is_primary=True),
            ),
        ]
class NodeTemplate(TimeStampedModel):
    """Represents a node in a template DAG.

    This is general -- a node does not have to have dependencies, etc., but
    the more we know the better.
    """

    class NodeType(models.TextChoices):
        # Labels are lazy so they are translated when rendered rather than
        # once, at import time, in whatever language is active at startup.
        transform = "transform", gettext_lazy("Transform")
        data_saver = "data_saver", gettext_lazy("DataSaver")
        data_loader = "data_loader", gettext_lazy("DataLoader")
        input = "input", gettext_lazy("Input")  # input, not actually run
        placeholder = "placeholder", gettext_lazy("Placeholder")

    # Nodes have a name that is unique within their DAG template
    # (see Meta.unique_together).
    # In Hamilton's case, this includes a .-separated namespace.
    # NOTE(review): the intended limit was 511 chars, but no max_length is set
    # on this field -- confirm whether a limit should be enforced.
    name = models.CharField(db_index=True)
    # Nodes have a template they belong to -- this is an indexed foreign key
    dag_template = models.ForeignKey(DAGTemplate, on_delete=models.SET_NULL, null=True)
    # Nodes have one or more code artifacts that they use, linked through
    # NodeTemplateCodeArtifactRelation.
    # NOTE(review): related_name names the reverse accessor on CodeArtifact, so
    # "code_artifacts" there yields NodeTemplates; kept as-is for compatibility.
    code_artifacts = models.ManyToManyField(
        "CodeArtifact",
        db_index=False,
        through=NodeTemplateCodeArtifactRelation,
        related_name="code_artifacts",
    )
    # Nodes have dependencies -- these are references to other nodes, but not handled by the DB
    # This is in case we need to be flexible
    dependencies = ArrayField(models.CharField(), null=True)
    # Nodes have specs for dependencies to specify types/whatnot
    dependency_specs = ArrayField(models.JSONField(), null=True)
    dependency_specs_type = models.CharField(max_length=63, null=True)
    dependency_specs_schema_version = models.IntegerField(null=True)
    # Nodes have a spec for the output
    # This is nullable in case the output is nothing
    output = models.JSONField(null=True)
    output_type = models.CharField(null=True)
    output_schema_version = models.IntegerField(null=True)
    # All nodes have the capability to have documentation
    documentation = models.TextField(null=True)
    # Any tags the node specifies (arbitrary JSON)
    # In Hamilton's case, this corresponds directly to tags
    tags = models.JSONField(null=True)
    # Classifications of the node, each one of NodeType; unlike the other
    # array fields, this one is not nullable.
    classifications = ArrayField(models.CharField(choices=NodeType.choices))

    class Meta:
        unique_together = ("name", "dag_template")

    def __str__(self) -> str:
        """Return "<name> in <dag_template_id>"."""
        # f-string formatting avoids a TypeError if name is not a str.
        return f"{self.name} in {self.dag_template_id}"
# For now let's leave this out
# class TemplateNodeAttribute(GenericAttribute):
# template_node = models.ForeignKey(TemplateNode, on_delete=models.SET_NULL, null=True)