blob: 1875a2df4afece94f5bbbd43deef9644c47f5754 [file] [log] [blame]
# SPDX-License-Identifier: Apache-2.0
from langchain.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from nifiapi.properties import PropertyDependency, PropertyDescriptor, StandardValidators
# Embedding Functions
ONNX_ALL_MINI_LM_L6_V2 = "ONNX all-MiniLM-L6-v2 Model"
HUGGING_FACE = "Hugging Face Model"
OPENAI = "OpenAI Model"
SENTENCE_TRANSFORMERS = "Sentence Transformers"
EMBEDDING_FUNCTION = PropertyDescriptor(
name="Embedding Function",
description="Specifies which embedding function should be used in order to create embeddings from incoming Documents",
allowable_values=[ONNX_ALL_MINI_LM_L6_V2, HUGGING_FACE, OPENAI, SENTENCE_TRANSFORMERS],
default_value=ONNX_ALL_MINI_LM_L6_V2,
required=True,
)
HUGGING_FACE_MODEL_NAME = PropertyDescriptor(
name="HuggingFace Model Name",
description="The name of the HuggingFace model to use",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="sentence-transformers/all-MiniLM-L6-v2",
required=True,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, HUGGING_FACE)],
)
HUGGING_FACE_API_KEY = PropertyDescriptor(
name="HuggingFace API Key",
description="The API Key for interacting with HuggingFace",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=True,
sensitive=True,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, HUGGING_FACE)],
)
OPENAI_API_KEY = PropertyDescriptor(
name="OpenAI API Key",
description="The API Key for interacting with OpenAI",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=True,
sensitive=True,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, OPENAI)],
)
OPENAI_MODEL_NAME = PropertyDescriptor(
name="OpenAI Model Name",
description="The name of the OpenAI model to use",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="text-embedding-ada-002",
required=True,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, OPENAI)],
)
OPENAI_ORGANIZATION = PropertyDescriptor(
name="OpenAI Organization ID",
description="The OpenAI Organization ID",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=False,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, OPENAI)],
)
OPENAI_API_BASE = PropertyDescriptor(
name="OpenAI API Base Path",
description="The API Base to use for interacting with OpenAI. This is used for interacting with different deployments, such as an Azure deployment.",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=False,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, OPENAI)],
)
OPENAI_API_TYPE = PropertyDescriptor(
name="OpenAI API Deployment Type",
description="The type of the OpenAI API Deployment. This is used for interacting with different deployments, such as an Azure deployment.",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=False,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, OPENAI)],
)
OPENAI_API_VERSION = PropertyDescriptor(
name="OpenAI API Version",
description="The OpenAI API Version. This is used for interacting with different deployments, such as an Azure deployment.",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=False,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, OPENAI)],
)
SENTENCE_TRANSFORMER_MODEL_NAME = PropertyDescriptor(
name="Sentence Transformer Model Name",
description="The name of the Sentence Transformer model to use",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="all-MiniLM-L6-v2",
required=True,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, SENTENCE_TRANSFORMERS)],
)
SENTENCE_TRANSFORMER_DEVICE = PropertyDescriptor(
name="Sentence Transformer Device Type",
description="""The type of device to use for performing the embeddings using the Sentence Transformer, such as 'cpu', 'cuda', 'mps', 'cuda:0', etc.
If not specified, a GPU will be used if possible, otherwise a CPU.""",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=False,
dependencies=[PropertyDependency(EMBEDDING_FUNCTION, SENTENCE_TRANSFORMERS)],
)
EMBEDDING_MODEL = PropertyDescriptor(
name="Embedding Model",
description="Specifies which embedding model should be used in order to create embeddings from incoming Documents. Default model is OpenAI.",
allowable_values=[HUGGING_FACE, OPENAI],
default_value=OPENAI,
required=True,
)
OPENAI_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The name of the OpenAI model to use",
default_value="text-embedding-ada-002",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
default_value="sentence-transformers/all-MiniLM-L6-v2",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
PROPERTIES = [
EMBEDDING_FUNCTION,
HUGGING_FACE_MODEL_NAME,
HUGGING_FACE_API_KEY,
OPENAI_MODEL_NAME,
OPENAI_API_KEY,
OPENAI_ORGANIZATION,
OPENAI_API_BASE,
OPENAI_API_TYPE,
OPENAI_API_VERSION,
SENTENCE_TRANSFORMER_MODEL_NAME,
SENTENCE_TRANSFORMER_DEVICE,
EMBEDDING_MODEL,
]
def create_embedding_function(context):
from chromadb.utils.embedding_functions import (
HuggingFaceEmbeddingFunction,
ONNXMiniLM_L6_V2,
OpenAIEmbeddingFunction,
SentenceTransformerEmbeddingFunction,
)
function_name = context.getProperty(EMBEDDING_FUNCTION).getValue()
if function_name == ONNX_ALL_MINI_LM_L6_V2:
return ONNXMiniLM_L6_V2()
if function_name == OPENAI:
api_key = context.getProperty(OPENAI_API_KEY).getValue()
model_name = context.getProperty(OPENAI_MODEL_NAME).getValue()
organization_id = context.getProperty(OPENAI_ORGANIZATION).getValue()
api_base = context.getProperty(OPENAI_API_BASE).getValue()
api_type = context.getProperty(OPENAI_API_TYPE).getValue()
api_version = context.getProperty(OPENAI_API_VERSION).getValue()
return OpenAIEmbeddingFunction(
api_key=api_key,
model_name=model_name,
organization_id=organization_id,
api_base=api_base,
api_type=api_type,
api_version=api_version,
)
if function_name == HUGGING_FACE:
api_key = context.getProperty(HUGGING_FACE_API_KEY).getValue()
model_name = context.getProperty(HUGGING_FACE_MODEL_NAME).getValue()
return HuggingFaceEmbeddingFunction(api_key=api_key, model_name=model_name)
model_name = context.getProperty(SENTENCE_TRANSFORMER_MODEL_NAME).getValue()
device = context.getProperty(SENTENCE_TRANSFORMER_DEVICE).getValue()
return SentenceTransformerEmbeddingFunction(model_name=model_name, device=device)
def create_embedding_service(context):
embedding_service = context.getProperty(EMBEDDING_MODEL).getValue()
if embedding_service == OPENAI:
openai_api_key = context.getProperty(OPENAI_API_KEY).getValue()
openai_model = context.getProperty(OPENAI_MODEL).getValue()
return OpenAIEmbeddings(openai_api_key=openai_api_key, model=openai_model)
huggingface_api_key = context.getProperty(HUGGING_FACE_API_KEY).getValue()
huggingface_model = context.getProperty(HUGGING_FACE_MODEL).getValue()
return HuggingFaceInferenceAPIEmbeddings(api_key=huggingface_api_key, model_name=huggingface_model)