blob: 134247d6c9cafb65465ec0d82a75b83bcf6bf492 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package plugin
import (
"context"
"fmt"
"strings"
"github.com/sashabaranov/go-openai"
"github.com/segmentfault/pacman/log"
)
// VectorSearchResult holds a single similarity search result returned by a
// VectorSearch plugin from SearchSimilar.
type VectorSearchResult struct {
	// ObjectID is the unique identifier of the matched object (question ID or answer ID).
	ObjectID string `json:"object_id"`
	// ObjectType is "question" or "answer".
	ObjectType string `json:"object_type"`
	// Metadata is a JSON string containing VectorSearchMetadata for link
	// composition and content retrieval at query time.
	Metadata string `json:"metadata"`
	// Score is the cosine similarity score (0-1, higher is more similar).
	Score float64 `json:"score"`
}
// VectorSearchContent is the document structure passed to plugins for indexing
// (both bulk sync via VectorSearchSyncer and incremental UpdateContent calls).
//
// NOTE(review): the json tags here are camelCase ("objectID") while
// VectorSearchResult uses snake_case ("object_id") — looks intentional per
// consumer, but worth confirming; changing tags would break serialized data.
type VectorSearchContent struct {
	// ObjectID is the unique identifier (question ID or answer ID).
	ObjectID string `json:"objectID"`
	// ObjectType is "question" or "answer".
	ObjectType string `json:"objectType"`
	// Title is the question title.
	Title string `json:"title"`
	// Content is the aggregated text to be embedded (question body + answers + comments).
	Content string `json:"content"`
	// Metadata is a JSON string containing VectorSearchMetadata.
	Metadata string `json:"metadata"`
}
// VectorSearchDesc describes the vector search engine for display purposes
// (e.g. in an admin UI listing the active plugin).
type VectorSearchDesc struct {
	// Icon is an SVG icon for display. Optional.
	Icon string `json:"icon"`
	// Link is the URL of the vector search engine. Optional.
	Link string `json:"link"`
}
// VectorSearchMetadata holds IDs for URI composition and content retrieval at
// query time. It is serialized to a JSON string and carried in the Metadata
// field of VectorSearchContent / VectorSearchResult.
// Shared between plugins and the core MCP controller.
type VectorSearchMetadata struct {
	// QuestionID is always set: the owning question for both question and answer documents.
	QuestionID string `json:"question_id"`
	// AnswerID is set only for answer documents.
	AnswerID string `json:"answer_id,omitempty"`
	// Answers lists the question's answers (question documents only).
	Answers []VectorSearchMetadataAnswer `json:"answers,omitempty"`
	// Comments lists comments attached directly to the object.
	Comments []VectorSearchMetadataComment `json:"comments,omitempty"`
}
// VectorSearchMetadataAnswer stores an answer ID and its comment IDs inside
// VectorSearchMetadata.
type VectorSearchMetadataAnswer struct {
	// AnswerID is the unique identifier of the answer.
	AnswerID string `json:"answer_id"`
	// Comments lists the comments attached to this answer.
	Comments []VectorSearchMetadataComment `json:"comments,omitempty"`
}
// VectorSearchMetadataComment stores a single comment ID inside
// VectorSearchMetadata.
type VectorSearchMetadataComment struct {
	// CommentID is the unique identifier of the comment.
	CommentID string `json:"comment_id"`
}
// VectorSearch is the plugin interface for vector/semantic search engines.
// Plugins implementing this interface manage their own vector storage,
// embedding computation, data synchronization schedule, and similarity search;
// the core only pushes content changes and issues queries.
type VectorSearch interface {
	Base
	// Description returns metadata about the vector search engine.
	Description() VectorSearchDesc
	// RegisterSyncer is called by the core to provide a data syncer.
	// The plugin should store the syncer and use it to bulk-sync content
	// (typically in a background goroutine, so this call must not block).
	RegisterSyncer(ctx context.Context, syncer VectorSearchSyncer)
	// SearchSimilar performs a semantic similarity search.
	// The plugin is responsible for embedding the query text and searching its vector store.
	// Returns up to topK results sorted by similarity score (descending).
	SearchSimilar(ctx context.Context, query string, topK int) ([]VectorSearchResult, error)
	// UpdateContent upserts a single document in the vector store.
	// Called by the core on incremental content changes.
	UpdateContent(ctx context.Context, content *VectorSearchContent) error
	// DeleteContent removes a document from the vector store by object ID.
	// Deleting an ID that is not indexed should not be treated as an error.
	DeleteContent(ctx context.Context, objectID string) error
}
// VectorSearchSyncer is implemented by the core and provided to plugins via
// RegisterSyncer. Plugins call these methods to pull all content, page by
// page, for bulk indexing. An empty page signals the end of the data set.
type VectorSearchSyncer interface {
	// GetQuestionsPage returns a page of questions with aggregated text (title + body + answers + comments).
	GetQuestionsPage(ctx context.Context, page, pageSize int) ([]*VectorSearchContent, error)
	// GetAnswersPage returns a page of answers with aggregated text (answer body + parent question title + comments).
	GetAnswersPage(ctx context.Context, page, pageSize int) ([]*VectorSearchContent, error)
}
var (
	// CallVectorSearch invokes all registered VectorSearch plugins;
	// registerVectorSearch is the matching registration hook used by plugin
	// packages. Both are produced by the generic MakePlugin factory
	// (false = the plugin is not enabled by default).
	CallVectorSearch,
	registerVectorSearch = MakePlugin[VectorSearch](false)
)
// GenerateEmbedding is a base utility function that generates an embedding vector
// using an OpenAI-compatible API. Plugins that don't have a built-in vectorizer
// (most vector databases) can call this function with their own credentials.
// Plugins with built-in vectorizers (e.g., Weaviate) can skip this and use their own.
//
// Parameters:
//   - ctx: context for cancellation
//   - apiHost: the API base URL (e.g. "https://api.openai.com"); trailing slashes
//     are stripped and "/v1" is appended if missing
//   - apiKey: the API key for authentication
//   - model: the embedding model name (e.g. "text-embedding-3-small")
//   - text: the text to embed
//
// Returns the embedding vector as []float32, or an error.
func GenerateEmbedding(ctx context.Context, apiHost, apiKey, model, text string) ([]float32, error) {
	// Fail fast on misconfiguration instead of surfacing an opaque transport error.
	if apiHost == "" {
		return nil, fmt.Errorf("embedding api host is not configured")
	}
	if model == "" {
		return nil, fmt.Errorf("embedding model is not configured")
	}
	if text == "" {
		return nil, fmt.Errorf("text is empty")
	}
	config := openai.DefaultConfig(apiKey)
	// Normalize the base URL: trim trailing slashes so "https://host/" does not
	// become "https://host//v1", then append "/v1" unless the caller already
	// included it.
	config.BaseURL = strings.TrimRight(apiHost, "/")
	if !strings.HasSuffix(config.BaseURL, "/v1") {
		config.BaseURL += "/v1"
	}
	log.Debugf("embedding: requesting model=%s baseURL=%s textLen=%d", model, config.BaseURL, len(text))
	client := openai.NewClientWithConfig(config)
	resp, err := client.CreateEmbeddings(ctx, openai.EmbeddingRequestStrings{
		Input: []string{text},
		Model: openai.EmbeddingModel(model),
	})
	if err != nil {
		log.Errorf("embedding: request failed model=%s baseURL=%s err=%v", model, config.BaseURL, err)
		return nil, fmt.Errorf("create embeddings failed: %w", err)
	}
	// Guard against a well-formed response carrying no vectors.
	if len(resp.Data) == 0 {
		log.Errorf("embedding: no data returned model=%s baseURL=%s", model, config.BaseURL)
		return nil, fmt.Errorf("no embedding returned")
	}
	log.Debugf("embedding: success model=%s dimensions=%d usage={prompt=%d,total=%d}",
		model, len(resp.Data[0].Embedding), resp.Usage.PromptTokens, resp.Usage.TotalTokens)
	return resp.Data[0].Embedding, nil
}