blob: bf7d4ff89b58874c21ebd5ab57fa878c3eaf22f1 [file] [log] [blame]
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
r"""Core Bloodhound Search components."""
from trac.config import ExtensionOption, OrderedExtensionsOption
from trac.core import (Interface, Component, ExtensionPoint, TracError,
implements)
from trac.env import IEnvironmentSetupParticipant
from multiproduct.api import ISupportMultiProductEnvironment
from multiproduct.core import MultiProductExtensionPoint
# Sort-order tokens. SortInstruction lower-cases and strips the order it is
# given and accepts only these two values.
ASC = "asc"
DESC = "desc"
# NOTE(review): SCORE is not referenced anywhere in this module; presumably it
# names the relevance-score pseudo field used for sorting -- confirm at callers.
SCORE = "score"
class IndexFields(object):
    """String keys used to address the fields of an index document.

    The constants are used as dictionary keys on documents, e.g.
    ``doc[IndexFields.PRODUCT]``.
    """

    # Keys read when a document is re-keyed in the index
    # (product and type; the id is supplied separately).
    TYPE = "type"
    ID = "id"

    # Remaining document fields.
    TIME = "time"
    AUTHOR = "author"
    CONTENT = "content"
    STATUS = "status"
    PRODUCT = "product"
    REQUIRED_PERMISSION = "required_permission"
    NAME = "name"
class QueryResult(object):
    """Mutable container for the outcome of a search query.

    A new instance starts out empty; whoever produces the result is expected
    to fill the attributes in.
    """

    def __init__(self):
        # Hit count and paging information all start at zero.
        self.hits = self.page_count = self.page_number = self.offset = 0
        # Two distinct, initially empty lists (never shared between
        # attributes or between instances).
        self.docs, self.highlighting = [], []
        # Not set until explicitly provided.
        self.facets = self.query_suggestion = None
        # Free-form diagnostics; BloodhoundSearchApi.query stores the
        # parameters it used under the "api_parameters" key.
        self.debug = dict()
class SortInstruction(object):
    """A single sort criterion: a field name plus a normalised sort order.

    Instances compare by value (all instance attributes must match) and hash
    consistently with that comparison, so they can be compared with both
    ``==`` and ``!=`` and stored in sets or used as dictionary keys.
    """

    def __init__(self, field, order):
        """
        :param field: name of the field to sort by
        :param order: sort order, see :meth:`_parse_sort_order`
        :raises TracError: if ``order`` is not a recognised sort order
        """
        self.field = field
        self.order = self._parse_sort_order(order)

    def _parse_sort_order(self, order):
        """Normalise ``order`` to ``ASC`` or ``DESC``.

        A falsy value (``None``, empty string) defaults to ``ASC``. Any
        other value is stripped and lower-cased before being compared.

        :raises TracError: if the normalised value is neither ASC nor DESC
        """
        if not order:
            return ASC
        order = order.strip().lower()
        if order == ASC:
            return ASC
        if order == DESC:
            return DESC
        raise TracError(
            "Invalid sort order %s in sort instruction" % order)

    def build_sort_expression(self):
        """Return the instruction as a ``"<field> <order>"`` string."""
        return "%s %s" % (self.field, self.order)

    def __str__(self):
        return str(self.__dict__)

    def __repr__(self):
        # Sort instructions end up inside containers that get logged (for
        # example the ``locals()`` dump in BloodhoundSearchApi.query), where
        # the default "<... object at 0x...>" representation is useless.
        return "SortInstruction(field=%r, order=%r)" % (
            self.field, self.order)

    def __eq__(self, other):
        if not isinstance(other, SortInstruction):
            return False
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; without this method
        # two equal instructions would also compare as "not equal".
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with value-based equality: equal instances
        # always share field and order, hence the same hash.
        return hash((self.field, self.order))
class ISearchWikiSyntaxFormatter(Interface):
    """Extension point interface for wiki syntax processing.

    Implementations turn wiki markup into a text representation that is
    suitable for search.
    """

    # NOTE(review): unlike most interface methods in this module, this
    # declaration lists ``self`` explicitly.
    def format(self, wiki_text):
        """
        Process wiki syntax and return text representation suitable for search

        :param wiki_text: text containing wiki syntax
        :return: text representation of ``wiki_text`` suitable for search
        """
class ISearchBackend(Interface):
    """Extension point interface for search backend systems.

    BloodhoundSearchApi delegates all index access to the single component
    configured as its ``backend`` option, which implements this interface.
    """

    def add_doc(doc, operation_context):
        """
        Called when new document instance must be added

        :param doc: document to add; BloodhoundSearchApi passes a dictionary
        :param operation_context: context obtained from ``start_operation()``
            when the call is part of a batch; BloodhoundSearchApi.add_doc
            passes ``None`` when the caller supplied no context
        """

    def delete_doc(product, doc_type, doc_id, operation_context):
        """
        Delete document from index

        :param product: product of the document to delete
        :param doc_type: type of the document to delete
        :param doc_id: id of the document to delete
        :param operation_context: context obtained from ``start_operation()``
        """

    def optimize():
        """
        Optimize index if needed
        """

    def is_index_outdated():
        """
        Check if index is outdated and needs to be recreated.

        The result is returned unchanged by
        BloodhoundSearchApi.environment_needs_upgrade.
        """

    def recreate_index():
        """
        Create a new index, if index exists, it will be deleted
        """

    def open_or_create_index_if_missing():
        """
        Open existing index, if index does not exist, create new one
        """

    # NOTE(review): BloodhoundSearchApi.query additionally passes a
    # ``query_string`` keyword argument that is not declared here, and it does
    # not pass ``context`` -- confirm against the concrete backends.
    def query(
            query,
            sort = None,
            fields = None,
            filter = None,
            facets = None,
            pagenum = 1,
            pagelen = 20,
            highlight=False,
            highlight_fields=None,
            context=None):
        """
        Perform query implementation

        :param query: Parsed query object
        :param sort: list of SortInstruction objects
        :param fields: list of fields to select
        :param filter: filter query object
        :param facets: list of facet fields
        :param pagenum: page number
        :param pagelen: page length
        :param highlight: highlight matched terms in fields
        :param highlight_fields: list of fields to highlight
        :param context: request context
        :return: result object; BloodhoundSearchApi.query documents it as a
            QueryResult and writes into its ``debug`` mapping
        """

    # NOTE(review): declared with an explicit ``self``, unlike the other
    # methods of this interface.
    def start_operation(self):
        """Used to get arguments for batch operation within single commit.

        BloodhoundSearchApi uses the return value as a context manager
        (``with backend.start_operation() as operation_context``) and hands
        the resulting ``operation_context`` to add_doc / delete_doc.
        """
class IIndexParticipant(Interface):
    """Extension point interface for components that provide the documents
    to be indexed.

    BloodhoundSearchApi.rebuild_index asks every participant for its entries
    and adds each of them to the index.
    """

    def get_entries_for_index():
        """List entities for index creation.

        :return: iterable of documents; rebuild_index reads the ``product``,
            ``type`` and ``id`` keys of every document it receives
        """
class ISearchParticipant(Interface):
    """Extension point interface for components that should be searched.
    """

    def format_search_results(contents):
        """Called to see if the module wants to format the search results."""

    def is_allowed(req):
        """Called when we want to build the list of components with search.
        Passes the request object to do permission checking.

        :param req: the request object
        """

    def get_participant_type():
        """Return type of search participant"""

    # NOTE(review): declared with an explicit ``self``, unlike the other
    # methods of this interface.
    def get_required_permission(self):
        """Return permission required to view components in search results"""

    def get_title():
        """Return resource title."""

    def get_default_facets():
        """Return default facets for the specific resource type."""

    def get_default_view():
        """Return True if grid is enabled by default for specific resource."""

    def get_default_view_fields(view):
        """Return the list of fields that should be returned in the grid by
        default.

        :param view: the view the default fields are requested for
        """
class IQueryParser(Interface):
    """Extension point for Bloodhound Search query parser.

    BloodhoundSearchApi.query calls both methods and hands their results to
    the search backend as the ``query`` and ``filter`` arguments.
    """

    def parse(query_string, context):
        """Parse query from string

        :param query_string: the query as received by
            BloodhoundSearchApi.query
        :param context: request context
        :return: parsed query
        """

    def parse_filters(filters):
        """Parse query filters

        :param filters: the filters as received by BloodhoundSearchApi.query
            (may be ``None``, which is that method's default)
        :return: parsed filters
        """
class IDocIndexPreprocessor(Interface):
    """Extension point for Bloodhound Search document pre-processing before
    adding or updating documents in the index.
    """

    def pre_process(doc):
        """Process document

        Called by BloodhoundSearchApi.add_doc before the document is handed
        to the backend. The return value is ignored there, so changes have to
        be made to ``doc`` in place.

        :param doc: the document about to be indexed
        """
class IResultPostprocessor(Interface):
    """Extension point for Bloodhound Search result post-processing before
    returning result to caller.
    """

    def post_process(query_result):
        """Process query result

        Called by BloodhoundSearchApi.query with the object returned by the
        backend. The return value is ignored there, so changes have to be
        made to ``query_result`` in place.

        :param query_result: result returned by the search backend
        """
class IQueryPreprocessor(Interface):
    """Extension point for Bloodhound Search query pre processing.
    """

    def query_pre_process(query_parameters, context):
        """Process query parameters

        Called by BloodhoundSearchApi.query before the backend is queried.
        The return value is ignored there; the (possibly modified)
        ``query_parameters`` dictionary is then expanded into the keyword
        arguments of the backend query.

        :param query_parameters: dictionary with the keys ``query``,
            ``query_string``, ``sort``, ``fields``, ``filter``, ``facets``,
            ``pagenum``, ``pagelen``, ``highlight`` and ``highlight_fields``
        :param context: request context
        """
class IMetaKeywordParser(Interface):
    """Extension point for custom meta keywords."""

    # NOTE(review): what should be returned when ``text`` does not match is
    # not specified here -- confirm against the implementations.
    def match(text, context):
        """If text matches the keyword, return its transformed value.

        :param text: text to check against the keyword
        :param context: context of the match (not used within this module)
        """
class BloodhoundSearchApi(Component):
    """Implements core indexing functionality, provides methods for
    searching, adding and deleting documents from index.

    All index access is delegated to the configured search backend; queries
    are parsed by the configured query parser.
    """
    implements(IEnvironmentSetupParticipant, ISupportMultiProductEnvironment)

    # Adjacent string literals are used for the option descriptions instead
    # of a backslash continuation inside the literal, which would embed the
    # source indentation in the help text.
    backend = ExtensionOption(
        'bhsearch', 'search_backend', ISearchBackend, 'WhooshBackend',
        'Name of the component implementing Bloodhound Search backend '
        'interface: ISearchBackend.')

    parser = ExtensionOption(
        'bhsearch', 'query_parser', IQueryParser, 'DefaultQueryParser',
        'Name of the component implementing Bloodhound Search query '
        'parser.')

    index_pre_processors = OrderedExtensionsOption(
        'bhsearch', 'index_preprocessors', IDocIndexPreprocessor,
        ['SecurityPreprocessor'],
    )
    result_post_processors = ExtensionPoint(IResultPostprocessor)
    query_processors = ExtensionPoint(IQueryPreprocessor)
    index_participants = MultiProductExtensionPoint(IIndexParticipant)

    def query(
            self,
            query,
            sort = None,
            fields = None,
            filter = None,
            facets = None,
            pagenum = 1,
            pagelen = 20,
            highlight = False,
            highlight_fields = None,
            context = None):
        """Return query result from an underlying search backend.

        Arguments:
        :param query: query string e.g. "bla status:closed" or a parsed
            representation of the query.
        :param sort: optional sorting
        :param fields: optional list of fields to select
        :param filter: optional list of terms. Usually can be cached by
            underlying search framework. For example {"type": "wiki"}
        :param facets: optional list of facet terms, can be field or expression
        :param pagenum: paging support, page number
        :param pagelen: paging support, page length
        :param highlight: highlight matched terms in fields
        :param highlight_fields: list of fields to highlight
        :param context: request context; handed to the query parser and to
            the query pre-processors, not to the backend
        :return: result QueryResult
        """
        # pylint: disable=too-many-locals
        # Must stay the first statement so that locals() holds only the
        # call arguments.
        self.env.log.debug("Receive query request: %s", locals())

        parsed_query = self.parser.parse(query, context)
        parsed_filters = self.parser.parse_filters(filter)
        # TODO: add query parsers and meta keywords post-parsing
        # TODO: apply security filters
        query_parameters = dict(
            query = parsed_query,
            query_string = query,
            sort = sort,
            fields = fields,
            filter = parsed_filters,
            facets = facets,
            pagenum = pagenum,
            pagelen = pagelen,
            highlight = highlight,
            highlight_fields = highlight_fields,
        )
        # Pre-processors adjust the parameter dictionary in place.
        for query_processor in self.query_processors:
            query_processor.query_pre_process(query_parameters, context)

        query_result = self.backend.query(**query_parameters)

        # Post-processors adjust the result in place.
        for post_processor in self.result_post_processors:
            post_processor.post_process(query_result)

        query_result.debug["api_parameters"] = query_parameters
        return query_result

    def start_operation(self):
        """Return the backend's operation object for batching index changes.

        The result is used as a context manager whose value is passed as
        ``operation_context`` to add_doc, delete_doc and change_doc_id.
        """
        return self.backend.start_operation()

    def rebuild_index(self):
        """Rebuild underlying index

        Recreates the index and re-adds every document provided by the index
        participants within a single backend operation. Any error is logged,
        together with the document being processed if there was one, and
        re-raised.
        """
        self.log.info('Rebuilding the search index.')
        self.backend.recreate_index()
        with self.backend.start_operation() as operation_context:
            doc = None
            try:
                for participant in self.index_participants:
                    # Forget the previous participant's last document so that
                    # a failure while fetching this participant's entries is
                    # not attributed to an unrelated document.
                    doc = None
                    self.log.info(
                        "Reindexing resources provided by %s in product %s",
                        participant.__class__.__name__,
                        getattr(participant.env.product, 'name', "''"))
                    for doc in participant.get_entries_for_index():
                        self.log.debug(
                            "Indexing document %s:%s/%s",
                            doc['product'], doc['type'], doc['id'])
                        self.add_doc(doc, operation_context)
                self.log.info("Reindexing complete.")
            # "except ... as" is valid on Python 2.6+ as well as Python 3;
            # the comma form is Python 2 only.
            except Exception as ex:
                self.log.error(ex)
                if doc is not None:
                    self.log.error("Doc that triggers the error: %s", doc)
                raise

    def change_doc_id(self, doc, old_id, operation_context=None):
        """Re-index ``doc`` whose id used to be ``old_id``.

        :param doc: document carrying its current field values
        :param old_id: id under which the document is currently indexed
        :param operation_context: context of an enclosing batch operation;
            when omitted, a backend operation is started for this change
        """
        if operation_context is None:
            with self.backend.start_operation() as operation_context:
                self._change_doc_id(doc, old_id, operation_context)
        else:
            self._change_doc_id(doc, old_id, operation_context)

    def _change_doc_id(self, doc, old_id, operation_context):
        """Delete the entry stored under ``old_id`` and add ``doc`` again."""
        self.backend.delete_doc(
            doc[IndexFields.PRODUCT],
            doc[IndexFields.TYPE],
            old_id,
            operation_context
        )
        self.add_doc(doc, operation_context)

    def optimize(self):
        """Optimize underlying index"""
        self.backend.optimize()

    def add_doc(self, doc, operation_context = None):
        """Add a document to underlying search backend.

        The doc must be dictionary with obligatory "type" field

        :param doc: document to add; every configured index pre-processor is
            applied to it first
        :param operation_context: optional context of an enclosing batch
            operation
        """
        for preprocessor in self.index_pre_processors:
            preprocessor.pre_process(doc)
        self.backend.add_doc(doc, operation_context)

    def delete_doc(self, product, doc_type, doc_id, operation_context=None):
        """Delete the document from underlying search backend.

        :param product: product of the document that is being deleted
        :param doc_type: type of the document
        :param doc_id: id of the document
        :param operation_context: optional context of an enclosing batch
            operation
        """
        # ISearchBackend.delete_doc declares operation_context as a required
        # argument, so it is always passed on (None when not batching).
        self.backend.delete_doc(product, doc_type, doc_id, operation_context)

    # IEnvironmentSetupParticipant methods

    def environment_created(self):
        """Build the search index for a newly created environment."""
        self.upgrade_environment(self.env.db_transaction)

    def environment_needs_upgrade(self, db):
        """Report whether the backend considers its index outdated."""
        # pylint: disable=unused-argument
        return self.backend.is_index_outdated()

    def upgrade_environment(self, db):
        """Upgrade the environment by rebuilding the search index."""
        # pylint: disable=unused-argument
        self.rebuild_index()