blob: 3f759012f26c65034ec1cade2cb0df9f485c51d4 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
parcel Lucy;
/** Build inverted indexes.
*
* The Indexer class is Apache Lucy's primary tool for managing the content of
* inverted indexes, which may later be searched using
* [](cfish:IndexSearcher).
*
* In general, only one Indexer at a time may write to an index safely. If a
* write lock cannot be secured, new() will throw an exception.
*
* If an index is located on a shared volume, each writer application must
* identify itself by supplying an
* [](cfish:IndexManager) with a unique
* `host` id to Indexer's constructor or index corruption will
* occur. See [](FileLocking) for a detailed
* discussion.
*
* Note: at present, [](cfish:.Delete_By_Term) and [](cfish:.Delete_By_Query) only affect
* documents which had been previously committed to the index -- and not any
* documents added this indexing session but not yet committed. This may
* change in a future update.
*/
public class Lucy::Index::Indexer inherits Clownfish::Obj {
/* Instance member variables. None of them carries the `public` modifier.
*
* NOTE(review): the short descriptions below are inferred from member names,
* types and the accessor declarations in this header only; the
* implementation is not visible here, so confirm before relying on them.
*/
/* schema is exposed through Get_Schema(). */
Schema *schema;
Folder *folder;
Segment *segment;
IndexManager *manager;
String *latest_snapfile;
PolyReader *polyreader;
Snapshot *snapshot;
/* seg_writer is exposed through Get_Seg_Writer(). */
SegWriter *seg_writer;
DeletionsWriter *del_writer;
FilePurger *file_purger;
/* Lock objects. The class documentation states that new() throws if a
* write lock cannot be secured.
*/
Lock *write_lock;
Lock *merge_lock;
/* stock_doc is presumably what Get_Stock_Doc() returns -- TODO confirm. */
Doc *stock_doc;
String *snapfile;
/* Boolean session state. NOTE(review): presumably `truncate` mirrors the
* TRUNCATE flag, `optimize` is set by Optimize() and `prepared` by
* Prepare_Commit() -- confirm against the implementation.
*/
bool truncate;
bool optimize;
bool needs_commit;
bool prepared;
/* Public inert (class-level, not per-instance) integer variables.
* NOTE(review): presumably these are the values accepted by the `flags`
* parameter of new() and init(); their numeric values and exact semantics
* are not visible in this header -- confirm against the implementation.
*/
public inert int32_t TRUNCATE;
public inert int32_t CREATE;
/** Open a new Indexer. If the index already exists, update it.
*
* @param schema A Schema. Optional; defaults to NULL.
* @param index Either a string filepath or a Folder. Required: it has no
* default value even though it follows a defaulted parameter.
* @param manager An IndexManager. Optional; defaults to NULL.
* @param flags Flags governing behavior. Optional; defaults to 0.
* @return a new Indexer. The function is declared `incremented`, so the
* caller owns a reference to the returned object.
*/
public inert incremented Indexer*
new(Schema *schema = NULL, Obj *index, IndexManager *manager = NULL,
int32_t flags = 0);
/** Initialize an Indexer. Takes the same arguments as new(), preceded by
* the object to initialize.
*
* @param self The Indexer to initialize.
* @param schema A Schema. Optional; defaults to NULL.
* @param index Either a string filepath or a Folder. Required.
* @param manager An IndexManager. Optional; defaults to NULL.
* @param flags Flags governing behavior. Optional; defaults to 0.
*/
public inert Indexer*
init(Indexer *self, Schema *schema = NULL, Obj *index,
IndexManager *manager = NULL, int32_t flags = 0);
/** Add a document to the index.
*
* @param doc A Lucy::Document::Doc object.
* @param boost A floating point weight which affects how this document
* scores. Defaults to 1.0.
*/
public void
Add_Doc(Indexer *self, Doc *doc, float boost = 1.0);
/** Absorb an existing index into this one. The two indexes must
* have matching Schemas.
*
* @param index Either an index path name or a Folder.
*/
public void
Add_Index(Indexer *self, Obj *index);
/** Mark documents which contain the supplied term as deleted, so that
* they will be excluded from search results and eventually removed
* altogether. The change is not apparent to search apps until after
* [](cfish:.Commit) succeeds.
*
* @param field The name of an indexed field. (If it is not spec'd as
* `indexed`, an error will occur.)
* @param term The term which identifies docs to be marked as deleted. If
* `field` is associated with an Analyzer, `term`
* will be processed automatically (so don't pre-process it yourself).
*/
public void
Delete_By_Term(Indexer *self, String *field, Obj *term);
/** Mark documents which match the supplied Query as deleted.
*
* @param query A [](cfish:Query).
*/
public void
Delete_By_Query(Indexer *self, Query *query);
/** Mark the document identified by the supplied document ID as deleted.
*
* @param doc_id A [document id](DocIDs).
*/
public void
Delete_By_Doc_ID(Indexer *self, int32_t doc_id);
/** Optimize the index for search-time performance. This may take a
* while, as it can involve rewriting large amounts of data.
*
* Every Indexer session which changes index content and ends in a
* [](cfish:.Commit) creates a new segment. Once written, segments are never
* modified. However, they are periodically recycled by feeding their
* content into the segment currently being written.
*
* The [](cfish:.Optimize) method causes all existing index content to be fed back
* into the Indexer. When [](cfish:.Commit) completes after an [](cfish:.Optimize), the
* index will consist of one segment. So [](cfish:.Optimize) must be called
* before [](cfish:.Commit). Also, optimizing a fresh index created from scratch
* has no effect.
*
* Historically, there was a significant search-time performance benefit
* to collapsing down to a single segment versus even two segments. Now
* the effect of collapsing is much less significant, and calling
* [](cfish:.Optimize) is rarely justified.
*/
public void
Optimize(Indexer *self);
/** Commit any changes made to the index. Until this is called, none of
* the changes made during an indexing session are permanent.
*
* Calling [](cfish:.Commit) invalidates the Indexer, so if you want to make more
* changes you'll need a new one.
*/
public void
Commit(Indexer *self);
/** Perform the expensive setup for [](cfish:.Commit) in advance, so that [](cfish:.Commit)
* completes quickly. (If [](cfish:.Prepare_Commit) is not called explicitly by
* the user, [](cfish:.Commit) will call it internally.)
*/
public void
Prepare_Commit(Indexer *self);
/** Accessor for the `schema` member var.
*/
public Schema*
Get_Schema(Indexer *self);
/** Accessor for seg_writer member var. Not declared `public`.
*/
SegWriter*
Get_Seg_Writer(Indexer *self);
/* NOTE(review): undocumented in the original. By analogy with
* Get_Seg_Writer() this is presumably the accessor for the stock_doc member
* var -- confirm against the implementation. Not declared `public`.
*/
Doc*
Get_Stock_Doc(Indexer *self);
/* Destructor method. NOTE(review): presumably releases the objects held in
* the member variables above -- the implementation is not visible here.
*/
public void
Destroy(Indexer *self);
}