blob: ad30281e1061a26fc63d30cab8d7b31f1ee09e1a [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
parcel Lucy;
/** Build inverted indexes.
*
* The Indexer class is Apache Lucy's primary tool for managing the content of
* inverted indexes, which may later be searched using
* L<IndexSearcher|Lucy::Search::IndexSearcher>.
*
* In general, only one Indexer at a time may write to an index safely. If a
* write lock cannot be secured, new() will throw an exception.
*
* If an index is located on a shared volume, each writer application must
* identify itself by supplying an
* L<IndexManager|Lucy::Index::IndexManager> with a unique
* <code>host</code> id to Indexer's constructor or index corruption will
* occur. See L<Lucy::Docs::FileLocking> for a detailed discussion.
*
* Note: at present, Delete_By_Term() and Delete_By_Query() only affect
* documents which had been previously committed to the index -- and not any
* documents added this indexing session but not yet committed. This may
* change in a future update.
*/
class Lucy::Index::Indexer inherits Lucy::Object::Obj {
/* Instance variables. None of them carries an exposure specifier, in
* contrast to the `public` declarations further down. Of these, only
* seg_writer has a public accessor declared in this class
* (Get_Seg_Writer()); Get_Stock_Doc() is declared without `public`.
*
* NOTE(review): the roles below are inferred from names and from the class
* description only -- confirm against the implementation before relying on
* them:
* - write_lock is presumably the "write lock" which, per the class
* description, new() must secure or throw.
* - truncate / optimize / needs_commit / prepared presumably record the
* TRUNCATE flag, an Optimize() request, pending changes, and a completed
* Prepare_Commit(), respectively.
*/
Schema *schema;
Folder *folder;
Segment *segment;
IndexManager *manager;
PolyReader *polyreader;
Snapshot *snapshot;
SegWriter *seg_writer;
DeletionsWriter *del_writer;
FilePurger *file_purger;
Lock *write_lock;
Lock *merge_lock;
Doc *stock_doc;
CharBuf *snapfile;
bool_t truncate;
bool_t optimize;
bool_t needs_commit;
bool_t prepared;
/* Class-level (inert) int32_t values. Their values are not visible in this
* header. NOTE(review): presumably meant to be combined into the `flags`
* argument of new()/init() -- TODO confirm.
*/
public inert int32_t TRUNCATE;
public inert int32_t CREATE;
/** Constructor. Takes the same arguments as init(), minus
* <code>self</code>; see init() for a description of each parameter.
* <code>index</code> is the only argument declared without a default
* value.
*/
public inert incremented Indexer*
new(Schema *schema = NULL, Obj *index, IndexManager *manager = NULL,
int32_t flags = 0);
/** Open a new Indexer. If the index already exists, update it.
*
* @param schema A Schema.
* @param index Either a string filepath or a Folder.
* @param manager An IndexManager.
* @param flags Flags governing behavior.
*/
public inert Indexer*
init(Indexer *self, Schema *schema = NULL, Obj *index,
IndexManager *manager = NULL, int32_t flags = 0);
/** Add a document to the index.
*
* @param doc A Lucy::Document::Doc object.
* @param boost A floating point weight which affects how this document
* scores.
*/
public void
Add_Doc(Indexer *self, Doc *doc, float boost = 1.0);
/** Absorb an existing index into this one. The two indexes must
* have matching Schemas.
*
* @param index Either an index path name or a Folder.
*/
public void
Add_Index(Indexer *self, Obj *index);
/** Mark documents which contain the supplied term as deleted, so that
* they will be excluded from search results and eventually removed
* altogether. The change is not apparent to search apps until after
* Commit() succeeds.
*
* @param field The name of an indexed field. (If it is not spec'd as
* <code>indexed</code>, an error will occur.)
* @param term The term which identifies docs to be marked as deleted. If
* <code>field</code> is associated with an Analyzer, <code>term</code>
* will be processed automatically (so don't pre-process it yourself).
*/
public void
Delete_By_Term(Indexer *self, CharBuf *field, Obj *term);
/** Mark documents which match the supplied Query as deleted.
*
* @param query A L<Query|Lucy::Search::Query>.
*/
public void
Delete_By_Query(Indexer *self, Query *query);
/** Optimize the index for search-time performance. This may take a
* while, as it can involve rewriting large amounts of data.
*/
public void
Optimize(Indexer *self);
/** Commit any changes made to the index. Until this is called, none of
* the changes made during an indexing session are permanent.
*
* Calling Commit() invalidates the Indexer, so if you want to make more
* changes you'll need a new one.
*/
public void
Commit(Indexer *self);
/** Perform the expensive setup for Commit() in advance, so that Commit()
* completes quickly. (If Prepare_Commit() is not called explicitly by
* the user, Commit() will call it internally.)
*/
public void
Prepare_Commit(Indexer *self);
/** Accessor for seg_writer member var.
*/
public SegWriter*
Get_Seg_Writer(Indexer *self);
/* Not declared public, unlike Get_Seg_Writer() above. NOTE(review):
* presumably the accessor for the stock_doc member var, by analogy with
* Get_Seg_Writer() -- TODO confirm.
*/
Doc*
Get_Stock_Doc(Indexer *self);
/* NOTE(review): presumably the destructor for this class, responsible for
* releasing whatever the Indexer holds -- the implementation is not visible
* from this header; TODO confirm.
*/
public void
Destroy(Indexer *self);
}