| // # Deleting and Updating (?) documents |
| // |
| // This example explains how to delete and update documents. |
| // In fact there is actually no such thing as an update in tantivy. |
| // |
| // To update a document, you need to delete a document and then reinsert |
| // its new version. |
| // |
| // --- |
| // Importing tantivy... |
| use tantivy::collector::TopDocs; |
| use tantivy::query::TermQuery; |
| use tantivy::schema::*; |
| use tantivy::{doc, Index, IndexReader}; |
| |
| // A simple helper function to fetch a single document |
| // given its id from our index. |
| // It will be helpful to check our work. |
| fn extract_doc_given_isbn( |
| reader: &IndexReader, |
| isbn_term: &Term, |
| ) -> tantivy::Result<Option<Document>> { |
| let searcher = reader.searcher(); |
| |
| // This is the simplest query you can think of. |
| // It matches all of the documents containing a specific term. |
| // |
| // The second argument is here to tell we don't care about decoding positions, |
| // or term frequencies. |
| let term_query = TermQuery::new(isbn_term.clone(), IndexRecordOption::Basic); |
| let top_docs = searcher.search(&term_query, &TopDocs::with_limit(1))?; |
| |
| if let Some((_score, doc_address)) = top_docs.first() { |
| let doc = searcher.doc(*doc_address)?; |
| Ok(Some(doc)) |
| } else { |
| // no doc matching this ID. |
| Ok(None) |
| } |
| } |
| |
| fn main() -> tantivy::Result<()> { |
| // # Defining the schema |
| // |
| // Check out the *basic_search* example if this makes |
| // small sense to you. |
| let mut schema_builder = Schema::builder(); |
| |
| // Tantivy does not really have a notion of primary id. |
| // This may change in the future. |
| // |
| // Still, we can create a `isbn` field and use it as an id. This |
| // field can be `u64` or a `text`, depending on your use case. |
| // It just needs to be indexed. |
| // |
| // If it is `text`, let's make sure to keep it `raw` and let's avoid |
| // running any text processing on it. |
| // This is done by associating this field to the tokenizer named `raw`. |
| // Rather than building our |
| // [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually, We |
| // use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions) |
| // and untokenized. |
| // |
| // Because we also want to be able to see this `id` in our returned documents, |
| // we also mark the field as stored. |
| let isbn = schema_builder.add_text_field("isbn", STRING | STORED); |
| let title = schema_builder.add_text_field("title", TEXT | STORED); |
| let schema = schema_builder.build(); |
| |
| let index = Index::create_in_ram(schema.clone()); |
| |
| let mut index_writer = index.writer(50_000_000)?; |
| |
| // Let's add a couple of documents, for the sake of the example. |
| let mut old_man_doc = Document::default(); |
| old_man_doc.add_text(title, "The Old Man and the Sea"); |
| index_writer.add_document(doc!( |
| isbn => "978-0099908401", |
| title => "The old Man and the see" |
| ))?; |
| index_writer.add_document(doc!( |
| isbn => "978-0140177398", |
| title => "Of Mice and Men", |
| ))?; |
| index_writer.add_document(doc!( |
| title => "Frankentein", //< Oops there is a typo here. |
| isbn => "978-9176370711", |
| ))?; |
| index_writer.commit()?; |
| let reader = index.reader()?; |
| |
| let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711"); |
| |
| // Oops our frankenstein doc seems misspelled |
| let frankenstein_doc_misspelled = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap(); |
| assert_eq!( |
| schema.to_json(&frankenstein_doc_misspelled), |
| r#"{"isbn":["978-9176370711"],"title":["Frankentein"]}"#, |
| ); |
| |
| // # Update = Delete + Insert |
| // |
| // Here we will want to update the typo in the `Frankenstein` book. |
| // |
| // Tantivy does not handle updates directly, we need to delete |
| // and reinsert the document. |
| // |
| // This can be complicated as it means you need to have access |
| // to the entire document. It is good practise to integrate tantivy |
| // with a key value store for this reason. |
| // |
| // To remove one of the document, we just call `delete_term` |
| // on its id. |
| // |
| // Note that `tantivy` does nothing to enforce the idea that |
| // there is only one document associated with this id. |
| // |
| // Also you might have noticed that we apply the delete before |
| // having committed. This does not matter really... |
| index_writer.delete_term(frankenstein_isbn.clone()); |
| |
| // We now need to reinsert our document without the typo. |
| index_writer.add_document(doc!( |
| title => "Frankenstein", |
| isbn => "978-9176370711", |
| ))?; |
| |
| // You are guaranteed that your clients will only observe your index in |
| // the state it was in after a commit. |
| // In this example, your search engine will at no point be missing the *Frankenstein* document. |
| // Everything happened as if the document was updated. |
| index_writer.commit()?; |
| // We reload our searcher to make our change available to clients. |
| reader.reload()?; |
| |
| // No more typo! |
| let frankenstein_new_doc = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap(); |
| assert_eq!( |
| schema.to_json(&frankenstein_new_doc), |
| r#"{"isbn":["978-9176370711"],"title":["Frankenstein"]}"#, |
| ); |
| |
| Ok(()) |
| } |