| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| parcel Lucy; |
| |
| /** Create and highlight excerpts. |
| * |
| * The Highlighter can be used to select relevant snippets from a document, |
| * and to surround search terms with highlighting tags. It handles both stems |
| * and phrases correctly and efficiently, using special-purpose data generated |
| * at index-time. |
| * |
| * A Highlighter is constructed from a searcher, a query and a field name. |
| * The highlighting tags can be changed with Set_Pre_Tag() and |
| * Set_Post_Tag(), and Create_Excerpt() produces the excerpt for a HitDoc. |
| */ |
| class Lucy::Highlight::Highlighter inherits Lucy::Object::Obj { |
| |
| Searcher *searcher; |
| Query *query; |
| CharBuf *field; |
| uint32_t excerpt_length; |
| uint32_t window_width; |
| uint32_t slop; /* NOTE(review): window_width and slop have no |
| * constructor parameter or accessor declared here; presumably they are |
| * computed inside init() -- confirm in the implementation. */ |
| CharBuf *pre_tag; |
| CharBuf *post_tag; |
| Compiler *compiler; |
| |
| inert incremented Highlighter* |
| new(Searcher *searcher, Obj *query, const CharBuf *field, |
| uint32_t excerpt_length = 200); /* same arguments as init() */ |
| |
| /** |
| * @param searcher An object which inherits from |
| * L<Searcher|Lucy::Search::Searcher>, such as an |
| * L<IndexSearcher|Lucy::Search::IndexSearcher>. |
| * @param query Query object or a query string. |
| * @param field The name of the field from which to draw the excerpt. The |
| * field must be marked as C<highlightable> (see |
| * L<FieldType|Lucy::Plan::FieldType>). |
| * @param excerpt_length Maximum length of the excerpt, in characters. |
| * Defaults to 200. |
| */ |
| public inert Highlighter* |
| init(Highlighter *self, Searcher *searcher, Obj *query, |
| const CharBuf *field, uint32_t excerpt_length = 200); |
| |
| /** Take a HitDoc object and return a highlighted excerpt as a string if |
| * the HitDoc has a value for the specified <code>field</code>. |
| */ |
| public incremented CharBuf* |
| Create_Excerpt(Highlighter *self, HitDoc *hit_doc); |
| |
| /** Encode text with HTML entities. This method is called internally by |
| * Create_Excerpt() for each text fragment when assembling an excerpt. A |
| * subclass can override this if the text should be encoded differently or |
| * not at all. |
| */ |
| public incremented CharBuf* |
| Encode(Highlighter *self, CharBuf *text); |
| |
| /** Find sentence boundaries within the specified range, returning them as |
| * an array of Spans. The "offset" of each Span indicates the start of |
| * the sentence, and is measured from 0, not from <code>offset</code>. |
| * The Span's "length" member indicates the sentence length in code |
| * points. |
| * |
| * @param text The string to scan. |
| * @param offset The place to start looking for sentence boundaries, |
| * measured in Unicode code points from the top of <code>text</code>. |
| * @param length The number of code points from <code>offset</code> to |
| * scan. The default value of 0 is a sentinel which indicates to scan |
| * until the end of the string. |
| */ |
| incremented VArray* |
| Find_Sentences(Highlighter *self, CharBuf *text, int32_t offset = 0, |
| int32_t length = 0); |
| |
| /** Highlight a small section of text. By default, prepends pre-tag and |
| * appends post-tag. This method is called internally by Create_Excerpt() |
| * when assembling an excerpt. |
| */ |
| public incremented CharBuf* |
| Highlight(Highlighter *self, const CharBuf *text); |
| |
| /** Setter for <code>pre_tag</code>. The default value is "<strong>". |
| */ |
| public void |
| Set_Pre_Tag(Highlighter *self, const CharBuf *pre_tag); |
| |
| /** Setter for <code>post_tag</code>. The default value is "</strong>". |
| */ |
| public void |
| Set_Post_Tag(Highlighter *self, const CharBuf *post_tag); |
| |
| /** Accessor for <code>pre_tag</code>. |
| */ |
| public CharBuf* |
| Get_Pre_Tag(Highlighter *self); |
| |
| /** Accessor for <code>post_tag</code>. |
| */ |
| public CharBuf* |
| Get_Post_Tag(Highlighter *self); |
| |
| /** Accessor for <code>field</code>. |
| */ |
| public CharBuf* |
| Get_Field(Highlighter *self); |
| |
| /** Accessor for <code>excerpt_length</code>. |
| */ |
| public uint32_t |
| Get_Excerpt_Length(Highlighter *self); |
| |
| /** Accessor for <code>searcher</code>. |
| */ |
| public Searcher* |
| Get_Searcher(Highlighter *self); |
| |
| /** Accessor for <code>query</code>. |
| */ |
| public Query* |
| Get_Query(Highlighter *self); |
| |
| /** Accessor for the Lucy::Search::Compiler object derived from |
| * <code>query</code> and <code>searcher</code>. |
| */ |
| public Compiler* |
| Get_Compiler(Highlighter *self); |
| |
| /** Decide based on heat map the best fragment of field to concentrate on. |
| * Place the result into <code>fragment</code> and return its offset in |
| * code points from the top of the field. |
| * |
| * (Helper function for Create_Excerpt only exposed for testing purposes.) |
| */ |
| int32_t |
| Find_Best_Fragment(Highlighter *self, const CharBuf *field_val, |
| ViewCharBuf *fragment, HeatMap *heat_map); |
| |
| /** Take the fragment and determine the best edges for it based on |
| * sentence boundaries when possible. Add ellipses when boundaries cannot |
| * be found. |
| * |
| * NOTE(review): the meaning of the int32_t return value is not stated |
| * in this declaration, and <code>top</code> is presumably the fragment |
| * offset returned by Find_Best_Fragment() -- confirm both against the |
| * implementation. |
| * |
| * (Helper function for Create_Excerpt only exposed for testing purposes.) |
| */ |
| int32_t |
| Raw_Excerpt(Highlighter *self, const CharBuf *field_val, |
| const CharBuf *fragment, CharBuf *raw_excerpt, int32_t top, |
| HeatMap *heat_map, VArray *sentences); |
| |
| /** Take the text in raw_excerpt, add highlight tags, encode, and place |
| * the result into <code>highlighted</code>. |
| * |
| * NOTE(review): <code>top</code> is not described here; presumably it is |
| * the offset of the excerpt within the field, used to relate |
| * <code>spans</code> to <code>raw_excerpt</code> -- confirm. |
| * |
| * (Helper function for Create_Excerpt only exposed for testing purposes.) |
| */ |
| void |
| Highlight_Excerpt(Highlighter *self, VArray *spans, CharBuf *raw_excerpt, |
| CharBuf *highlighted, int32_t top); |
| |
| public void |
| Destroy(Highlighter *self); /* destructor */ |
| } |
| |
| |