| #if FEATURE_BREAKITERATOR |
| using System; |
| using System.Text; |
| |
| namespace Lucene.Net.Search.PostingsHighlight |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
/// <summary>
/// Creates a formatted snippet from the top passages.
/// <para/>
/// The default implementation marks the query terms as bold, and places
/// ellipses between unconnected passages.
/// </summary>
public class DefaultPassageFormatter : PassageFormatter
{
    /// <summary>text that will appear before highlighted terms</summary>
    protected readonly string m_preTag;
    /// <summary>text that will appear after highlighted terms</summary>
    protected readonly string m_postTag;
    /// <summary>text that will appear between two unconnected passages</summary>
    protected readonly string m_ellipsis;
    /// <summary>true if we should escape for html</summary>
    protected readonly bool m_escape;

    /// <summary>
    /// Creates a new <see cref="DefaultPassageFormatter"/> with the default tags:
    /// <c>&lt;b&gt;</c> / <c>&lt;/b&gt;</c> around matches, <c>"... "</c> between
    /// unconnected passages, and no html escaping.
    /// </summary>
    public DefaultPassageFormatter()
        : this("<b>", "</b>", "... ", false)
    {
    }

    /// <summary>
    /// Creates a new <see cref="DefaultPassageFormatter"/> with custom tags.
    /// </summary>
    /// <param name="preTag">text which should appear before a highlighted term.</param>
    /// <param name="postTag">text which should appear after a highlighted term.</param>
    /// <param name="ellipsis">text which should be used to connect two unconnected passages.</param>
    /// <param name="escape">true if text should be html-escaped</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="preTag"/>, <paramref name="postTag"/> or <paramref name="ellipsis"/> is <c>null</c>.
    /// </exception>
    public DefaultPassageFormatter(string preTag, string postTag, string ellipsis, bool escape)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // catch the previously thrown ArgumentException keep working.
        if (preTag == null)
        {
            throw new ArgumentNullException(nameof(preTag));
        }
        if (postTag == null)
        {
            throw new ArgumentNullException(nameof(postTag));
        }
        if (ellipsis == null)
        {
            throw new ArgumentNullException(nameof(ellipsis));
        }

        this.m_preTag = preTag;
        this.m_postTag = postTag;
        this.m_ellipsis = ellipsis;
        this.m_escape = escape;
    }

    /// <summary>
    /// Builds a single snippet string from <paramref name="passages"/>: the text of each
    /// passage is copied from <paramref name="content"/>, every match is wrapped in the
    /// pre/post tags, and the ellipsis is inserted before a passage that starts after the
    /// end of the previous one.
    /// </summary>
    /// <param name="passages">passages to render, processed in array order</param>
    /// <param name="content">original text that the passage and match offsets index into</param>
    /// <returns>the formatted snippet, as a <see cref="string"/></returns>
    public override object Format(Passage[] passages, string content)
    {
        StringBuilder sb = new StringBuilder();
        // Offset in content up to which text has already been emitted.
        int pos = 0;
        foreach (Passage passage in passages)
        {
            // don't add ellipsis if it's the first one, or if it's connected.
            if (passage.startOffset > pos && pos > 0)
            {
                sb.Append(m_ellipsis);
            }
            pos = passage.startOffset;
            for (int i = 0; i < passage.numMatches; i++)
            {
                int start = passage.matchStarts[i];
                int end = passage.matchEnds[i];
                // it's possible to have overlapping terms
                if (start > pos)
                {
                    // plain text between the previous match and this one
                    Append(sb, content, pos, start);
                }
                if (end > pos)
                {
                    // Only the part of the match that was not already emitted is
                    // highlighted, so overlapping matches never duplicate text.
                    sb.Append(m_preTag);
                    Append(sb, content, Math.Max(pos, start), end);
                    sb.Append(m_postTag);
                    pos = end;
                }
            }
            // it's possible a "term" from the analyzer could span a sentence boundary.
            Append(sb, content, pos, Math.Max(pos, passage.endOffset));
            pos = passage.endOffset;
        }
        return sb.ToString();
    }

    /// <summary>
    /// Appends original text to the response.
    /// <para/>
    /// Without escaping, the range is copied verbatim. With escaping, the characters
    /// <c>&amp; &lt; &gt; " ' /</c> are replaced by html entities, ASCII letters and digits
    /// are copied, any other character below 0xFF is written as a decimal numeric
    /// character reference, and characters at or above 0xFF are copied unchanged.
    /// </summary>
    /// <param name="dest">resulting text, possibly transformed or encoded</param>
    /// <param name="content">original text content</param>
    /// <param name="start">index of the first character in content</param>
    /// <param name="end">index of the character following the last character in content</param>
    protected virtual void Append(StringBuilder dest, string content, int start, int end)
    {
        if (m_escape)
        {
            // note: these are the rules from owasp.org
            for (int i = start; i < end; i++)
            {
                char ch = content[i];
                switch (ch)
                {
                    case '&':
                        dest.Append("&amp;");
                        break;
                    case '<':
                        dest.Append("&lt;");
                        break;
                    case '>':
                        dest.Append("&gt;");
                        break;
                    case '"':
                        dest.Append("&quot;");
                        break;
                    case '\'':
                        dest.Append("&#x27;");
                        break;
                    case '/':
                        dest.Append("&#x2F;");
                        break;
                    default:
                        if ((ch >= 0x30 && ch <= 0x39) || (ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))
                        {
                            // 0-9, A-Z, a-z need no encoding
                            dest.Append(ch);
                        }
                        else if (ch < 0xff)
                        {
                            // numeric character reference, e.g. "&#32;" for a space
                            dest.Append("&#");
                            dest.Append((int)ch);
                            dest.Append(";");
                        }
                        else
                        {
                            dest.Append(ch);
                        }
                        break;
                }
            }
        }
        else
        {
            dest.Append(content, start, end - start);
        }
    }
}
| } |
| #endif |