| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| |
| #ifndef __com_sun_star_lang_XTextSearch_idl__ |
| #define __com_sun_star_lang_XTextSearch_idl__ |
| |
| |
| #include <com/sun/star/lang/Locale.idl> |
| #include <com/sun/star/uno/XInterface.idl> |
| //#include <com/sun/star/lang/CascadeTransliterator.idl> |
| |
| //============================================================================= |
| |
| module com { module sun { module star { module util { |
| |
| //============================================================================= |
| |
| |
| /** Selects the algorithm a text search is performed with. |
| |
| @see <member>SearchOptions::algorithmType</member> |
| */ |
| published enum SearchAlgorithms |
| { |
| /// Literal search. |
| ABSOLUTE, // implemented as a kind of Boyer-Moore |
| /// Regular expression search. |
| REGEXP, |
| /// Weighted Levenshtein Distance search. |
| APPROXIMATE |
| }; |
| |
| /** Flags for search methods. |
| |
| <p> Each constant occupies a distinct bit, so several flags can be |
| combined in <member>SearchOptions::searchFlag</member>. </p> |
| */ |
| published constants SearchFlags |
| { |
| /** |
| @deprecated The constant ALL_IGNORE_CASE is never supported - use |
| <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const> |
| with |
| <member>SearchOptions::transliterateFlags</member> |
| instead. |
| |
| @see <type scope="com::sun::star::i18n">TransliterationModules</type> |
| */ |
| const long ALL_IGNORE_CASE = 0x00000001; |
| |
| /** Flag for normal (Boyer-Moore) search / Search for word only. */ |
| const long NORM_WORD_ONLY = 0x00000010; |
| |
| /** Flag for "regular expression" search / Interpret as extended |
| regular expression. |
| |
| @deprecated The flag is currently not supported by OOo. |
| */ |
| const long REG_EXTENDED = 0x00000100; |
| |
| /** Flag for "regular expression" search / No register information |
| or backreferences, i.e., avoid sub expressions. Return only |
| true/false if matched or not. |
| |
| @deprecated The flag is currently not supported by OOo. |
| */ |
| const long REG_NOSUB = 0x00000200; |
| |
| /** Flag for "regular expression" search / Special new line |
| treatment. |
| |
| @deprecated The flag is currently not supported by OOo. |
| |
| <p> A NEWLINE character in string will not be matched by a |
| period outside bracket expression or by any form of a non |
| matching list. </p> |
| |
| <p> A circumflex (^) in pattern when used to specify expression |
| anchoring will match the zero length string immediately after a |
| newline in string, regardless of the setting of |
| REG_NOT_BEGINOFLINE. </p> |
| |
| <p> A dollar-sign ($) in pattern when used to specify expression |
| anchoring, will match zero-length string immediately before a |
| new line in string, regardless of the setting of |
| REG_NOT_ENDOFLINE. </p> |
| */ |
| const long REG_NEWLINE = 0x00000400; |
| |
| /** Flag for "regular expression" search / The first character in |
| the string is not the beginning of the line, therefore ^ will |
| not match with the first character of the string. |
| */ |
| const long REG_NOT_BEGINOFLINE = 0x00000800; |
| |
| /** Flag for "regular expression" search / The last character in |
| the string is not the end of the line, therefore $ will not |
| match with the last character of the string. |
| */ |
| const long REG_NOT_ENDOFLINE = 0x00001000; |
| |
| /** Flag for "Weighted Levenshtein Distance" search / Relaxed |
| checking of limit, split weight pools. |
| |
| <p> If not specified (<b>strict</b>), the search is successful if |
| the WLD is within a calculated limit where each insertion, |
| deletion and replacement adds a weight to a common pool of |
| weights. This is the mathematically correct WLD. </p> |
| |
| <p> From a user's point of view the strict WLD is an |
| exclusive-OR of the arguments given, for example if allowed |
| insertions=2 and allowed replacements=2, the search fails if 2 |
| characters had been inserted and an additional operation would |
| be needed to match. Depending on the weights it may also fail if |
| 1 character was inserted and 1 character replaced and an |
| additional operation would be needed to match. The strict |
| algorithm may match less than expected from a first glance of |
| the specified arguments, but does not return false positives. </p> |
| |
| <p> If specified (<b>relaxed</b>), the search is also successful |
| if the combined pool for insertions and deletions is below a |
| doubled calculated limit and replacements are treated |
| differently. Additionally, swapped characters are counted as one |
| replacement. </p> |
| |
| <p> From a user's point of view the relaxed WLD is an |
| inclusive-OR of the arguments given, for example if allowed |
| insertions=2 and allowed replacements=2, the search succeeds if |
| 2 characters had been inserted and an additional replacement is |
| needed to match. The relaxed algorithm may return false |
| positives, but meets user expectation better. </p> |
| */ |
| const long LEV_RELAXED = 0x00010000; |
| }; |
| |
| |
| /** Bundles the settings of a search; handed to the search object via |
| <member>XTextSearch::setOptions</member>. |
| */ |
| published struct SearchOptions { |
| //------------------------------------------------------------------------- |
| /** The search algorithm to use. |
| |
| @see <type>SearchAlgorithms</type> |
| */ |
| SearchAlgorithms algorithmType; |
| |
| /** Search flags; several flags can be combined. |
| |
| @see <type>SearchFlags</type> |
| */ |
| long searchFlag; |
| |
| /** The text or pattern to be searched. */ |
| string searchString; |
| |
| /** The replacement text. |
| |
| <p> Replacing is optional; SearchOptions is only the data |
| container for this value. </p> |
| */ |
| string replaceString; |
| |
| /** The locale for case insensitive search. */ |
| ::com::sun::star::lang::Locale Locale; |
| |
| /** This many characters can be different (as a replacement) between |
| the found word and the search pattern in a "Weighted Levenshtein |
| Distance" search. */ |
| long changedChars; |
| |
| /** This many characters can be missing in the found word in a |
| "Weighted Levenshtein Distance" search. */ |
| long deletedChars; |
| |
| /** This many characters can be additional in the found word in a |
| "Weighted Levenshtein Distance" search. */ |
| long insertedChars; |
| |
| /** Flags for the transliteration. Same meaning as the enum of |
| <type scope="com::sun::star::i18n">TransliterationModules</type> |
| */ |
| long transliterateFlags; |
| }; |
| |
| |
| /** Result of a search, as returned by |
| <member>XTextSearch::searchForward</member> and |
| <member>XTextSearch::searchBackward</member>. |
| |
| <p> The start and end offsets always depend on the search direction. |
| For example, if you search "X" in the text "-X-" the offsets are: </p> |
| <ul> |
| <li> for forward: start = 1, end = 2 </li> |
| <li> for backward: start = 2, end = 1 </li> |
| </ul> |
| <p> Forward, the start offset is inclusive and the end offset exclusive. |
| Backward, the start offset is exclusive and the end offset inclusive. </p> |
| */ |
| published struct SearchResult { |
| //------------------------------------------------------------------------- |
| /** Number of subexpressions. |
| |
| <p> If it is 0, no match was found. The value is 1 for ABSOLUTE and |
| APPROXIMATE matches. For regular expressions it can be greater |
| than 1. </p> |
| |
| <p> If the value is 1, startOffset[0] and endOffset[0] point to the |
| matching substring. If the value is greater than 1, startOffset[0] |
| and endOffset[0] still point to the substring matched by the whole |
| regular expression, and startOffset[i] and endOffset[i] point to |
| the i-th matching substring. </p> |
| */ |
| long subRegExpressions; |
| /** Start offsets of the matches: inclusive when searching forward, |
| exclusive when searching backward. */ |
| sequence<long> startOffset; |
| /** End offsets of the matches: exclusive when searching forward, |
| inclusive when searching backward. */ |
| sequence<long> endOffset; |
| }; |
| |
| |
| |
| /** enables an object to search in its content. |
| |
| <p> The search settings are supplied once through setOptions() and |
| are then used by searchForward() and searchBackward(). </p> |
| */ |
| published interface XTextSearch : com::sun::star::uno::XInterface |
| { |
| //------------------------------------------------------------------------- |
| /** sets the options for the forward or backward search. |
| |
| @param options |
| the search settings, see <type>SearchOptions</type>. |
| */ |
| void setOptions ([in] SearchOptions options); |
| //------------------------------------------------------------------------- |
| /** searches forward in searchStr, starting at startPos and ending at |
| endPos. |
| |
| @param searchStr |
| the text to search in. |
| @param startPos |
| the position in searchStr at which the search starts. |
| @param endPos |
| the position in searchStr at which the search ends. |
| @returns |
| the result of the search, see <type>SearchResult</type>. |
| */ |
| SearchResult searchForward ([in] string searchStr, [in] long startPos, [in] long endPos ); |
| //------------------------------------------------------------------------- |
| /** searches backward in searchStr, starting at startPos and ending at |
| endPos. |
| |
| <p> endPos must be lower than startPos, because the function |
| searches backward! </p> |
| |
| @param searchStr |
| the text to search in. |
| @param startPos |
| the position in searchStr at which the search starts. |
| @param endPos |
| the position in searchStr at which the search ends; must be lower |
| than startPos. |
| @returns |
| the result of the search, see <type>SearchResult</type>. |
| */ |
| SearchResult searchBackward ([in] string searchStr, [in] long startPos, [in] long endPos ); |
| }; |
| |
| //============================================================================= |
| }; }; }; }; |
| |
| #endif |