|  | /************************************************************** | 
|  | * | 
|  | * Licensed to the Apache Software Foundation (ASF) under one | 
|  | * or more contributor license agreements.  See the NOTICE file | 
|  | * distributed with this work for additional information | 
|  | * regarding copyright ownership.  The ASF licenses this file | 
|  | * to you under the Apache License, Version 2.0 (the | 
|  | * "License"); you may not use this file except in compliance | 
|  | * with the License.  You may obtain a copy of the License at | 
|  | * | 
|  | *   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | * | 
|  | * Unless required by applicable law or agreed to in writing, | 
|  | * software distributed under the License is distributed on an | 
|  | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | * KIND, either express or implied.  See the License for the | 
|  | * specific language governing permissions and limitations | 
|  | * under the License. | 
|  | * | 
|  | *************************************************************/ | 
|  |  | 
|  |  | 
|  |  | 
|  | #ifndef __com_sun_star_lang_XTextSearch_idl__ | 
|  | #define __com_sun_star_lang_XTextSearch_idl__ | 
|  |  | 
|  |  | 
|  | #include <com/sun/star/lang/Locale.idl> | 
|  | #include <com/sun/star/uno/XInterface.idl> | 
|  | //#include <com/sun/star/lang/CascadeTransliterator.idl> | 
|  |  | 
|  | //============================================================================= | 
|  |  | 
|  | module com { module sun { module star { module util { | 
|  |  | 
|  | //============================================================================= | 
|  |  | 
|  | /** Algorithms that can be selected for a search via <member>SearchOptions::algorithmType</member>. */ | 
|  | published enum SearchAlgorithms | 
|  | { | 
|  | /// Literal (plain text) search. | 
|  | ABSOLUTE,   // implemented as a kind of Boyer-Moore | 
|  | /// Regular expression search. | 
|  | REGEXP, | 
|  | /// Approximate search based on the Weighted Levenshtein Distance. | 
|  | APPROXIMATE | 
|  | }; | 
|  |  | 
|  | /// Flags for the search methods; they are passed in <member>SearchOptions::searchFlag</member> and can be mixed. | 
|  | published constants SearchFlags | 
|  | { | 
|  | /** | 
|  | @deprecated The constant ALL_IGNORE_CASE is never supported - use | 
|  | <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const> | 
|  | with | 
|  | <member>SearchOptions::transliterateFlags</member> | 
|  | instead. | 
|  |  | 
|  | @see <type scope="com::sun::star::i18n">TransliterationModules</type> | 
|  | */ | 
|  | const long  ALL_IGNORE_CASE     = 0x00000001; | 
|  |  | 
|  | /** Flag for the normal (Boyer-Moore) search: search for words only. */ | 
|  | const long  NORM_WORD_ONLY      = 0x00000010; | 
|  |  | 
|  | /** Flag for the "regular expression" search: interpret the pattern | 
|  | as an extended regular expression. | 
|  |  | 
|  | @deprecated The flag is currently not supported by OOo. | 
|  | */ | 
|  | const long  REG_EXTENDED        = 0x00000100; | 
|  |  | 
|  | /** Flag for the "regular expression" search: no register | 
|  | information or backreferences, i.e., avoid sub expressions. | 
|  | Return only true/false depending on whether the pattern matched. | 
|  |  | 
|  | @deprecated The flag is currently not supported by OOo. | 
|  | */ | 
|  | const long  REG_NOSUB           = 0x00000200; | 
|  |  | 
|  | /** Flag for the "regular expression" search: special new line | 
|  | treatment. | 
|  |  | 
|  | @deprecated The flag is currently not supported by OOo. | 
|  |  | 
|  | <p> A NEWLINE character in the string will not be matched by a | 
|  | period outside a bracket expression or by any form of a | 
|  | non-matching list. </p> | 
|  |  | 
|  | <p> A circumflex (^) in the pattern, when used to specify | 
|  | expression anchoring, will match the zero-length string | 
|  | immediately after a newline in the string, regardless of the | 
|  | setting of REG_NOT_BEGINOFLINE. </p> | 
|  |  | 
|  | <p> A dollar-sign ($) in the pattern, when used to specify | 
|  | expression anchoring, will match the zero-length string | 
|  | immediately before a newline in the string, regardless of the | 
|  | setting of REG_NOT_ENDOFLINE. </p> | 
|  | */ | 
|  | const long  REG_NEWLINE         = 0x00000400; | 
|  |  | 
|  | /** The first character in the string is not the beginning of the | 
|  | line, therefore ^ will not match at the first character of the | 
|  | string. | 
|  | */ | 
|  | const long  REG_NOT_BEGINOFLINE = 0x00000800; | 
|  |  | 
|  | /** The last character in the string is not the end of the line, | 
|  | therefore $ will not match at the last character of the string. | 
|  | */ | 
|  | const long  REG_NOT_ENDOFLINE   = 0x00001000; | 
|  |  | 
|  | /** Flag for the "Weighted Levenshtein Distance" search: relaxed | 
|  | checking of the limit, split weight pools. | 
|  |  | 
|  | <p> If not specified (<b>strict</b>), the search is successful if | 
|  | the WLD is within a calculated limit where each insertion, | 
|  | deletion and replacement adds a weight to a common pool of | 
|  | weights. This is the mathematically correct WLD. </p> | 
|  |  | 
|  | <p> From a user's point of view the strict WLD is an | 
|  | exclusive-OR of the arguments given, for example if allowed | 
|  | insertions=2 and allowed replacements=2, the search fails if 2 | 
|  | characters had been inserted and an additional operation would | 
|  | be needed to match. Depending on the weights it may also fail if | 
|  | 1 character was inserted and 1 character replaced and an | 
|  | additional operation would be needed to match. The strict | 
|  | algorithm may match less than one would expect at first glance | 
|  | from the specified arguments, but does not return false | 
|  | positives. </p> | 
|  |  | 
|  | <p> If specified (<b>relaxed</b>), the search is also successful | 
|  | if the combined pool for insertions and deletions is below a | 
|  | doubled calculated limit and replacements are treated | 
|  | differently. Additionally, swapped characters are counted as one | 
|  | replacement. </p> | 
|  |  | 
|  | <p> From a user's point of view the relaxed WLD is an | 
|  | inclusive-OR of the arguments given, for example if allowed | 
|  | insertions=2 and allowed replacements=2, the search succeeds if | 
|  | 2 characters had been inserted and an additional replacement is | 
|  | needed to match. The relaxed algorithm may return false | 
|  | positives, but meets user expectation better. </p> | 
|  | */ | 
|  | const long  LEV_RELAXED     = 0x00010000; | 
|  | }; | 
|  |  | 
|  | /** Data container for the parameters of a search; it is handed to <member>XTextSearch::setOptions</member>. */ | 
|  | published  struct SearchOptions  { | 
|  | //------------------------------------------------------------------------- | 
|  | /** The search algorithm to use. | 
|  |  | 
|  | @see <type>SearchAlgorithms</type> | 
|  | */ | 
|  | SearchAlgorithms	algorithmType; | 
|  |  | 
|  | /** Search flags; several flags can be mixed. | 
|  |  | 
|  | @see <type>SearchFlags</type> | 
|  | */ | 
|  | long 			searchFlag; | 
|  |  | 
|  | /** The text or pattern to search for. */ | 
|  | string			searchString; | 
|  |  | 
|  | /** The replacement text. | 
|  | (Replacing is optional - SearchOptions is only the data container | 
|  | for this value.) */ | 
|  | string			replaceString; | 
|  |  | 
|  | /** The locale for case insensitive search. */ | 
|  | ::com::sun::star::lang::Locale  Locale; | 
|  |  | 
|  | /** This many characters can be different (as a replacement) between | 
|  | the found word and the search pattern in a "Weighted Levenshtein | 
|  | Distance" search. */ | 
|  | long			changedChars; | 
|  |  | 
|  | /** This many characters can be missing in the found word in a | 
|  | "Weighted Levenshtein Distance" search. */ | 
|  | long			deletedChars; | 
|  |  | 
|  | /** This many characters can be additional in the found word in a | 
|  | "Weighted Levenshtein Distance" search. */ | 
|  | long			insertedChars; | 
|  |  | 
|  | /** Flags for the transliteration. They have the same meaning as the | 
|  | values of the enum | 
|  | <type scope="com::sun::star::i18n">TransliterationModules</type>. | 
|  | */ | 
|  | long			transliterateFlags; | 
|  | }; | 
|  |  | 
|  | /** Result of a search, as returned by <member>XTextSearch::searchForward</member> and <member>XTextSearch::searchBackward</member>. */ | 
|  | published  struct SearchResult  { | 
|  | //------------------------------------------------------------------------- | 
|  | /** Number of subexpressions. | 
|  |  | 
|  | <p> If it is 0, no match was found. The value is 1 for an ABSOLUTE | 
|  | or APPROXIMATE match; for regular expressions it can be greater | 
|  | than 1. </p> | 
|  |  | 
|  | <p> If the value is 1, startOffset[0] and endOffset[0] point to the | 
|  | matching substring. If the value is greater than 1, startOffset[0] | 
|  | and endOffset[0] still point to the substring matching the whole | 
|  | regular expression, and startOffset[i] and endOffset[i] point to | 
|  | the substring matching the i-th subexpression. </p> | 
|  |  | 
|  | <p> The start and end offsets always depend on the search | 
|  | direction. For example, if you search "X" in the text "-X-" the | 
|  | offsets are: </p> | 
|  | <pre> | 
|  | forward:    start = 1, end = 2 | 
|  | backward:   start = 2, end = 1 | 
|  | </pre> | 
|  | <p> Forward, the startOffset is inclusive and the endOffset | 
|  | exclusive. Backward, the startOffset is exclusive and the | 
|  | endOffset inclusive. </p> | 
|  | */ | 
|  | long subRegExpressions; | 
|  | sequence<long> startOffset;		// inclusive for a forward search, exclusive for a backward search | 
|  | sequence<long> endOffset;  		// exclusive for a forward search, inclusive for a backward search | 
|  | }; | 
|  |  | 
|  |  | 
|  |  | 
|  | /** enables an object to search in its content. | 
|  |  | 
|  | <p> The search parameters are set once with setOptions(); the text | 
|  | to search in is then passed to searchForward() or searchBackward(). </p> | 
|  | */ | 
|  | published interface XTextSearch : com::sun::star::uno::XInterface | 
|  | { | 
|  | //------------------------------------------------------------------------- | 
|  | /** sets the options for the forward or backward search. | 
|  |  | 
|  | @param options | 
|  | the search parameters; see <type>SearchOptions</type>. | 
|  | */ | 
|  | void setOptions ([in] SearchOptions options); | 
|  | //------------------------------------------------------------------------- | 
|  | /** searches forward in searchStr, starting at startPos and ending | 
|  | at endPos. | 
|  |  | 
|  | @param searchStr | 
|  | the text to search in. | 
|  |  | 
|  | @param startPos | 
|  | the position in searchStr at which the search starts. | 
|  |  | 
|  | @param endPos | 
|  | the position in searchStr at which the search ends. | 
|  |  | 
|  | @returns | 
|  | the result of the search; see <type>SearchResult</type>. | 
|  | */ | 
|  | SearchResult  searchForward  ([in] string searchStr, [in] long startPos, [in] long endPos ); | 
|  | //------------------------------------------------------------------------- | 
|  | /** searches backward in searchStr, starting at startPos and ending | 
|  | at endPos. | 
|  |  | 
|  | @param searchStr | 
|  | the text to search in. | 
|  |  | 
|  | @param startPos | 
|  | the position in searchStr at which the search starts. | 
|  |  | 
|  | @param endPos | 
|  | the position in searchStr at which the search ends. It must be | 
|  | lower than startPos, because the function searches backward. | 
|  |  | 
|  | @returns | 
|  | the result of the search; see <type>SearchResult</type>. | 
|  | */ | 
|  | SearchResult  searchBackward ([in] string searchStr, [in] long startPos, [in] long endPos ); | 
|  | }; | 
|  |  | 
|  | //============================================================================= | 
|  | }; }; }; }; | 
|  |  | 
|  | #endif |