blob: e906527fdfb8873096b6882d5663ad5eb2fb550b [file] [log] [blame]
////////////////////////////////////////////////////////////////////////////////
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////
/*
* TODO: Add ASDoc-style comments so that the API documentation can be generated.
*/
package com.adobe.linguistics.utils
{
/**
* <p>This class provides some methods to filter out certain characters from the text.</p>
*
* <p><code>filterWord()</code> first drops a fixed set of invisible/formatting
* characters and then maps typographic apostrophes and hyphens to their plain
* ASCII counterparts.</p>
*/
public class TextFilter
{
	// ---- Characters that filterWord() removes from the word -------------------

	/** U+00AD SOFT HYPHEN. */
	public static const kTextChar_DiscretionaryHyphen:int = 0x00AD;
	/** Control code U+0003. NOTE(review): presumably a host-application layout marker - confirm. */
	public static const kTextChar_BreakRunInStyle:int = 0x0003;
	/** Control code U+0007. NOTE(review): presumably a host-application layout marker - confirm. */
	public static const kTextChar_IndentToHere:int = 0x0007;
	/** U+2063 INVISIBLE SEPARATOR. */
	public static const kTextChar_InvisibleSeparator:int = 0x2063;
	/** U+200C ZERO WIDTH NON-JOINER. */
	public static const kTextChar_ZeroWidthNonJoiner:int = 0x200C;
	/** U+200D ZERO WIDTH JOINER. */
	public static const kTextChar_ZeroWidthJoiner:int = 0x200D;
	/** U+200B ZERO WIDTH SPACE. */
	public static const kTextChar_ZeroSpaceBreak:int = 0x200B;
	/** U+FEFF ZERO WIDTH NO-BREAK SPACE. */
	public static const kTextChar_ZeroSpaceNoBreak:int = 0xFEFF;

	// ---- Characters that filterWord() replaces --------------------------------

	/** U+2019 RIGHT SINGLE QUOTATION MARK; replaced by <code>kTextChar_Apostrophe</code>. */
	public static const kTextChar_RightSingleQuotationMark:int = 0x2019;
	/** U+0027 APOSTROPHE; the replacement for the right single quotation mark. */
	public static const kTextChar_Apostrophe:int = 0x0027;
	/** U+2011 NON-BREAKING HYPHEN; replaced by <code>kTextChar_HyphenMinus</code>. */
	public static const kTextChar_NoBreakHyphen:int = 0x2011;
	/** U+2010 HYPHEN; replaced by <code>kTextChar_HyphenMinus</code>. */
	public static const kTextChar_UnicodeHyphen:int = 0x2010;
	/** U+002D HYPHEN-MINUS; the replacement for both hyphen variants. */
	public static const kTextChar_HyphenMinus:int = 0x002D;

	/**
	* Creates a filter. The filter holds no state.
	*/
	public function TextFilter()
	{
	}

	/**
	* Returns a copy of the word with the ignored characters removed and the
	* typographic apostrophe/hyphens replaced by their ASCII forms.
	*
	* @param inpWord The word to filter. May be <code>null</code> or empty.
	* @return The filtered word; <code>null</code> or the empty string is
	*         returned unchanged.
	*/
	public function filterWord(inpWord:String):String
	{
		return replaceIgnoredCharacter(removeIgnoredCharacter(inpWord));
	}

	/**
	* Tells whether the given character code is one that must be dropped
	* from a word.
	*
	* @param code A UTF-16 code unit as returned by <code>String.charCodeAt()</code>.
	* @return <code>true</code> when the code matches one of the removable characters.
	*/
	private function isIgnoredCharacter(code:Number):Boolean
	{
		return code == kTextChar_DiscretionaryHyphen
			|| code == kTextChar_BreakRunInStyle
			|| code == kTextChar_IndentToHere
			|| code == kTextChar_InvisibleSeparator
			|| code == kTextChar_ZeroWidthNonJoiner
			|| code == kTextChar_ZeroWidthJoiner
			|| code == kTextChar_ZeroSpaceBreak
			|| code == kTextChar_ZeroSpaceNoBreak;
	}

	/**
	* Removes every ignored character from the word.
	*
	* @param inpWord The word to process. May be <code>null</code> or empty.
	* @return The word without ignored characters; <code>null</code> or the
	*         empty string is returned unchanged.
	*/
	private function removeIgnoredCharacter(inpWord:String):String
	{
		if (!inpWord || inpWord.length <= 0)
			return inpWord;

		var tempWord:String = "";
		var wordLength:int = inpWord.length;
		for (var i:int = 0; i < wordLength; i++)
		{
			// Look the code unit up once instead of once per comparison.
			if (isIgnoredCharacter(inpWord.charCodeAt(i)))
				continue;
			tempWord += inpWord.charAt(i);
		}
		return tempWord;
	}

	/**
	* Replaces the right single quotation mark with an apostrophe and the
	* non-breaking/Unicode hyphens with a hyphen-minus.
	*
	* @param inpWord The word to process. May be <code>null</code> or empty.
	* @return The word with the replacements applied; <code>null</code> or the
	*         empty string is returned unchanged.
	*/
	private function replaceIgnoredCharacter(inpWord:String):String
	{
		if (!inpWord)
			return inpWord;

		var apostrophe:String = String.fromCharCode(kTextChar_Apostrophe);
		var hyphenMinus:String = String.fromCharCode(kTextChar_HyphenMinus);

		// split/join is the replace-all idiom for literal delimiters; it avoids
		// re-slicing the whole string once per replaced character.
		var result:String = inpWord
			.split(String.fromCharCode(kTextChar_RightSingleQuotationMark))
			.join(apostrophe);
		result = result
			.split(String.fromCharCode(kTextChar_NoBreakHyphen))
			.join(hyphenMinus);
		result = result
			.split(String.fromCharCode(kTextChar_UnicodeHyphen))
			.join(hyphenMinus);
		return result;
	}
}
}