blob: bae23e5ef94a13169b85b1de842293df81838279 [file] [log] [blame]
////////////////////////////////////////////////////////////////////////////////
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////
package com.adobe.linguistics.spelling.core.utils
{
import com.adobe.linguistics.spelling.core.LinguisticRule;
import com.adobe.linguistics.spelling.core.SquigglyDictionary;
import com.adobe.linguistics.spelling.core.env.InternalConstants;
import com.adobe.linguistics.spelling.core.error.*;
import com.adobe.linguistics.spelling.core.logging.*;
import flash.errors.IllegalOperationError;
import flash.events.Event;
import flash.events.EventDispatcher;
import flash.events.IOErrorEvent;
import flash.events.SecurityErrorEvent;
import flash.net.URLRequest;
import flash.utils.*;
public class SquigglyDictionaryLoader extends EventDispatcher
{
// Dictionary being built; created in loadDictionaryComplete and exposed through the squigglyDictionary getter.
private var _dict:SquigglyDictionary;
// URL handed to load(); also used to identify the file in log messages.
private var _dictionaryPath:String;
// Optional linguistic rule; when set it supplies the text encoding, the flag alias table and the flag mode.
private var _attrMgr:LinguisticRule;
// Parser used to turn numeric text from the dictionary file into numbers (see the snp.parse calls).
private var snp:SimpleNumberParser = new SimpleNumberParser();
// Set up the logger used for reporting messages; the logger name is the qualified class name with "::" replaced by ".".
private var className:String = getQualifiedClassName(this).split("::").join(".");
private var logger:ILogger = Log.getLogger( className );
// Parsing state kept as fields ("shifted up" from locals) so that it is shared by loadWithTimeouts and loadWithoutTimeouts.
// Lines of the dictionary file; index 0 is parsed as the table size.
private var wordList:Array;
// Morphological description of the current line, or null when the line has none.
private var dp:String;
// Word part of the current line.
private var ts:String;
// Raw affix string of the current line (the text after the unescaped "/"), or null.
private var ap:String;
// Decoded flags of the current line, or null.
private var flags:String;
private var dpPattern1:RegExp; // pattern that splits a line into fields at the new morphological field separator (":")
private var dpPattern2:RegExp; // pattern that splits a line into fields at the old morphological field separator (tab)
private var tsPattern1:RegExp; // separates the word from its affix string
// Result of the most recent RegExp match.
private var strArr:Array;
// Raw text of the line currently being parsed.
private var lineContent:String;
// NOTE(review): not referenced by any method of this class.
private var totalPart:int;
// Timeout, in the units expected by setTimeout, between two parts of a split load; set from InternalConstants.DICT_LOAD_DELAY.
private var delay:int;
// End of the parsing state fields.
// Fields backing the enableDictionarySplit / wordsPerDictionarySplit arguments of load().
private var _enableDictionarySplit:Boolean;
private var _wordsPerDictionarySplit:int;
/**
 * Creates an idle loader: no dictionary, no path and no linguistic rule yet.
 * The line-splitting patterns and the inter-part delay are prepared here so
 * that the parsing passes can reuse them for every line.
 */
public function SquigglyDictionaryLoader()
{
    // Nothing has been requested or loaded so far.
    _attrMgr = null;
    _dictionaryPath = null;
    _dict = null;

    // Pause between two parts of a split load, taken from the shared constants.
    delay = InternalConstants.DICT_LOAD_DELAY;

    // Word / affix-string split: the last "/" that is not preceded by a backslash.
    tsPattern1 = /^(.*[^\\])\/(.+)$/ ;
    // Word / morphological description split, new separator (two characters followed by ":").
    dpPattern1 = /^(.+)[ \t]+.{2}:(.+)$/ ;
    // Word / morphological description split, old separator (a tab).
    dpPattern2 = /^(.+)[\t]{1}(.+)$/ ;
}
/** Sets the linguistic rule used when the dictionary is decoded and its flags are interpreted. */
public function set linguisticRule(value:LinguisticRule):void {
    _attrMgr = value;
}
/** Returns the linguistic rule currently attached to this loader (null if none was set). */
public function get linguisticRule():LinguisticRule {
    return _attrMgr;
}
/**
 * Starts loading the dictionary file found at dictionaryURL.
 *
 * When the data has arrived it is parsed by loadDictionaryComplete; an
 * Event.COMPLETE is dispatched by this loader once parsing has finished.
 * I/O and security errors reported by the underlying DictionaryLoader are
 * re-dispatched from this object.
 *
 * @param dictionaryURL URL of the dictionary file; must not be null.
 * @param enableDictionarySplit when true the word list is parsed in several
 *        parts separated by timeouts, otherwise in one pass.
 * @param wordsPerDictionarySplit requested number of words per part when
 *        splitting is enabled.
 * @throws IllegalOperationError if dictionaryURL is null.
 */
public function load(dictionaryURL:String,enableDictionarySplit:Boolean,wordsPerDictionarySplit:int):void {
    if ( dictionaryURL == null ) {
        // This method takes a single URL; the previous message wrongly spoke of two.
        throw new IllegalOperationError("load function did not receive a valid dictionary URL.");
    }
    _dictionaryPath = dictionaryURL;
    _enableDictionarySplit = enableDictionarySplit;
    _wordsPerDictionarySplit = wordsPerDictionarySplit;

    var request:URLRequest = new URLRequest( _dictionaryPath );
    // NOTE(review): no explicit load call is made here, so DictionaryLoader
    // presumably starts the request from its constructor - confirm.
    var dloader:DictionaryLoader = new DictionaryLoader(request);
    dloader.addEventListener(Event.COMPLETE, loadDictionaryComplete);
    dloader.addEventListener(IOErrorEvent.IO_ERROR, handleError);
    dloader.addEventListener(SecurityErrorEvent.SECURITY_ERROR, handleError);
}
// Listener for the I/O and security errors of the underlying loader:
// re-dispatches a copy of the event from this object.
private function handleError(evt:Event):void {
    dispatchEvent(evt.clone());
}
// Re-dispatches a clone of evt from this loader.
private function bounceEvent(evt:Event):void {
    var copy:Event = evt.clone();
    dispatchEvent(copy);
}
/** Returns the dictionary built by the most recent load, or null if no data has been received yet. */
public function get squigglyDictionary():SquigglyDictionary {
    return this._dict;
}
/**
 * Handler for the COMPLETE event of the underlying loader.
 *
 * Decodes the received bytes with the encoding of the linguistic rule (or
 * the default encoding when no rule is set), splits the text into lines,
 * validates the table size found on the first line and then schedules the
 * parsing of the remaining lines, either in parts (loadWithTimeouts) or in
 * one pass (loadWithoutTimeouts).
 *
 * @param evt completion event; evt.target.data is read as a ByteArray.
 * @throws ContentError if the first line does not yield a non-zero table size.
 */
private function loadDictionaryComplete(evt:Event):void {
    _dict = new SquigglyDictionary(_attrMgr);
    var bytes:ByteArray = evt.target.data;
    //trace("load complete handler: "+getTimer()+" "+bytes.bytesAvailable);
    var charSet:String = _attrMgr ? _attrMgr.encoding : InternalConstants.DEFAULTENCODING;
    var str:String = bytes.readMultiByte(bytes.length, charSet);
    //trace("Bytes read at: "+getTimer());
    /*
    Normalise the line separators to "\n":
    DOS/Windows files use "\r\n", Unix files use "\n", classic Mac files use "\r".
    step 1: replace "\r\n" with "\n"
    step 2: replace every remaining "\r" with "\n"
    */
    str = str.split("\r\n").join("\n");
    str = str.split("\r").join("\n");
    wordList = str.split("\n");
    //trace("Wordlist length: "+wordList.length);
    // String.split always returns an Array, so only the "no line break at all"
    // case has to be reported. Loading continues as before.
    if ( wordList.length == 1 ) {
        logger.error("Dictionary File[{0}] Operation[parsing] Error[corrupt data]",this._dictionaryPath );
    }
    //fix for Bug #: 2855795: never ask for more words per part than there are lines.
    if ( wordList.length < this._wordsPerDictionarySplit ) {
        this._wordsPerDictionarySplit = wordList.length;
    }
    /*
    Remove a byte order mark from the first line. Depending on the encoding
    used above, a UTF-8 BOM shows up either as the single character U+FEFF or
    as the three characters 0xEF 0xBB 0xBF.
    */
    var header:String = wordList[0];
    if ( header.charCodeAt(0) == 0xFEFF ) {
        header = header.substr(1);
    } else if ( header.indexOf("\xEF\xBB\xBF") == 0 ) {
        header = header.substr(3);
    }
    wordList[0] = header;
    /* get the table size */
    var tablesize:int = snp.parse(wordList[0]);
    if ( tablesize == 0 ) throw new ContentError(ErrorTable.CONTENTPARSINGERROR_MSG,ErrorTable.CONTENTPARSINGERROR_ID);
    //trace("Before parsing: "+getTimer());
    // Line 0 is the table size, so word parsing starts at line 1 in both modes.
    if ( this._enableDictionarySplit ) {
        setTimeout(loadWithTimeouts, 1, 1);
    } else {
        setTimeout(loadWithoutTimeouts, 1, 1);
    }
}
/**
 * Parses one part of the word list and schedules the next one.
 *
 * At most wordsPerDictionarySplit lines (never fewer than one, never past
 * the end of the list) are handled per call, so every call makes progress
 * even when the requested split size is zero or negative. When the lines
 * that remain would fit into a single part, the rest is handed over to
 * loadWithoutTimeouts, which also signals completion.
 *
 * @param part index of the first line to parse in this call.
 */
private function loadWithTimeouts(part:int):void {
    var chunkSize:int = int(Math.max(1, this._wordsPerDictionarySplit));
    var end:int = int(Math.min(part + chunkSize, wordList.length));
    var line:int;
    for ( line = part; line < end; ++line ) {
        parseDictionaryLine(line);
    }
    if ( line + chunkSize >= wordList.length ) {
        // Last part reached: whatever is left is loaded without further splitting.
        setTimeout(loadWithoutTimeouts, delay, line);
    } else {
        setTimeout(loadWithTimeouts, delay, line);
    }
}

/**
 * Parses every line from the given index up to the end of the word list in
 * one pass, logs a summary and dispatches Event.COMPLETE.
 *
 * @param line index of the first line to parse.
 */
private function loadWithoutTimeouts(line:int):void
{
    for ( ; line < wordList.length; ++line ) {
        parseDictionaryLine(line);
    }
    //trace("After Parsing: "+getTimer());
    logger.info("Dictionary File[{0}] Operation[parsing] Total Lines:{1}, Unrecognized Lines:{2}", this._dictionaryPath ,wordList.length.toString(), 0 );
    dispatchEvent(new Event(Event.COMPLETE));
}

/**
 * Parses the line at the given index of wordList and adds the resulting
 * word, flags and morphological description to the dictionary.
 *
 * @param index index into wordList of the line to parse.
 */
private function parseDictionaryLine(index:int):void {
    // bof: split the line into word and morphological description
    lineContent = wordList[index];
    /* ":" is the new morphological field separator, a tab is the old one */
    strArr = lineContent.match(dpPattern1);
    if ( strArr == null ) {
        strArr = lineContent.match(dpPattern2);
    }
    if ( strArr != null ) {
        ts = strArr[1];
        dp = strArr[2];
    } else {
        // No description: the whole line is the word (possibly with an affix string).
        ts = lineContent;
        dp = null;
    }
    ts = StringUtils.trim(ts);
    if ( dp != null ) dp = StringUtils.trim(dp);
    // eof: split the line into word and morphological description

    // bof: split the word part into word and affix string
    /*
    "\/" is a slash inside a word (not the affix separator)
    "/" at the beginning of the line is a word character (not the affix separator)
    */
    if ( (strArr = ts.match(tsPattern1)) != null ) {
        ap = strArr[2];
        ts = strArr[1];
        ts = ts.split('\\/').join('/'); /* replace "\/" with "/" */
        // Start from a clean value so that flags of a previous line can never
        // leak into this one when no linguistic rule is available.
        flags = null;
        if ( _attrMgr ) {
            if ( _attrMgr.aliasfTable && _attrMgr.aliasfTable.length > 0 ) {
                // The affix string is a 1-based index into the flag alias table.
                flags = _attrMgr.aliasfTable[parseInt(ap,10)-1];
            } else {
                flags = decodeFlags( ap, _attrMgr.flagMode);
            }
        }
        // TODO (kept from the original): complex-affix and compound-word support is
        // still missing; the reference implementation additionally warns about a bad
        // flag vector alias and sorts the decoded flags.
    } else {
        ap = null;
        flags = null;
    }
    // eof: split the word part into word and affix string
    _dict.addWord( ts, flags, dp );
}
/*
 * Converts the affix string of a dictionary line into a flag string,
 * one character per flag, according to flagMode:
 *   FLAG_LONG - every two characters form one flag (first char << 8 plus second char);
 *               an odd number of characters is logged and raises a ContentError.
 *   FLAG_NUM  - comma separated numbers, each becoming the character with that code;
 *               numbers at or above DEFAULTFLAGS are logged but still converted.
 *   FLAG_UNI and any other mode - the affix string is returned unchanged.
 *
 * Notes carried over from the original:
 * Squiggly uses utf16 by default; utf8 string conversion needs extra work later.
 * A better toString() function may be needed for the FLAG_LONG and FLAG_NUM modes.
 */
private function decodeFlags(flags:String, flagMode:int):String {
    var decoded:String = "";

    if ( flagMode === InternalConstants.FLAG_LONG ) {
        if ( (flags.length) % 2 == 1 ) {
            logger.error("Dictionary File[{0}] Operation[parsing] Error[decoding error] target flags:{1}",this._dictionaryPath , flags );
            throw new ContentError(ErrorTable.CONTENTPARSINGERROR_MSG,ErrorTable.CONTENTPARSINGERROR_ID);
        }
        var pairCount:int = flags.length / 2;
        for ( var pair:int = 0; pair < pairCount; ++pair ) {
            var high:Number = flags.charCodeAt(pair * 2);
            var low:Number = flags.charCodeAt(pair * 2 + 1);
            decoded += String.fromCharCode( (high << 8) + low );
        }
        return decoded;
    }

    if ( flagMode === InternalConstants.FLAG_NUM ) {
        var tokens:Array = flags.split(",");
        for ( var pos:int = 0; pos < tokens.length; ++pos ) {
            var code:Number = snp.parse(tokens[pos]);
            if ( code >= InternalConstants.DEFAULTFLAGS ) {
                logger.error("Dictionary File[{0}] Operation[parsing] Error[decoding error] target flags:{1}",this._dictionaryPath , flags );
            }
            decoded += String.fromCharCode(code);
        }
        return decoded;
    }

    // FLAG_UNI and every unrecognised mode: the flags are used as they are.
    return flags;
}
}
}