blob: 48fae13d13ac9a4bcb5caa8e738fbe1f54dd3bb8 [file] [log] [blame]
// Copyright 2009 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @fileoverview The decompressor for Base88 compressed character lists.
*
* The compression is by base 88 encoding the delta between two adjacent
* characters in ths list. The deltas can be positive or negative. Also, there
* would be character ranges. These three types of values
* are given enum values 0, 1 and 2 respectively. Initial 3 bits are used for
* encoding the type and total length of the encoded value. Length enums 0, 1
* and 2 represents lengths 1, 2 and 4. So (value * 8 + type * 3 + length enum)
* is encoded in base 88 by following characters for numbers from 0 to 87:
* 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ (continued in next line)
* abcdefghijklmnopqrstuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~
*
* Value uses 0 based counting. That is value for the range [a, b] is 0 and
* that of [a, c] is 1. Simillarly, the delta of "ab" is 0.
*
* Following python script can be used to compress character lists taken
* standard input: http://go/charlistcompressor.py
*
*/
goog.provide('goog.i18n.CharListDecompressor');
goog.require('goog.array');
goog.require('goog.i18n.uChar');
/**
* Class to decompress base88 compressed character list.
* @constructor
* @final
*/
goog.i18n.CharListDecompressor = function() {
this.buildCharMap_('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr' +
'stuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~');
};
/**
* 1-1 mapping from ascii characters used in encoding to an integer in the
* range 0 to 87.
* @type {Object}
* @private
*/
goog.i18n.CharListDecompressor.prototype.charMap_ = null;
/**
* Builds the map from ascii characters used for the base88 scheme to number
* each character represents.
* @param {string} str The string of characters used in base88 scheme.
* @private
*/
goog.i18n.CharListDecompressor.prototype.buildCharMap_ = function(str) {
if (!this.charMap_) {
this.charMap_ = {};
for (var i = 0; i < str.length; i++) {
this.charMap_[str.charAt(i)] = i;
}
}
};
/**
* Gets the number encoded in base88 scheme by a substring of given length
* and placed at the a given position of the string.
* @param {string} str String containing sequence of characters encoding a
* number in base 88 scheme.
* @param {number} start Starting position of substring encoding the number.
* @param {number} leng Length of the substring encoding the number.
* @return {number} The encoded number.
* @private
*/
goog.i18n.CharListDecompressor.prototype.getCodeAt_ = function(str, start,
leng) {
var result = 0;
for (var i = 0; i < leng; i++) {
var c = this.charMap_[str.charAt(start + i)];
result += c * Math.pow(88, i);
}
return result;
};
/**
* Add character(s) specified by the value and type to given list and return
* the next character in the sequence.
* @param {Array<string>} list The list of characters to which the specified
* characters are appended.
* @param {number} lastcode The last codepoint that was added to the list.
* @param {number} value The value component that representing the delta or
* range.
* @param {number} type The type component that representing whether the value
* is a positive or negative delta or range.
* @return {number} Last codepoint that is added to the list.
* @private
*/
goog.i18n.CharListDecompressor.prototype.addChars_ = function(list, lastcode,
value, type) {
if (type == 0) {
lastcode += value + 1;
goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode));
} else if (type == 1) {
lastcode -= value + 1;
goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode));
} else if (type == 2) {
for (var i = 0; i <= value; i++) {
lastcode++;
goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode));
}
}
return lastcode;
};
/**
* Gets the list of characters specified in the given string by base 88 scheme.
* @param {string} str The string encoding character list.
* @return {!Array<string>} The list of characters specified by the given
* string in base 88 scheme.
*/
goog.i18n.CharListDecompressor.prototype.toCharList = function(str) {
var metasize = 8;
var result = [];
var lastcode = 0;
var i = 0;
while (i < str.length) {
var c = this.charMap_[str.charAt(i)];
var meta = c % metasize;
var type = Math.floor(meta / 3);
var leng = (meta % 3) + 1;
if (leng == 3) {
leng++;
}
var code = this.getCodeAt_(str, i, leng);
var value = Math.floor(code / metasize);
lastcode = this.addChars_(result, lastcode, value, type);
i += leng;
}
return result;
};