blob: 676b6460df370e6a77fe5472eff89feaf1448d3c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.expression.function;
import java.io.DataInput;
import java.io.IOException;
import java.sql.SQLException;
import java.text.Collator;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.phoenix.expression.Expression;
import org.apache.phoenix.parse.FunctionParseNode;
import org.apache.phoenix.schema.tuple.Tuple;
import org.apache.phoenix.schema.types.PBoolean;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.schema.types.PInteger;
import org.apache.phoenix.schema.types.PVarbinary;
import org.apache.phoenix.schema.types.PVarchar;
import org.apache.phoenix.util.VarBinaryFormatter;
import org.apache.phoenix.util.i18n.LinguisticSort;
import org.apache.phoenix.util.i18n.LocaleUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A Phoenix Function that calculates a collation key for an input string based
* on a caller-provided locale and collator strength and decomposition settings.
*
* The locale should be specified as xx_yy_variant where xx is the ISO 639-1
* 2-letter language code, yy is the ISO 3166 2-letter country code. Both
* countryCode and variant are optional. For example, zh_TW_STROKE, zh_TW and zh
* are all valid locale representations. Note the language code, country code
* and variant are used as arguments to the constructor of java.util.Locale.
*
* This function originally used the open-source i18n-util package to obtain the
* collators it needs from the provided locale. As i18n-util is not maintained
* anymore, the relevant parts from it were copied into Phoenix.
* See: https://issues.apache.org/jira/browse/PHOENIX-6818
*
* The LinguisticSort implementation from i18n-util encapsulates sort-related
* functionality for a substantive list of locales. For each locale, it provides
* a collator and an Oracle-specific database function that can be used to sort
* strings according to the natural language rules of that locale.
*
* This function uses the collator returned by LinguisticSort.getCollator to
* produce a collation key for its input string. A user can expect that the
* sorting semantics of this function for a given locale is equivalent to the
* sorting behaviour of an Oracle query that is constructed using the Oracle
* functions returned by LinguisticSort for that locale.
*
* The optional third argument to the function is a boolean that specifies
* whether to use the upper-case collator (case-insensitive) returned by
* LinguisticSort.getUpperCaseCollator.
*
* The optional fourth and fifth arguments are used to set, respectively, the
* strength and decomposition of the collator returned by LinguisticSort, using
* the setStrength and setDecomposition methods of java.text.Collator.
*
*/
@FunctionParseNode.BuiltInFunction(name = CollationKeyFunction.NAME, args = {
        // input string
        @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }),
        // ISO Code for Locale
        @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }, isConstant = true),
        // whether to use special upper case collator
        @FunctionParseNode.Argument(allowedTypes = { PBoolean.class }, defaultValue = "false",
                isConstant = true),
        // collator strength
        @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null",
                isConstant = true),
        // collator decomposition
        @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null",
                isConstant = true) })
public class CollationKeyFunction extends ScalarFunction {

    private static final Logger LOGGER = LoggerFactory.getLogger(CollationKeyFunction.class);

    /** SQL name under which this function is registered. */
    public static final String NAME = "COLLATION_KEY";

    /**
     * Collator built by {@link #initialize()} from the constant arguments. It is not
     * read from the serialized form by this class; it is rebuilt after construction
     * and after {@link #readFields(DataInput)}.
     */
    private Collator collator;

    /**
     * No-argument constructor. It leaves {@link #collator} unset; the collator is
     * only built by the children-taking constructor or by
     * {@link #readFields(DataInput)}.
     */
    public CollationKeyFunction() {
    }

    /**
     * Creates the function over the given argument expressions and builds the
     * collator from the constant arguments (indexes 1 to 4).
     *
     * @param children the argument expressions; index 0 is the input string
     * @throws SQLException declared for the superclass constructor
     */
    public CollationKeyFunction(List<Expression> children) throws SQLException {
        super(children);
        initialize();
    }

    /**
     * Restores the state handled by the superclass and then rebuilds the collator
     * from the restored constant arguments.
     *
     * @param input the stream to read the serialized state from
     * @throws IOException if the superclass fails to read its state
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        super.readFields(input);
        initialize();
    }

    /**
     * Evaluates the first child as a string and, when it is non-null, points
     * {@code ptr} at the bytes of its collation key.
     *
     * @param tuple the row being evaluated
     * @param ptr receives the child's value first and, for a non-null input, the
     *            collation key bytes afterwards
     * @return {@code false} if the child expression could not be evaluated,
     *         {@code true} otherwise (including for a null input, in which case
     *         {@code ptr} is left as the child set it)
     */
    @Override
    public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) {
        Expression expression = getChildren().get(0);
        if (!expression.evaluate(tuple, ptr)) {
            return false;
        }
        String inputString =
                (String) PVarchar.INSTANCE.toObject(ptr, expression.getSortOrder());
        LOGGER.trace("CollationKey inputString: {}", inputString);
        if (inputString == null) {
            return true;
        }
        byte[] collationKeyByteArray = collator.getCollationKey(inputString).toByteArray();
        // Keep the guard: formatting the key is wasted work unless trace is on.
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("CollationKey bytes: {}",
                    VarBinaryFormatter.INSTANCE.format(collationKeyByteArray));
        }
        ptr.set(collationKeyByteArray);
        return true;
    }

    /**
     * Builds {@link #collator} from the literal arguments: the locale code (index 1),
     * the upper-case-collator flag (index 2), and the optional strength (index 3)
     * and decomposition (index 4) overrides. A null strength or decomposition
     * leaves the collator's own setting untouched.
     */
    private void initialize() {
        String localeISOCode = getLiteralValue(1, String.class);
        Boolean useSpecialUpperCaseCollator = getLiteralValue(2, Boolean.class);
        Integer collatorStrength = getLiteralValue(3, Integer.class);
        Integer collatorDecomposition = getLiteralValue(4, Integer.class);
        // A null flag is treated the same as false.
        boolean upperCase = BooleanUtils.isTrue(useSpecialUpperCaseCollator);

        LOGGER.trace("Input (literal) arguments:localeISOCode: {}, useSpecialUpperCaseCollator: {},"
                + " collatorStrength: {}, collatorDecomposition: {}",
                localeISOCode, useSpecialUpperCaseCollator, collatorStrength,
                collatorDecomposition);

        Locale locale = LocaleUtils.get().getLocaleByIsoCode(localeISOCode);
        // Pass the tag as a log argument; it must never be used as a format string.
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Locale: {}", locale.toLanguageTag());
        }

        LinguisticSort linguisticSort = LinguisticSort.get(locale);
        // NOTE(review): the meaning of the 'false' argument is defined by
        // LinguisticSort.getUpperCaseCollator, which is not visible here.
        collator = upperCase ? linguisticSort.getUpperCaseCollator(false)
                : linguisticSort.getCollator();

        // Values are handed to java.text.Collator unchanged; validation is left to it.
        if (collatorStrength != null) {
            collator.setStrength(collatorStrength);
        }
        if (collatorDecomposition != null) {
            collator.setDecomposition(collatorDecomposition);
        }

        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Collator: [strength: {}, decomposition: {}], Special-Upper-Case: {}",
                    collator.getStrength(), collator.getDecomposition(), upperCase);
        }
    }

    /**
     * @return {@link PVarbinary}, since the result is the raw collation key bytes
     */
    @Override
    public PDataType getDataType() {
        return PVarbinary.INSTANCE;
    }

    /**
     * @return the SQL function name, {@link #NAME}
     */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean isThreadSafe() {
        // ICU4J Collators are not thread-safe unless they are frozen.
        // TODO: Look into calling freeze() on them to be able to return true here.
        return false;
    }

    /**
     * @return whether the input string expression (child 0) is nullable
     */
    @Override
    public boolean isNullable() {
        return getChildren().get(0).isNullable();
    }
}