blob: f086410f99cec714cd28823b9e63310eddffbc64 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.schema;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import org.apache.tajo.TajoConstants;
import org.apache.tajo.schema.IdentifierPolicy.IdentifierCase;
import org.apache.tajo.util.Pair;
import org.apache.tajo.util.StringUtils;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import static org.apache.tajo.schema.Identifier._;
import static org.apache.tajo.schema.IdentifierPolicy.DefaultPolicy;
import static org.apache.tajo.schema.QualifiedIdentifier.$;
/**
 * Utility methods for identifiers: parsing raw names into {@link QualifiedIdentifier}s,
 * normalizing and denormalizing dot-chained names, and splitting or joining qualified names.
 *
 * All methods are static. Unless stated otherwise, a <code>null</code> argument results in a
 * {@link NullPointerException}.
 */
public class IdentifierUtil {

  /** Regular expression matching the delimiter between identifier parts. */
  public static final String IDENTIFIER_DELIMITER_REGEXP = "\\.";
  /** Delimiter placed between identifier parts. */
  public static final String IDENTIFIER_DELIMITER = ".";
  /** Double quotation mark. Not referenced by the methods of this class. */
  public static final String IDENTIFIER_QUOTE_STRING = "\"";
  /** Maximum identifier length. Not enforced by any method of this class. */
  public static final int MAX_IDENTIFIER_LENGTH = 128;

  /** Upper-case reserved keywords, consulted by {@link #isShouldBeQuoted(String)}. */
  public static final Set<String> RESERVED_KEYWORDS_SET = new HashSet<>();
  /** Reserved keywords, in upper case. */
  public static final String [] RESERVED_KEYWORDS = {
      "AS", "ALL", "AND", "ANY", "ASYMMETRIC", "ASC",
      "BOTH",
      "CASE", "CAST", "CREATE", "CROSS", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
      "DESC", "DISTINCT",
      "END", "ELSE", "EXCEPT",
      "FALSE", "FULL", "FROM",
      "GROUP",
      "HAVING",
      "ILIKE", "IN", "INNER", "INTERSECT", "INTO", "IS",
      "JOIN",
      "LEADING", "LEFT", "LIKE", "LIMIT",
      "NATURAL", "NOT", "NULL",
      "ON", "OUTER", "OR", "ORDER",
      "RIGHT",
      "SELECT", "SOME", "SYMMETRIC",
      "TABLE", "THEN", "TRAILING", "TRUE",
      "OVER",
      "UNION", "UNIQUE", "USING",
      "WHEN", "WHERE", "WINDOW", "WITH"
  };

  // Without this initializer the set stays empty and the reserved-keyword check in
  // isShouldBeQuoted() can never match. It must come after both fields above because
  // static initializers run in textual order. Adding is idempotent, so it is harmless
  // even if other code also fills the set.
  static {
    for (String keyword : RESERVED_KEYWORDS) {
      RESERVED_KEYWORDS_SET.add(keyword);
    }
  }

  /**
   * Builds a qualified identifier from a raw, possibly dot-chained name.
   *
   * The name is split on the delimiter first, so a delimiter inside a quoted part is not
   * supported. A part recognized by {@link #isDelimited(String, IdentifierPolicy)} is added
   * with its surrounding quotes stripped and the quoted flag set; any other part is
   * converted by {@link #internIdentifierPart(String, IdentifierPolicy)}.
   *
   * @param raw The raw identifier. <code>null</code> and the empty string are passed to
   *            the {@link QualifiedIdentifier} factory as they are.
   * @param policy The identifier policy providing the quote character and the case rule
   * @return The qualified identifier
   * @throws IllegalArgumentException if a part is empty, is a lone quote character,
   *                                  or is a pair of quotes with nothing between them
   */
  public static QualifiedIdentifier makeIdentifier(String raw, IdentifierPolicy policy) {
    if (raw == null || raw.isEmpty()) {
      return $(raw);
    }

    final String [] splitted = raw.split(IDENTIFIER_DELIMITER_REGEXP);
    final ImmutableList.Builder<Identifier> builder = ImmutableList.builder();

    for (String part : splitted) {
      if (isDelimited(part, policy)) {
        builder.add(_(stripQuote(part), true));
      } else {
        builder.add(internIdentifierPart(part, policy));
      }
    }
    return $(builder.build());
  }

  /**
   * Converts an unquoted identifier part to the case in which the policy stores
   * unquoted identifiers.
   *
   * The case conversion uses {@link Locale#ENGLISH}, so the result does not depend on
   * the default locale. For any case rule other than lower or upper case, the part is
   * kept as it is.
   *
   * @param rawPart An unquoted identifier part
   * @param policy The identifier policy providing the case rule
   * @return The identifier for the part, with the quoted flag unset
   */
  public static Identifier internIdentifierPart(String rawPart, IdentifierPolicy policy) {
    IdentifierCase unquotedIdentifierCase = policy.storesUnquotedIdentifierAs();
    final String interned;
    if (unquotedIdentifierCase == IdentifierCase.LowerCase) {
      interned = rawPart.toLowerCase(Locale.ENGLISH);
    } else if (unquotedIdentifierCase == IdentifierCase.UpperCase) {
      interned = rawPart.toUpperCase(Locale.ENGLISH);
    } else {
      interned = rawPart;
    }

    return _(interned, false);
  }

  /**
   * Normalize an identifier. Normalization means a translation from an identifier to a
   * refined identifier name.
   *
   * Identifier can be composed of multiple parts as follows:
   * <pre>
   *   database_name.table_name.column_name
   * </pre>
   *
   * Each regular identifier part can be composed of alphabet ([a-z][A-Z]), number ([0-9]),
   * and underscore ([_]). Also, the first letter must be an alphabet character.
   *
   * <code>normalizeIdentifier</code> normalizes each part of an identifier.
   *
   * In detail, for each part, it performs as follows:
   * <ul>
   *   <li>changing a part without double quotation to be lower case letters</li>
   *   <li>eliminating double quotation marks from identifier</li>
   * </ul>
   *
   * @param identifier The identifier to be normalized
   * @return The normalized identifier. <code>null</code> and the empty string are
   *         returned as they are.
   * @throws IllegalArgumentException if a part is not a valid identifier part,
   *                                  as determined by {@link #isDelimited(String)}
   */
  public static String normalizeIdentifier(String identifier) {
    if (identifier == null || identifier.isEmpty()) {
      return identifier;
    }

    String [] splitted = identifier.split(IDENTIFIER_DELIMITER_REGEXP);
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (String part : splitted) {
      if (first) {
        first = false;
      } else {
        sb.append(IDENTIFIER_DELIMITER);
      }
      sb.append(normalizeIdentifierPart(part));
    }
    return sb.toString();
  }

  /**
   * Normalizes a single identifier part: a delimited part loses its surrounding quotes
   * and keeps its case, and any other part is changed to lower case.
   *
   * @param part One part of an identifier
   * @return The normalized part
   * @throws IllegalArgumentException if the part is not a valid identifier part,
   *                                  as determined by {@link #isDelimited(String)}
   */
  public static String normalizeIdentifierPart(String part) {
    // Locale.ENGLISH keeps the result independent of the default locale (a Turkish locale
    // would map 'I' to a dotless 'i') and matches internIdentifierPart().
    return isDelimited(part) ? stripQuote(part) : part.toLowerCase(Locale.ENGLISH);
  }

  /**
   * Denormalize an identifier. Denormalize means a translation from a stored identifier
   * to be a printable identifier name.
   *
   * In detail, each part for which {@link #isShouldBeQuoted(String)} is true is passed
   * through {@link StringUtils#doubleQuote(String)}, and every other part is kept as it is.
   *
   * @param identifier The identifier to be denormalized
   * @return The denormalized identifier
   */
  public static String denormalizeIdentifier(String identifier) {
    String [] splitted = identifier.split(IDENTIFIER_DELIMITER_REGEXP);
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (String part : splitted) {
      if (first) {
        first = false;
      } else {
        sb.append(IDENTIFIER_DELIMITER);
      }
      sb.append(denormalizePart(part));
    }
    return sb.toString();
  }

  /**
   * Denormalizes a single identifier part.
   *
   * @param identifier One stored part of an identifier
   * @return The part passed through {@link StringUtils#doubleQuote(String)} if
   *         {@link #isShouldBeQuoted(String)} is true for it. Otherwise, the part itself.
   */
  public static String denormalizePart(String identifier) {
    if (isShouldBeQuoted(identifier)) {
      return StringUtils.doubleQuote(identifier);
    } else {
      return identifier;
    }
  }

  /**
   * True if a stored identifier part must be quoted to be printed, meaning that it
   * contains an upper case character, contains a character rejected by
   * {@link StringUtils#isPartOfAnsiSQLIdentifier(char)}, or is a reserved keyword
   * regardless of case.
   *
   * @param interned One stored part of an identifier
   * @return True if the part must be quoted. The empty string yields False.
   */
  public static boolean isShouldBeQuoted(String interned) {
    for (char character : interned.toCharArray()) {
      if (Character.isUpperCase(character) || !StringUtils.isPartOfAnsiSQLIdentifier(character)) {
        return true;
      }
    }

    // The keyword lookup does not depend on the character, so it is done once here.
    // The emptiness guard keeps the former result for the empty string, for which the
    // set was never consulted. Locale.ENGLISH keeps the upper-casing independent of the
    // default locale, so that "in" maps to "IN" under a Turkish locale as well.
    return !interned.isEmpty()
        && RESERVED_KEYWORDS_SET.contains(interned.toUpperCase(Locale.ENGLISH));
  }

  /**
   * Removes the first and the last character of a given string. It does not check that
   * they are quote characters.
   *
   * @param raw A delimited identifier part of at least two characters
   * @return The text between the first and the last character
   * @throws StringIndexOutOfBoundsException if the string is shorter than two characters
   */
  public static String stripQuote(String raw) {
    return raw.substring(1, raw.length() - 1);
  }

  /**
   * True if a given identifier part is delimited according to the default policy.
   *
   * @param raw One part of an identifier
   * @return True if the part starts and ends with the quote character of the default policy
   * @throws IllegalArgumentException see {@link #isDelimited(String, IdentifierPolicy)}
   */
  public static boolean isDelimited(String raw) {
    return isDelimited(raw, DefaultPolicy());
  }

  /**
   * True if a given identifier part starts and ends with the quote character of the policy.
   *
   * A part having the quote character on one side only is not rejected here;
   * False is returned for it.
   *
   * @param raw One part of an identifier
   * @param policy The identifier policy providing the quote character
   * @return True if the part is delimited. Otherwise, it will return False.
   * @throws IllegalArgumentException if the part is empty, is a lone quote character,
   *                                  or is a pair of quotes with nothing between them
   */
  public static boolean isDelimited(String raw, IdentifierPolicy policy) {
    char quoteString = policy.getIdentifierQuoteString();

    // An empty part (e.g., the middle part of "a..b") has no characters to inspect.
    if (raw.isEmpty()) {
      throw new IllegalArgumentException("Invalid Identifier: " + raw);
    }

    boolean openQuote = raw.charAt(0) == quoteString;
    boolean closeQuote = raw.charAt(raw.length() - 1) == quoteString;

    // If at least one quote mark exists, the identifier must be at least 2 characters long.
    // This must be an OR: the first and the last character of a one-character string are
    // the same character, so an exclusive OR of the two flags is never true here.
    if ((openQuote || closeQuote) && raw.length() < 2) {
      throw new IllegalArgumentException("Invalid Identifier: " + raw);
    }

    // does not allow the empty delimited identifier
    if (openQuote && closeQuote && raw.length() == 2) {
      throw new IllegalArgumentException("zero-length delimited identifier: " + raw);
    }

    // Ensure the quote open and close
    return openQuote && closeQuote;
  }

  /**
   * True if a given name is a simple identifier, meaning is not a dot-chained name.
   *
   * @param columnOrTableName Column or Table name to be checked
   * @return True if a given name is a simple identifier. Otherwise, it will return False.
   */
  public static boolean isSimpleIdentifier(String columnOrTableName) {
    return columnOrTableName.split(IDENTIFIER_DELIMITER_REGEXP).length == 1;
  }

  /**
   * True if a given name consists of exactly three dot-separated parts.
   *
   * @param tableName The name to be checked
   * @return True if splitting the name on the delimiter yields three parts
   */
  public static boolean isFQColumnName(String tableName) {
    return tableName.split(IDENTIFIER_DELIMITER_REGEXP).length == 3;
  }

  /**
   * True if a given name contains at least one delimiter.
   *
   * @param tableName The name to be checked
   * @return True if the name contains the delimiter. Otherwise, it will return False.
   */
  public static boolean isFQTableName(String tableName) {
    int lastDelimiterIdx = tableName.lastIndexOf(IDENTIFIER_DELIMITER);
    return lastDelimiterIdx > -1;
  }

  /**
   * Splits a qualified table name at its last delimiter.
   *
   * @param qualifiedName The qualified table name
   * @return Two elements: the text before the last delimiter and the text after it
   * @throws IllegalArgumentException if the name contains no delimiter
   */
  public static String [] splitFQTableName(String qualifiedName) {
    String [] splitted = splitTableName(qualifiedName);
    if (splitted.length == 1) {
      throw new IllegalArgumentException("Table name is expected to be qualified, but was \""
          + qualifiedName + "\".");
    }
    return splitted;
  }

  /**
   * Splits a table name at its last delimiter.
   *
   * @param tableName The table name, qualified or not
   * @return Two elements, the text before the last delimiter and the text after it,
   *         or a single element holding the name itself if it contains no delimiter
   */
  public static String [] splitTableName(String tableName) {
    int lastDelimiterIdx = tableName.lastIndexOf(IDENTIFIER_DELIMITER);
    if (lastDelimiterIdx > -1) {
      return new String [] {
          tableName.substring(0, lastDelimiterIdx),
          tableName.substring(lastDelimiterIdx + 1, tableName.length())
      };
    } else {
      return new String [] {tableName};
    }
  }

  /**
   * Joins given identifiers with the delimiter. The identifiers are neither validated
   * nor quoted.
   *
   * @param identifiers The identifiers to be joined, in order
   * @return The joined name, or the empty string if no identifier is given
   */
  public static String buildFQName(String... identifiers) {
    boolean first = true;
    StringBuilder sb = new StringBuilder();
    for (String id : identifiers) {
      if (first) {
        first = false;
      } else {
        sb.append(IDENTIFIER_DELIMITER);
      }

      sb.append(id);
    }

    return sb.toString();
  }

  /**
   * Separates a qualified name into its qualifier and its simple name.
   *
   * @param name The qualified name
   * @return A pair of the qualifier and the simple name
   * @throws IllegalArgumentException if the name contains no delimiter
   */
  public static Pair<String, String> separateQualifierAndName(String name) {
    Preconditions.checkArgument(isFQTableName(name), "Must be a qualified name.");
    return new Pair<>(extractQualifier(name), extractSimpleName(name));
  }

  /**
   * Extract a qualification name from an identifier.
   *
   * For example, consider a table identifier like 'database1.table1'.
   * In this case, this method extracts 'database1'.
   *
   * @param name The identifier to be extracted
   * @return The extracted qualifier, or {@link TajoConstants#EMPTY_STRING} if the
   *         identifier contains no delimiter
   */
  public static String extractQualifier(String name) {
    int lastDelimiterIdx = name.lastIndexOf(IDENTIFIER_DELIMITER);
    if (lastDelimiterIdx > -1) {
      return name.substring(0, lastDelimiterIdx);
    } else {
      return TajoConstants.EMPTY_STRING;
    }
  }

  /**
   * Extract a simple name from an identifier.
   *
   * For example, consider a table identifier like 'database1.table1'.
   * In this case, this method extracts 'table1'.
   *
   * @param name The identifier to be extracted
   * @return The extracted simple name, or the identifier itself if it contains no delimiter
   */
  public static String extractSimpleName(String name) {
    int lastDelimiterIdx = name.lastIndexOf(IDENTIFIER_DELIMITER);
    if (lastDelimiterIdx > -1) {
      // plus one means skipping a delimiter.
      return name.substring(lastDelimiterIdx + 1, name.length());
    } else {
      return name;
    }
  }

  /**
   * Builds a table name qualified with a database name.
   *
   * @param databaseName The database name. It must not be <code>null</code>.
   * @param tableName The table name
   * @return The database name and the table name joined with the delimiter
   */
  public static String getCanonicalTableName(String databaseName, String tableName) {
    StringBuilder sb = new StringBuilder(databaseName);
    sb.append(IDENTIFIER_DELIMITER);
    sb.append(tableName);
    return sb.toString();
  }
}