blob: 8972beed15333de91cccdc1c4b361bcf66d12a8e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.cql3.functions.types;
import java.text.*;
import java.util.Date;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
/**
* Simple utility class used to help parsing CQL values (mainly UDT and collection ones).
*/
public abstract class ParseUtils
{
/**
* Valid ISO-8601 patterns for CQL timestamp literals.
*/
private static final String[] iso8601Patterns =
new String[]{
"yyyy-MM-dd HH:mm",
"yyyy-MM-dd HH:mm:ss",
"yyyy-MM-dd HH:mmZ",
"yyyy-MM-dd HH:mm:ssZ",
"yyyy-MM-dd HH:mm:ss.SSS",
"yyyy-MM-dd HH:mm:ss.SSSZ",
"yyyy-MM-dd'T'HH:mm",
"yyyy-MM-dd'T'HH:mmZ",
"yyyy-MM-dd'T'HH:mm:ss",
"yyyy-MM-dd'T'HH:mm:ssZ",
"yyyy-MM-dd'T'HH:mm:ss.SSS",
"yyyy-MM-dd'T'HH:mm:ss.SSSZ",
"yyyy-MM-dd",
"yyyy-MM-ddZ"
};
/**
* Returns the index of the first character in toParse from idx that is not a "space".
*
* @param toParse the string to skip space on.
* @param idx the index to start skipping space from.
* @return the index of the first character in toParse from idx that is not a "space.
*/
static int skipSpaces(String toParse, int idx)
{
while (isBlank(toParse.charAt(idx))) ++idx;
return idx;
}
/**
* Assuming that idx points to the beginning of a CQL value in toParse, returns the index of the
* first character after this value.
*
* @param toParse the string to skip a value form.
* @param idx the index to start parsing a value from.
* @return the index ending the CQL value starting at {@code idx}.
* @throws IllegalArgumentException if idx doesn't point to the start of a valid CQL value.
*/
static int skipCQLValue(String toParse, int idx)
{
if (idx >= toParse.length()) throw new IllegalArgumentException();
if (isBlank(toParse.charAt(idx))) throw new IllegalArgumentException();
int cbrackets = 0;
int sbrackets = 0;
int parens = 0;
boolean inString = false;
do
{
char c = toParse.charAt(idx);
if (inString)
{
if (c == '\'')
{
if (idx + 1 < toParse.length() && toParse.charAt(idx + 1) == '\'')
{
++idx; // this is an escaped quote, skip it
}
else
{
inString = false;
if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx + 1;
}
}
// Skip any other character
}
else if (c == '\'')
{
inString = true;
}
else if (c == '{')
{
++cbrackets;
}
else if (c == '[')
{
++sbrackets;
}
else if (c == '(')
{
++parens;
}
else if (c == '}')
{
if (cbrackets == 0) return idx;
--cbrackets;
if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx + 1;
}
else if (c == ']')
{
if (sbrackets == 0) return idx;
--sbrackets;
if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx + 1;
}
else if (c == ')')
{
if (parens == 0) return idx;
--parens;
if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx + 1;
}
else if (isBlank(c) || !isIdentifierChar(c))
{
if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx;
}
} while (++idx < toParse.length());
if (inString || cbrackets != 0 || sbrackets != 0 || parens != 0)
throw new IllegalArgumentException();
return idx;
}
/**
* Assuming that idx points to the beginning of a CQL identifier in toParse, returns the index of
* the first character after this identifier.
*
* @param toParse the string to skip an identifier from.
* @param idx the index to start parsing an identifier from.
* @return the index ending the CQL identifier starting at {@code idx}.
* @throws IllegalArgumentException if idx doesn't point to the start of a valid CQL identifier.
*/
static int skipCQLId(String toParse, int idx)
{
if (idx >= toParse.length()) throw new IllegalArgumentException();
char c = toParse.charAt(idx);
if (isIdentifierChar(c))
{
while (idx < toParse.length() && isIdentifierChar(toParse.charAt(idx))) idx++;
return idx;
}
if (c != '"') throw new IllegalArgumentException();
while (++idx < toParse.length())
{
c = toParse.charAt(idx);
if (c != '"') continue;
if (idx + 1 < toParse.length() && toParse.charAt(idx + 1) == '\"')
++idx; // this is an escaped double quote, skip it
else return idx + 1;
}
throw new IllegalArgumentException();
}
/**
* Return {@code true} if the given character is allowed in a CQL identifier, that is, if it is in
* the range: {@code [0..9a..zA..Z-+._&]}.
*
* @param c The character to inspect.
* @return {@code true} if the given character is allowed in a CQL identifier, {@code false}
* otherwise.
*/
static boolean isIdentifierChar(int c)
{
return (c >= '0' && c <= '9')
|| (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| c == '-'
|| c == '+'
|| c == '.'
|| c == '_'
|| c == '&';
}
/**
* Return {@code true} if the given character is a valid whitespace character in CQL, that is, if
* it is a regular space, a tabulation sign, or a new line sign.
*
* @param c The character to inspect.
* @return {@code true} if the given character is a valid whitespace character, {@code false}
* otherwise.
*/
static boolean isBlank(int c)
{
return c == ' ' || c == '\t' || c == '\n';
}
/**
* Check whether the given string corresponds to a valid CQL long literal. Long literals are
* composed solely by digits, but can have an optional leading minus sign.
*
* @param str The string to inspect.
* @return {@code true} if the given string corresponds to a valid CQL integer literal, {@code
* false} otherwise.
*/
static boolean isLongLiteral(String str)
{
if (str == null || str.isEmpty()) return false;
char[] chars = str.toCharArray();
for (int i = 0; i < chars.length; i++)
{
char c = chars[i];
if ((c < '0' && (i != 0 || c != '-')) || c > '9') return false;
}
return true;
}
/**
* Return {@code true} if the given string is surrounded by single quotes, and {@code false}
* otherwise.
*
* @param value The string to inspect.
* @return {@code true} if the given string is surrounded by single quotes, and {@code false}
* otherwise.
*/
static boolean isQuoted(String value)
{
return isQuoted(value, '\'');
}
/**
* Quote the given string; single quotes are escaped. If the given string is null, this method
* returns a quoted empty string ({@code ''}).
*
* @param value The value to quote.
* @return The quoted string.
*/
public static String quote(String value)
{
return quote(value, '\'');
}
/**
* Unquote the given string if it is quoted; single quotes are unescaped. If the given string is
* not quoted, it is returned without any modification.
*
* @param value The string to unquote.
* @return The unquoted string.
*/
static String unquote(String value)
{
return unquote(value, '\'');
}
/**
* Double quote the given string; double quotes are escaped. If the given string is null, this
* method returns a quoted empty string ({@code ""}).
*
* @param value The value to double quote.
* @return The double quoted string.
*/
static String doubleQuote(String value)
{
return quote(value, '"');
}
/**
* Unquote the given string if it is double quoted; double quotes are unescaped. If the given
* string is not double quoted, it is returned without any modification.
*
* @param value The string to un-double quote.
* @return The un-double quoted string.
*/
static String unDoubleQuote(String value)
{
return unquote(value, '"');
}
/**
* Parse the given string as a date, using one of the accepted ISO-8601 date patterns.
*
* <p>This method is adapted from Apache Commons {@code DateUtils.parseStrictly()} method (that is
* used Cassandra side to parse date strings)..
*
* @throws ParseException If the given string is not a valid ISO-8601 date.
* @see <a href="https://cassandra.apache.org/doc/cql3/CQL-2.2.html#usingtimestamps">'Working with
* timestamps' section of CQL specification</a>
*/
static Date parseDate(String str) throws ParseException
{
SimpleDateFormat parser = new SimpleDateFormat();
parser.setLenient(false);
// set a default timezone for patterns that do not provide one
parser.setTimeZone(TimeZone.getTimeZone("UTC"));
// Java 6 has very limited support for ISO-8601 time zone formats,
// so we need to transform the string first
// so that accepted patterns are correctly handled,
// such as Z for UTC, or "+00:00" instead of "+0000".
// Note: we cannot use the X letter in the pattern
// because it has been introduced in Java 7.
str = str.replaceAll("(\\+|\\-)(\\d\\d):(\\d\\d)$", "$1$2$3");
str = str.replaceAll("Z$", "+0000");
ParsePosition pos = new ParsePosition(0);
for (String parsePattern : iso8601Patterns)
{
parser.applyPattern(parsePattern);
pos.setIndex(0);
Date date = parser.parse(str, pos);
if (date != null && pos.getIndex() == str.length())
{
return date;
}
}
throw new ParseException("Unable to parse the date: " + str, -1);
}
/**
* Parse the given string as a date, using the supplied date pattern.
*
* <p>This method is adapted from Apache Commons {@code DateUtils.parseStrictly()} method (that is
* used Cassandra side to parse date strings)..
*
* @throws ParseException If the given string cannot be parsed with the given pattern.
* @see <a href="https://cassandra.apache.org/doc/cql3/CQL-2.2.html#usingtimestamps">'Working with
* timestamps' section of CQL specification</a>
*/
static Date parseDate(String str, String pattern) throws ParseException
{
SimpleDateFormat parser = new SimpleDateFormat();
parser.setLenient(false);
// set a default timezone for patterns that do not provide one
parser.setTimeZone(TimeZone.getTimeZone("UTC"));
// Java 6 has very limited support for ISO-8601 time zone formats,
// so we need to transform the string first
// so that accepted patterns are correctly handled,
// such as Z for UTC, or "+00:00" instead of "+0000".
// Note: we cannot use the X letter in the pattern
// because it has been introduced in Java 7.
str = str.replaceAll("(\\+|\\-)(\\d\\d):(\\d\\d)$", "$1$2$3");
str = str.replaceAll("Z$", "+0000");
ParsePosition pos = new ParsePosition(0);
parser.applyPattern(pattern);
pos.setIndex(0);
Date date = parser.parse(str, pos);
if (date != null && pos.getIndex() == str.length())
{
return date;
}
throw new ParseException("Unable to parse the date: " + str, -1);
}
/**
* Parse the given string as a time, using the following time pattern: {@code
* hh:mm:ss[.fffffffff]}.
*
* <p>This method is loosely based on {@code java.sql.Timestamp}.
*
* @param str The string to parse.
* @return A long value representing the number of nanoseconds since midnight.
* @throws ParseException if the string cannot be parsed.
* @see <a href="https://cassandra.apache.org/doc/cql3/CQL-2.2.html#usingtime">'Working with time'
* section of CQL specification</a>
*/
static long parseTime(String str) throws ParseException
{
String nanos_s;
long hour;
long minute;
long second;
long a_nanos = 0;
String formatError = "Timestamp format must be hh:mm:ss[.fffffffff]";
String zeros = "000000000";
if (str == null) throw new IllegalArgumentException(formatError);
str = str.trim();
// Parse the time
int firstColon = str.indexOf(':');
int secondColon = str.indexOf(':', firstColon + 1);
// Convert the time; default missing nanos
if (firstColon > 0 && secondColon > 0 && secondColon < str.length() - 1)
{
int period = str.indexOf('.', secondColon + 1);
hour = Integer.parseInt(str.substring(0, firstColon));
if (hour < 0 || hour >= 24) throw new IllegalArgumentException("Hour out of bounds.");
minute = Integer.parseInt(str.substring(firstColon + 1, secondColon));
if (minute < 0 || minute >= 60) throw new IllegalArgumentException("Minute out of bounds.");
if (period > 0 && period < str.length() - 1)
{
second = Integer.parseInt(str.substring(secondColon + 1, period));
if (second < 0 || second >= 60) throw new IllegalArgumentException("Second out of bounds.");
nanos_s = str.substring(period + 1);
if (nanos_s.length() > 9) throw new IllegalArgumentException(formatError);
if (!Character.isDigit(nanos_s.charAt(0))) throw new IllegalArgumentException(formatError);
nanos_s = nanos_s + zeros.substring(0, 9 - nanos_s.length());
a_nanos = Integer.parseInt(nanos_s);
}
else if (period > 0) throw new ParseException(formatError, -1);
else
{
second = Integer.parseInt(str.substring(secondColon + 1));
if (second < 0 || second >= 60) throw new ParseException("Second out of bounds.", -1);
}
}
else throw new ParseException(formatError, -1);
long rawTime = 0;
rawTime += TimeUnit.HOURS.toNanos(hour);
rawTime += TimeUnit.MINUTES.toNanos(minute);
rawTime += TimeUnit.SECONDS.toNanos(second);
rawTime += a_nanos;
return rawTime;
}
/**
* Format the given long value as a CQL time literal, using the following time pattern: {@code
* hh:mm:ss[.fffffffff]}.
*
* @param value A long value representing the number of nanoseconds since midnight.
* @return The formatted value.
* @see <a href="https://cassandra.apache.org/doc/cql3/CQL-2.2.html#usingtime">'Working with time'
* section of CQL specification</a>
*/
static String formatTime(long value)
{
int nano = (int) (value % 1000000000);
value -= nano;
value /= 1000000000;
int seconds = (int) (value % 60);
value -= seconds;
value /= 60;
int minutes = (int) (value % 60);
value -= minutes;
value /= 60;
int hours = (int) (value % 24);
value -= hours;
value /= 24;
assert (value == 0);
StringBuilder sb = new StringBuilder();
leftPadZeros(hours, 2, sb);
sb.append(':');
leftPadZeros(minutes, 2, sb);
sb.append(':');
leftPadZeros(seconds, 2, sb);
sb.append('.');
leftPadZeros(nano, 9, sb);
return sb.toString();
}
/**
* Return {@code true} if the given string is surrounded by the quote character given, and {@code
* false} otherwise.
*
* @param value The string to inspect.
* @return {@code true} if the given string is surrounded by the quote character, and {@code
* false} otherwise.
*/
private static boolean isQuoted(String value, char quoteChar)
{
return value != null
&& value.length() > 1
&& value.charAt(0) == quoteChar
&& value.charAt(value.length() - 1) == quoteChar;
}
/**
* @param quoteChar " or '
* @return A quoted empty string.
*/
private static String emptyQuoted(char quoteChar)
{
// don't handle non quote characters, this is done so that these are interned and don't create
// repeated empty quoted strings.
assert quoteChar == '"' || quoteChar == '\'';
if (quoteChar == '"') return "\"\"";
else return "''";
}
/**
* Quotes text and escapes any existing quotes in the text. {@code String.replace()} is a bit too
* inefficient (see JAVA-67, JAVA-1262).
*
* @param text The text.
* @param quoteChar The character to use as a quote.
* @return The text with surrounded in quotes with all existing quotes escaped with (i.e. '
* becomes '')
*/
private static String quote(String text, char quoteChar)
{
if (text == null || text.isEmpty()) return emptyQuoted(quoteChar);
int nbMatch = 0;
int start = -1;
do
{
start = text.indexOf(quoteChar, start + 1);
if (start != -1) ++nbMatch;
} while (start != -1);
// no quotes found that need to be escaped, simply surround in quotes and return.
if (nbMatch == 0) return quoteChar + text + quoteChar;
// 2 for beginning and end quotes.
// length for original text
// nbMatch for escape characters to add to quotes to be escaped.
int newLength = 2 + text.length() + nbMatch;
char[] result = new char[newLength];
result[0] = quoteChar;
result[newLength - 1] = quoteChar;
int newIdx = 1;
for (int i = 0; i < text.length(); i++)
{
char c = text.charAt(i);
if (c == quoteChar)
{
// escape quote with another occurrence.
result[newIdx++] = c;
result[newIdx++] = c;
}
else
{
result[newIdx++] = c;
}
}
return new String(result);
}
/**
* Unquotes text and unescapes non surrounding quotes. {@code String.replace()} is a bit too
* inefficient (see JAVA-67, JAVA-1262).
*
* @param text The text
* @param quoteChar The character to use as a quote.
* @return The text with surrounding quotes removed and non surrounding quotes unescaped (i.e. ''
* becomes ')
*/
private static String unquote(String text, char quoteChar)
{
if (!isQuoted(text, quoteChar)) return text;
if (text.length() == 2) return "";
String search = emptyQuoted(quoteChar);
int nbMatch = 0;
int start = -1;
do
{
start = text.indexOf(search, start + 2);
// ignore the second to last character occurrence, as the last character is a quote.
if (start != -1 && start != text.length() - 2) ++nbMatch;
} while (start != -1);
// no escaped quotes found, simply remove surrounding quotes and return.
if (nbMatch == 0) return text.substring(1, text.length() - 1);
// length of the new string will be its current length - the number of occurrences.
int newLength = text.length() - nbMatch - 2;
char[] result = new char[newLength];
int newIdx = 0;
// track whenever a quoteChar is encountered and the previous character is not a quoteChar.
boolean firstFound = false;
for (int i = 1; i < text.length() - 1; i++)
{
char c = text.charAt(i);
if (c == quoteChar)
{
if (firstFound)
{
// The previous character was a quoteChar, don't add this to result, this action in
// effect removes consecutive quotes.
firstFound = false;
}
else
{
// found a quoteChar and the previous character was not a quoteChar, include in result.
firstFound = true;
result[newIdx++] = c;
}
}
else
{
// non quoteChar encountered, include in result.
result[newIdx++] = c;
firstFound = false;
}
}
return new String(result);
}
private static void leftPadZeros(int value, int digits, StringBuilder sb)
{
sb.append(String.format("%0" + digits + 'd', value));
}
private ParseUtils()
{
}
}