"SECTION","TOPIC","SYNTAX","TEXT","EXAMPLE"
"Commands","SELECT","
SELECT [/*+ hint */] [DISTINCT | ALL] selectExpression [,...]
FROM tableExpression [( columnDef [,...] )] [ WHERE expression ]
[ GROUP BY expression [,...] ] [ HAVING expression ]
[ ORDER BY order [,...] ] [ LIMIT {bindParameter | number} ]
","
Selects data from a table.
DISTINCT filters out duplicate results while ALL, the default, includes all results.
FROM identifies the table being queried (single table only currently - no joins or derived tables yet).
Dynamic columns not declared at create time may be defined in parentheses after the table name and then
used in the query.
GROUP BY groups the result by the given expression(s).
HAVING filters rows after grouping.
ORDER BY sorts the result by the given column(s) or expression(s) and is only allowed for aggregate
queries or queries with a LIMIT clause.
LIMIT limits the number of rows returned by the query; no limit is applied if it is specified as null or
less than zero. The LIMIT clause is executed after the ORDER BY clause to support TopN type queries.
An optional hint overrides the default query plan.
","
SELECT * FROM TEST;
SELECT a.* FROM TEST;
SELECT DISTINCT NAME FROM TEST;
SELECT ID, COUNT(1) FROM TEST GROUP BY ID;
SELECT NAME, SUM(VAL) FROM TEST GROUP BY NAME HAVING COUNT(1) > 2;
SELECT 'ID' COL, MAX(ID) AS MAX FROM TEST;
SELECT * FROM TEST LIMIT 1000;
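-- Hypothetical additions: a hint overrides the default plan, and ORDER BY requires an aggregate or LIMIT query
SELECT /*+ NO_INDEX */ * FROM TEST WHERE NAME='foo';
SELECT * FROM TEST ORDER BY NAME DESC LIMIT 100;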
"
"Commands","UPSERT VALUES","
UPSERT INTO tableName [( { columnRef | columnDef } [,...] )] VALUES ( constantTerm [,...] )
","
Inserts the given values into the table if the row is not already present, and updates it otherwise. The list of
columns is optional and, if not present, the values map to the columns in the
order they are declared in the schema. The values must evaluate to constants.
","
UPSERT INTO TEST VALUES('foo','bar',3);
UPSERT INTO TEST(NAME,ID) VALUES('foo',123);
"
"Commands","UPSERT SELECT","
UPSERT [/*+ hint */] INTO tableName [( { columnRef | columnDef } [,...] )] select
","
Inserts rows into the table if not present and updates them otherwise, based on the
results of running another query. The values are set based on their matching
position between the source and target tables. The list of columns is optional
and, if not present, the values map to the columns in the order they are declared in the
schema. If auto commit is on, and both a) the target
table matches the source table, and b) the select performs no aggregation, then the population of the target table will
be done completely on the server-side (with constraint violations logged, but otherwise ignored). Otherwise, data is
buffered on the client and, if auto commit is on, committed in row batches as specified by the UpsertBatchSize
connection property (or the phoenix.mutate.upsertBatchSize HBase config property, which defaults to 10000 rows).
","
UPSERT INTO test.targetTable(col1, col2) SELECT col3, col4 FROM test.sourceTable WHERE col5 < 100
UPSERT INTO foo SELECT * FROM bar;
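-- Hypothetical: the optional hint applies to the query plan of the embedded select
UPSERT /*+ NO_INDEX */ INTO foo SELECT * FROM bar WHERE col5 < 100;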
"
"Commands","DELETE","
DELETE [/*+ hint */] FROM tableName [ WHERE expression ]
[ ORDER BY order [,...] ] [ LIMIT {bindParameter | number} ]
","
Deletes the rows selected by the where clause. If auto commit is on, the
deletion is performed completely server-side.
","
DELETE FROM TEST;
DELETE FROM TEST WHERE ID=123;
DELETE FROM TEST WHERE NAME LIKE 'foo%';
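-- Hypothetical (per the syntax above): LIMIT caps the number of rows deleted
DELETE FROM TEST WHERE NAME LIKE 'foo%' LIMIT 100;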
"
"Commands","CREATE","
CREATE { TABLE | VIEW } [IF NOT EXISTS] tableRef
( columnDef [,...] [constraint] )
[tableOptions] [ SPLIT ON ( splitPoint [,...] ) ]
","
Creates a new table or view. For the creation of a table, the HBase table and any column families referenced are created
if they don't already exist (using uppercase names unless they are double quoted in which case they are case sensitive).
Column families outside of the ones listed are not affected.
At create time, an empty key value is added to the first column family of any existing rows. Upserts will also add this empty key value. This
is done to improve query performance by having a key value column that is guaranteed to always be present (minimizing the amount of data that
must be projected). Alternatively, if a view is
created, the HBase table and column families must already exist. No empty key value is added to existing rows and no
data mutations are allowed - the view is read-only. Query performance for a view will not be as good as performance for
a table. For a table only, HBase table and column configuration options may
be passed through as key/value pairs to set up the HBase table as needed.
","
CREATE TABLE my_schema.my_table ( id BIGINT not null primary key, date DATE not null)
CREATE TABLE my_table ( id INTEGER not null primary key desc, date DATE not null,
m.db_utilization DECIMAL, i.db_utilization)
m.DATA_BLOCK_ENCODING='DIFF'
CREATE TABLE stats.prod_metrics ( host char(50) not null, created_date date not null,
txn_count bigint CONSTRAINT pk PRIMARY KEY (host, created_date) )
CREATE TABLE IF NOT EXISTS my_table ( id char(10) not null primary key, value integer)
DATA_BLOCK_ENCODING='NONE',VERSIONS=?,MAX_FILESIZE=2000000 split on (?, ?, ?)
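-- Hypothetical: a view maps onto an existing HBase table; the table and column families must already exist
CREATE VIEW my_view ( id CHAR(10) not null primary key, m.db_utilization DECIMAL )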
"
"Commands","DROP","
DROP {TABLE | VIEW} [IF EXISTS] tableRef
","
Drops a table or view. When dropping a table, the data in the table is deleted. For a view, on the other hand, the data
is not affected. Note that the schema is versioned, such that snapshot queries connecting at an earlier time stamp may
still query against the dropped table, as the HBase table itself is not deleted.
","
DROP TABLE my_schema.my_table
DROP VIEW my_view
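-- IF EXISTS suppresses the error when the table is not present
DROP TABLE IF EXISTS my_schema.my_table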
"
"Commands","ALTER TABLE","
ALTER TABLE tableRef { { ADD [IF NOT EXISTS] columnDef [options] } | { DROP COLUMN [IF EXISTS] columnRef } | { SET options } }
","
Alters an existing table by adding or removing a column or updating table options. When a column is dropped from a table, the data in
that column is deleted as well. PK columns may not be dropped, and only nullable PK columns may be added. For a view,
the data is not affected when a column is dropped. Note that adding or dropping columns
only affects subsequent queries and data modifications. Snapshot queries that are connected at an earlier timestamp
will still use the prior schema that was in place when the data was written.
","
ALTER TABLE my_schema.my_table ADD d.dept_id char(10) VERSIONS=10
ALTER TABLE my_table ADD dept_name char(50)
ALTER TABLE my_table ADD parent_id char(15) null primary key
ALTER TABLE my_table DROP COLUMN d.dept_id
ALTER TABLE my_table DROP COLUMN dept_name
ALTER TABLE my_table DROP COLUMN parent_id
ALTER TABLE my_table SET IMMUTABLE_ROWS=true
"
"Commands","CREATE INDEX","
CREATE INDEX [IF NOT EXISTS] indexName
ON tableRef ( columnRef [ASC | DESC] [,...] )
[ INCLUDE ( columnRef [,...] ) ]
[indexOptions] [ SPLIT ON ( splitPoint [,...] ) ]
","
Creates a new secondary index on a table or view. The index will be automatically kept in sync with the table as the data changes.
At query time, the optimizer will use the index if it contains all columns referenced in the query and produces the most efficient
execution plan.
If a table has rows that are write-once and append-only, then the table may set the IMMUTABLE_ROWS property to true (either up-front
in the CREATE TABLE statement or afterwards in an ALTER TABLE statement). This reduces the overhead at write time to maintain the index.
Otherwise, if this property is not set on the table, then incremental index maintenance will be performed on the server side when
the data changes.
","
CREATE INDEX my_idx ON sales.opportunity(last_updated_date DESC)
CREATE INDEX my_idx ON log.event(created_date DESC) INCLUDE (name, payload) SALT_BUCKETS=10
CREATE INDEX IF NOT EXISTS my_comp_idx ON server_metrics ( gc_time DESC, created_date DESC )
DATA_BLOCK_ENCODING='NONE',VERSIONS=?,MAX_FILESIZE=2000000 split on (?, ?, ?)
"
"Commands","DROP INDEX","
DROP INDEX [IF EXISTS] indexName ON tableRef
","
Drops an index from a table. When dropping an index, the data in the index is deleted. Note that since metadata is versioned,
snapshot queries connecting at an earlier time stamp may still use the index, as the HBase table backing the index is not deleted.
","
DROP INDEX my_idx ON sales.opportunity
DROP INDEX IF EXISTS my_idx ON server_metrics
"
"Commands","ALTER INDEX","
ALTER INDEX [IF EXISTS] indexName ON tableRef { DISABLE | REBUILD | UNUSABLE | USABLE }
","
Alters the state of an existing index. DISABLE will cause no further index maintenance to be performed on the index and it
will no longer be considered for use in queries. REBUILD will completely rebuild the index and upon completion will enable
the index to be used in queries again. UNUSABLE will cause the index to no longer be considered for use in queries, however
index maintenance will continue to be performed. USABLE will cause the index to again be considered for use in queries. Note
that a disabled index must be rebuilt and cannot be set as USABLE.
","
ALTER INDEX my_idx ON sales.opportunity DISABLE
ALTER INDEX IF EXISTS my_idx ON server_metrics REBUILD
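-- Hypothetical: USABLE re-enables an UNUSABLE index for queries (a DISABLEd index must be rebuilt instead)
ALTER INDEX my_idx ON server_metrics USABLE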
"
"Commands","EXPLAIN","
EXPLAIN {select|upsertSelect|delete}
","
Computes the logical steps necessary to execute the given command. Each step is represented as a string in a single column result set row.
","
EXPLAIN SELECT NAME, COUNT(*) FROM TEST GROUP BY NAME HAVING COUNT(*) > 2;
EXPLAIN SELECT entity_id FROM CORE.CUSTOM_ENTITY_DATA WHERE organization_id='00D300000000XHP' AND SUBSTR(entity_id,1,3) = '002' AND created_date < CURRENT_DATE()-1;
"
"Other Grammar","Constraint","
CONSTRAINT constraintName PRIMARY KEY (columnName [ASC | DESC] [,...])
","
Defines a multi-part primary key constraint. Each column may be declared to be
sorted in ascending or descending order. The default is ascending.
","
CONSTRAINT my_pk PRIMARY KEY (host,created_date)
CONSTRAINT my_pk PRIMARY KEY (host ASC,created_date DESC)
"
"Other Grammar","Options","
{ [familyName .] name= {value | bindParameter}} [,...]
","
Sets an option on an HBase table or column by modifying the respective HBase metadata.
The option applies to the named column family or, if omitted, to all column families if the name
references an HColumnDescriptor property. Otherwise, the option applies to the
HTableDescriptor.
One built-in option is SALT_BUCKETS. This option causes an extra byte to be transparently
prepended to every row key to ensure an even distribution of write load across all
your region servers. This is useful when your row key is always monotonically increasing,
causing hot spotting on a single region server. The byte is determined by hashing the row
key and modding it with the SALT_BUCKETS value. The value may be from 1 to 256. If no
split points are defined for the table, it will automatically be pre-split at each possible
salt bucket value. For an excellent write-up of this technique, see
http://blog.sematext.com/2012/04/09/hbasewd-avoid-regionserver-hotspotting-despite-writing-records-with-sequential-keys/
Another built-in option is IMMUTABLE_ROWS. Only tables with immutable rows are allowed to have indexes.
Immutable rows are expected to be inserted once in their entirety and then never updated. This limitation will be removed
once incremental index maintenance has been implemented. The current implementation inserts the index rows when the data
row is inserted.
","
IMMUTABLE_ROWS=true
SALT_BUCKETS=10
DATA_BLOCK_ENCODING='NONE',a.VERSIONS=10
MAX_FILESIZE=2000000000,MEMSTORE_FLUSHSIZE=80000000
"
"Other Grammar","Hint","
name [,...]
","
Advanced features that override default query processing behavior. The
supported hints include 1) SKIP_SCAN to force a skip scan to be performed on the query when
it otherwise would not be. This option may improve performance if a query does
not include the leading primary key column, but does include other, very
selective primary key columns. 2) RANGE_SCAN to force a range scan to be
performed on the query. This option may improve performance if a query
filters on a range for a non-selective leading primary key column along
with other primary key columns. 3) NO_INTRA_REGION_PARALLELIZATION to prevent the
spawning of multiple threads to process data within a single region. This
option is useful when the overall data set being queried is known to be
small. 4) NO_INDEX to force the data table to be used for a query, and
5) INDEX(<table_name> <index_name>...) to suggest which index to use for a given
query. Double quotes may be used to surround a table_name and/or index_name that
is case sensitive.
","
/*+ SKIP_SCAN */
/*+ RANGE_SCAN */
/*+ NO_INTRA_REGION_PARALLELIZATION */
/*+ NO_INDEX */
/*+ INDEX(employee emp_name_idx emp_start_date_idx) */
"
"Other Grammar","Column Def","
columnRef dataType [[NOT] NULL] [PRIMARY KEY [ASC | DESC] ]
","
Defines a new column, optionally as part of the primary key. The column name is case insensitive by default and
case sensitive if double quoted. The sort order of a primary key column may be ascending (ASC)
or descending (DESC). The default is ascending.
","
id char(15) not null primary key
key integer null
m.response_time bigint
"
"Other Grammar","Table Ref","
[schemaName .] tableName
","
References a table with an optional schema name qualifier
","
Sales.Contact
HR.Employee
Department
"
"Other Grammar","Column Ref","
[familyName .] columnName
","
References a column with an optional family name qualifier
","
e.salary
dept_name
"
"Other Grammar","Select Expression","
* | ( familyName . *) | term [ [ AS ] columnAlias ]
","
An expression in a SELECT statement. All columns in a table may be selected using
*, and all columns in a column family may be selected using <familyName>.*.
","
*
cf.*
ID AS VALUE
VALUE + 1 VALUE_PLUS_ONE
"
"Other Grammar","Split Point","
value | bindParameter
","
Defines a split point for a table. Use a bind parameter with
preparedStatement.setBytes(int,byte[]) to supply arbitrary bytes.
","
'A'
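-- A bind parameter can supply arbitrary split bytes
?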
"
"Other Grammar","Table Expression","
{ [ schemaName. ] tableName } [ [ AS ] tableAlias ]
","
A reference to a table. Joins and sub queries are not currently supported.
","
PRODUCT_METRICS AS PM
"
"Other Grammar","Order","
{ expression } [ ASC | DESC ] [ NULLS { FIRST | LAST } ]
","
Sorts the result by an expression.
","
NAME DESC NULLS LAST
"
"Other Grammar","Expression","
andCondition [ { OR andCondition } [...] ]
","
Value or condition.
","
ID=1 OR NAME='Hi'
"
"Other Grammar","And Condition","
condition [ { AND condition } [...] ]
","
Value or condition.
","
ID=1 AND NAME='Hi'
"
"Other Grammar","Condition","
operand [ compare { operand }
| [ NOT ] IN ( { constantOperand [,...] } )
| [ NOT ] LIKE operand
| [ NOT ] BETWEEN operand AND operand
| IS [ NOT ] NULL ]
| NOT expression
| ( expression )
","
Boolean value or condition.
When comparing with LIKE, the wildcard characters are ""_"" (any one character)
and ""%"" (any characters). To search for the characters ""%"" and
""_"", the characters need to be escaped. The escape character is "" \ "" (backslash).
Patterns that end with an escape character are invalid and the expression returns NULL.
BETWEEN does an inclusive comparison for both operands.
","
NAME LIKE 'Jo%'
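-- Hypothetical: the backslash escapes the wildcard, matching names that start with the literal characters 'Jo_'
NAME LIKE 'Jo\_%'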
"
"Other Grammar","Compare","
<> | <= | >= | = | < | > | !=
","
Comparison operator. The operator != is the same as <>.
","
<>
"
"Other Grammar","Operand","
summand [ { || } summand [...] ]
","
A string concatenation.
","
'foo'|| s
"
"Other Grammar","Summand","
factor [ { + | - } factor [...] ]
","
An addition or subtraction of numeric or date type values.
","
a + b
a - b
"
"Other Grammar","Factor","
term [ { * | / } term [...] ]
","
A multiplication or division.
","
c * d
e / 5
"
"Other Grammar","Term","
value
| bindParameter
| function
| case
| caseWhen
| ( operand )
| [ tableAlias. ] columnRef
| rowValueConstructor
","
A value.
","
'Hello'
"
"Other Grammar","Row Value Constructor","
( term { ,term } [...] )
","
A row value constructor is a list of other terms which are treated together as
a kind of composite structure. They may be compared to each other or to other
terms. The main use cases are 1) to enable efficiently stepping through
a set of rows in support of query-more type functionality, and 2) to allow
the IN clause to perform point gets on composite row keys.
","
(col1, col2, 5)
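-- Hypothetical: compared against bind parameters to step through rows (query-more style)
(host, created_date) > (?, ?)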
"
"Other Grammar","Bind Parameter","
?
| :number
","
A parameter can be indexed, for example "":1"" meaning the first parameter.
","
:1
?
"
"Other Grammar","Value","
string | numeric | boolean | null
","
A literal value of any data type, or null.
","
10
"
"Other Grammar","Case","
CASE term { WHEN expression THEN term } [...]
[ ELSE expression ] END
","
Returns the first expression where the value is equal to the test expression. If
no else part is specified, NULL is returned.
","
CASE CNT WHEN 0 THEN 'No' WHEN 1 THEN 'One' ELSE 'Some' END
"
"Other Grammar","Case When","
CASE { WHEN expression THEN term} [...]
[ ELSE term ] END
","
Returns the first expression where the condition is true. If no else part is
specified, NULL is returned.
","
CASE WHEN CNT<10 THEN 'Low' ELSE 'High' END
"
"Other Grammar","Name","
{ { A-Z|_ } [ { A-Z|_|0-9 } [...] ] } | quotedName
","
Unquoted names are not case sensitive. There is no maximum name length.
","
my_column
"
"Other Grammar","Quoted Name","
""anythingExceptDoubleQuote""
","
Quoted names are case sensitive, and can contain spaces. There is no maximum
name length. Two double quotes can be used to create a single double quote
inside an identifier.
","
""first-name""
"
"Other Grammar","Alias","
name
","
An alias is a name that is only valid in the context of the statement.
","
A
"
"Other Grammar","Null","
NULL
","
NULL is a value without data type and means 'unknown value'.
","
NULL
"
"Other Grammar","Data Type","
charType | varcharType | decimalType | tinyintType | smallintType | integerType | bigintType | floatType | doubleType | timestampType | dateType | timeType | unsignedTinyintType | unsignedSmallintType | unsignedIntType | unsignedLongType | unsignedFloatType | unsignedDoubleType | binaryType | varbinaryType
","
A type name.
","
CHAR(15)
VARCHAR
VARCHAR(1000)
INTEGER
BINARY(200)
"
"Other Grammar","String","
'anythingExceptSingleQuote'
","
A string starts and ends with a single quote. Two single quotes can be used to
create a single quote inside a string.
","
'John''s car'
"
"Other Grammar","Boolean","
TRUE | FALSE
","
A boolean value.
","
TRUE
"
"Other Grammar","Numeric","
int | long | decimal
","
The data type of a numeric value is always the lowest possible for the given value.
If the number contains a dot, it is a decimal; otherwise it is an int, long, or decimal (depending on the value).
","
SELECT -10.05
SELECT 5
SELECT 12345678912345
"
"Other Grammar","Int","
[ - ] number
","
The maximum integer number is 2147483647, the minimum is -2147483648.
","
10
"
"Other Grammar","Long","
[ - ] number
","
Long numbers are between -9223372036854775808 and 9223372036854775807.
","
100000
"
"Other Grammar","Decimal","
[ - ] { number [ . number ] }
","
A decimal number with fixed precision and scale.
Internally, ""java.lang.BigDecimal"" is used.
","
SELECT -10.5
"
"Other Grammar","Number","
0-9 [...]
","
The maximum length of the number depends on the data type used.
","
100
"
"Other Grammar","Comments","
-- anythingUntilEndOfLine | // anythingUntilEndOfLine | /* anythingUntilEndComment */
","
Comments can be used anywhere in a command and are ignored by the database. Line
comments end with a newline. Block comments cannot be nested, but can be
multiple lines long.
","
// This is a comment
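-- A block comment (cannot be nested):
/* This is a block comment */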
"
"Data Types","INTEGER Type","
INTEGER
","
Possible values: -2147483648 to 2147483647.
Mapped to ""java.lang.Integer"". The binary representation is a 4 byte
integer with the sign bit flipped (so that negative values sort before positive values).
","
INTEGER
"
"Data Types","UNSIGNED_INT Type","
UNSIGNED_INT
","
Possible values: 0 to 2147483647.
Mapped to ""java.lang.Integer"". The binary representation is a 4 byte
integer, matching the HBase Bytes.toBytes(int) method.
The purpose of this type is to map to existing HBase data that was serialized using
this HBase utility method. If that is not the case, use the regular signed type instead.
","
UNSIGNED_INT
"
"Data Types","BIGINT Type","
BIGINT
","
Possible values: -9223372036854775808 to 9223372036854775807.
Mapped to ""java.lang.Long"". The binary representation is an 8 byte
long with the sign bit flipped (so that negative values sort before positive values).
","
BIGINT
"
"Data Types","UNSIGNED_LONG Type","
UNSIGNED_LONG
","
Possible values: 0 to 9223372036854775807.
Mapped to ""java.lang.Long"". The binary representation is an 8 byte
integer, matching the HBase Bytes.toBytes(long) method.
The purpose of this type is to map to existing HBase data that was serialized using
this HBase utility method. If that is not the case, use the regular signed type instead.
","
UNSIGNED_LONG
"
"Data Types","TINYINT Type","
TINYINT
","
Possible values: -128 to 127.
Mapped to ""java.lang.Byte"". The binary representation is a single byte,
with the sign bit flipped (so that negative values sort before positive values).
","
TINYINT
"
"Data Types","UNSIGNED_TINYINT Type","
UNSIGNED_TINYINT
","
Possible values: 0 to 127.
Mapped to ""java.lang.Byte"". The binary representation is a single byte,
matching the HBase Bytes.toBytes(byte) method.
The purpose of this type is to map to existing HBase data that was serialized using
this HBase utility method. If that is not the case, use the regular signed type instead.
","
UNSIGNED_TINYINT
"
"Data Types","SMALLINT Type","
SMALLINT
","
Possible values: -32768 to 32767.
Mapped to ""java.lang.Short"". The binary representation is a 2 byte
short with the sign bit flipped (so that negative values sort before positive values).
","
SMALLINT
"
"Data Types","UNSIGNED_SMALLINT Type","
UNSIGNED_SMALLINT
","
Possible values: 0 to 32767.
Mapped to ""java.lang.Short"". The binary representation is an 2 byte
integer, matching the HBase Bytes.toBytes(short) method.
The purpose of this type is to map to existing HBase data that was serialized using
this HBase utility method. If that is not the case, use the regular signed type instead.
","
UNSIGNED_SMALLINT
"
"Data Types","FLOAT Type","
FLOAT
","
Possible values: -3.402823466 E + 38 to 3.402823466 E + 38.
Mapped to ""java.lang.Float"". The binary representation is an 4 byte
float with the sign bit flipped (so that negative values sort before
positive values).
","
FLOAT
"
"Data Types","UNSIGNED_FLOAT Type","
UNSIGNED_FLOAT
","
Possible values: 0 to 3.402823466 E + 38.
Mapped to ""java.lang.Float"". The binary representation is an 4 byte
float matching the HBase Bytes.toBytes(float) method.
The purpose of this type is to map to existing HBase data that was serialized using
this HBase utility method. If that is not the case, use the regular signed type instead.
","
UNSIGNED_FLOAT
"
"Data Types","DOUBLE Type","
DOUBLE
","
Possible values: -1.7976931348623158 E + 308 to 1.7976931348623158 E + 308.
Mapped to ""java.lang.Double"". The binary representation is an 8 byte
double with the sign bit flipped (so that negative values sort before positive values).
","
DOUBLE
"
"Data Types","UNSIGNED_DOUBLE Type","
UNSIGNED_DOUBLE
","
Possible values: 0 to 1.7976931348623158 E + 308.
Mapped to ""java.lang.Double"". The binary representation is an 8 byte
double matching the HBase Bytes.toBytes(double) method.
The purpose of this type is to map to existing HBase data that was serialized using
this HBase utility method. If that is not the case, use the regular signed type instead.
","
UNSIGNED_DOUBLE
"
"Data Types","DECIMAL Type","
DECIMAL
","
Data type with fixed precision and scale. The maximum precision is 18 digits.
Mapped to ""java.math.BigDecimal"". The binary representation is binary
comparable, variable length format. When used in a row key, it is terminated
with a null byte unless it is the last column.
","
DECIMAL
"
"Data Types","BOOLEAN Type","
BOOLEAN
","
Possible values: TRUE and FALSE.
Mapped to ""java.lang.Boolean"". The binary representation is a single byte with 0 for false and 1 for true
","
BOOLEAN
"
"Data Types","TIME Type","
TIME
","
The time data type. The format is yyyy-MM-dd hh:mm:ss, with both the date
and time parts maintained. Mapped to ""java.sql.Time"".
The binary representation is an 8 byte long (the number of milliseconds from the epoch).
","
TIME
"
"Data Types","DATE Type","
DATE
","
The date data type. The format is yyyy-MM-dd hh:mm:ss, with both the date
and time parts maintained to a millisecond accuracy. Mapped to ""java.sql.Date"".
The binary representation is an 8 byte long (the number of milliseconds from the epoch).
","
DATE
"
"Data Types","TIMESTAMP Type","
TIMESTAMP
","
The timestamp data type. The format is yyyy-MM-dd hh:mm:ss[.nnnnnnnnn].
Mapped to ""java.sql.Timestamp"" with an internal representation of the
number of nanos from the epoch. The binary representation is 12 bytes:
an 8 byte long for the epoch time plus a 4 byte integer for the nanos.
","
TIMESTAMP
"
"Data Types","VARCHAR Type","
VARCHAR [ ( precisionInt ) ]
","
A variable length String with an optional max byte length. The binary representation
is UTF8 matching the HBase Bytes.toBytes(String) method. When used in a row key, it
is terminated with a null byte unless it is the last column.
Mapped to ""java.lang.String"".
","
VARCHAR
VARCHAR(255)
"
"Data Types","CHAR Type","
CHAR ( precisionInt )
","
A fixed length String with single-byte characters. The binary representation
is UTF8 matching the HBase Bytes.toBytes(String) method.
Mapped to ""java.lang.String"".
","
CHAR(10)
"
"Data Types","BINARY Type","
BINARY ( precisionInt )
","
Raw fixed length byte array.
Mapped to ""byte[]"".
","
BINARY(200)
"
"Data Types","VARBINARY Type","
VARBINARY
","
Raw variable length byte array.
Mapped to ""byte[]"".
","
VARBINARY
"
"Functions (Aggregate)","AVG","
AVG ( { numericTerm } )
","
The average (mean) value.
If no rows are selected, the result is NULL.
Aggregates are only allowed in select statements.
The returned value is of the same data type as the parameter.
","
AVG(X)
"
"Functions (Aggregate)","COUNT","
COUNT( [ DISTINCT ] { * | { term } } )
","
The count of all rows, or of the non-null values.
This method returns a long.
When DISTINCT is used, it counts only distinct values.
If no rows are selected, the result is 0.
Aggregates are only allowed in select statements.
","
COUNT(*)
"
"Functions (Aggregate)","MAX","
MAX(term)
","
The highest value.
If no rows are selected, the result is NULL.
Aggregates are only allowed in select statements.
The returned value is of the same data type as the parameter.
","
MAX(NAME)
"
"Functions (Aggregate)","MIN","
MIN(term)
","
The lowest value.
If no rows are selected, the result is NULL.
Aggregates are only allowed in select statements.
The returned value is of the same data type as the parameter.
","
MIN(NAME)
"
"Functions (Aggregate)","SUM","
SUM( { numericTerm } )
","
The sum of all values.
If no rows are selected, the result is NULL.
Aggregates are only allowed in select statements.
The returned value is of the same data type as the parameter.
","
SUM(X)
"
"Functions (Aggregate)","PERCENTILE_CONT","
PERCENTILE_CONT( { numeric } ) WITHIN GROUP (ORDER BY { numericTerm } { ASC | DESC } )
","
The nth percentile of values in the column.
The percentile value can be between 0 and 1 inclusive.
Aggregates are only allowed in select statements.
The returned value is of decimal data type.
","
PERCENTILE_CONT( 0.9 ) WITHIN GROUP (ORDER BY X ASC)
"
"Functions (Aggregate)","PERCENTILE_DISC","
PERCENTILE_DISC( { numeric } ) WITHIN GROUP (ORDER BY { numericTerm } { ASC | DESC } )
","
PERCENTILE_DISC is an inverse distribution function that assumes a discrete distribution model. It takes a percentile value and a sort specification and returns an element from the set. Nulls are ignored in the calculation.
","
PERCENTILE_DISC( 0.9 ) WITHIN GROUP (ORDER BY X DESC)
"
"Functions (Aggregate)","PERCENT_RANK","
PERCENT_RANK( { numeric } ) WITHIN GROUP (ORDER BY { numericTerm } { ASC | DESC } )
","
The percentile rank for a hypothetical value, if inserted into the column.
Aggregates are only allowed in select statements.
The returned value is of decimal data type.
","
PERCENT_RANK( 100 ) WITHIN GROUP (ORDER BY X ASC)
"
"Functions (Aggregate)","STDDEV_POP","
STDDEV_POP( { numericTerm } )
","
The population standard deviation of all values.
Aggregates are only allowed in select statements.
The returned value is of decimal data type.
","
STDDEV_POP( X )
"
"Functions (Aggregate)","STDDEV_SAMP","
STDDEV_SAMP( { numericTerm } )
","
The sample standard deviation of all values.
Aggregates are only allowed in select statements.
The returned value is of decimal data type.
","
STDDEV_SAMP( X )
"
"Functions (Time and Date)","ROUND","
ROUND(timestampTerm, {'DAY' | 'HOUR' | 'MINUTE' | 'SECOND' | 'MILLISECOND'} [, multiplierNumber])
","
Rounds the timestamp to the nearest time unit specified. The multiplier is used to
round to a multiple of a time unit (e.g. 10 minutes) and defaults to 1 if not specified.
This method returns a date.
","
ROUND(date, 'MINUTE', 30)
ROUND(time, 'HOUR')
"
"Functions (Time and Date)","TRUNCATE","
TRUNC(timestampTerm, {'DAY' | 'HOUR' | 'MINUTE' | 'SECOND' | 'MILLISECOND'} [, multiplierInt])
","
Truncates the timestamp to the next time unit closer to 0. The multiplier is used to
truncate to a multiple of a time unit (e.g. 10 minutes) and defaults to 1 if not specified.
This method returns a date.
","
TRUNC(timestamp, 'SECOND', 30)
TRUNC(date, 'DAY', 7)
"
"Functions (String)","SUBSTR","
SUBSTR( stringTerm, startInt [, lengthInt ] )
","
Returns a substring of a string starting at the one-based position.
If zero is used, the position is zero-based. If the start index is
negative, then the start index is relative to the end of the string.
The length is optional and if not supplied, the rest of the string
will be returned.
","
SUBSTR('[Hello]', 2, 5)
SUBSTR('Hello World', -5)
"
"Functions (String)","TRIM","
TRIM( stringTerm )
","
Removes leading and trailing spaces from the input string.
","
TRIM(' Hello ')
"
"Functions (String)","LTRIM","
LTRIM( stringTerm )
","
Removes leading spaces from the input string.
","
LTRIM(' Hello')
"
"Functions (String)","RTRIM","
RTRIM( stringTerm )
","
Removes trailing spaces from the input string.
","
RTRIM('Hello ')
"
"Functions (String)","LENGTH","
LENGTH( stringTerm )
","
Returns the length of the string in characters.
","
LENGTH('Hello')
"
"Functions (String)","REGEXP_SUBSTR","
REGEXP_SUBSTR( stringTerm, patternString [, startInt ] )
","
Returns a substring of a string by applying a regular
expression starting from the offset of a one-based position. Just like with SUBSTR,
if the start index is negative, then it is relative to the end of the string. If not
specified, the start index defaults to 1.
","
REGEXP_SUBSTR('na1-appsrv35-sj35', '[^-]+') evaluates to 'na1'
"
"Functions (String)","REGEXP_REPLACE","
REGEXP_REPLACE( stringTerm, patternString [, replacementString ] )
","
Returns a string by applying a regular expression and replacing the matches with the replacement
string. If the replacement string is not specified, it defaults to an empty string.
","
REGEXP_REPLACE('abc123ABC', '[0-9]+', '#') evaluates to 'abc#ABC'
"
"Functions (General)","MD5","
MD5( term )
","
Computes the MD5 hash of the argument, returning the result as a BINARY(16).
","
MD5(my_column)
"
"Functions (General)","INVERT","
INVERT( term )
","
Inverts the bits of the argument. The return type will be the same as the argument.
","
INVERT(my_column)
"
"Functions (General)","TO_NUMBER","
TO_NUMBER( stringTerm | timeStampTerm [, formatString] )
","
Formats a string or date/time/timeStamp as a number, optionally accepting a format string.
For details on the format, see ""java.text.DecimalFormat"". For date, time, and timeStamp
terms, the result is the time in milliseconds since the epoch.
This method returns a decimal number.
","
TO_NUMBER('$123.33', '\u00A4###.##')
"
"Functions (String)","UPPER","
UPPER( stringTerm )
","
Returns the string argument converted to upper case.
","
UPPER('Hello')
"
"Functions (String)","LOWER","
LOWER( stringTerm )
","
Returns the string argument converted to lower case.
","
LOWER('HELLO')
"
"Functions (String)","REVERSE","
REVERSE( stringTerm )
","
Returns the string argument with the characters reversed.
","
REVERSE('Hello')
"
"Functions (String)","TO_CHAR","
TO_CHAR( timestampTerm | numberTerm [, formatString] )
","
Formats a date, time, timestamp, or number as a string.
The default date format is ""yyyy-MM-dd HH:mm:ss"" and
the default number format is ""#,##0.###"".
For details, see ""java.text.SimpleDateFormat""
for date/time values and ""java.text.DecimalFormat"" for
numbers. This method returns a string.
","
TO_CHAR(myDate, 'yyyy-MM-dd HH:mm:ss')
TO_CHAR(myDecimal, '#,##0.###')
"
"Functions (Time and Date)","TO_DATE","
TO_DATE( stringTerm [, formatString] )
","
Parses a string and returns a date.
The most important format characters are:
y year, M month, d day, H hour, m minute, s second.
The default format string is ""yyyy-MM-dd HH:mm:ss"".
For details of the format, see ""java.text.SimpleDateFormat"".
","
TO_DATE('Sat, 3 Feb 2001 03:05:06 GMT', 'EEE, d MMM yyyy HH:mm:ss z')
"
"Functions (Time and Date)","CURRENT_DATE","
CURRENT_DATE()
","
Returns the current server-side date, bound at the start of the execution
of a query based on the current time on the region server owning the
metadata of the table being queried.
","
CURRENT_DATE()
"
"Functions (Time and Date)","CURRENT_TIME","
CURRENT_TIME()
","
Same as CURRENT_DATE(), except returns a value of type TIME. In either case,
the underlying representation is the epoch time as a long value.
","
CURRENT_TIME()
"
"Functions (General)","COALESCE","
COALESCE( firstTerm, secondTerm )
","
Returns the value of the first argument if not null and the second argument otherwise. Useful
to guarantee that a column in an UPSERT SELECT command will evaluate to a non null value.
","
COALESCE(last_update_date, CURRENT_DATE())
"