docs/topics/impala_alter_table.xml - impala - Git at Google

 <?xml version="1.0" encoding="UTF-8"?>
 <!--
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership.  The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.  You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing,
 software distributed under the License is distributed on an
 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 KIND, either express or implied.  See the License for the
 specific language governing permissions and limitations
 under the License.
 -->
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
 <concept id="alter_table">

   <title>ALTER TABLE Statement</title>

   <titlealts audience="PDF">

     <navtitle>ALTER TABLE</navtitle>

   </titlealts>

   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
       <data name="Category" value="Impala Data Types"/>
       <data name="Category" value="SQL"/>
       <data name="Category" value="DDL"/>
       <data name="Category" value="HDFS Caching"/>
       <data name="Category" value="Tables"/>
       <data name="Category" value="Schemas"/>
       <data name="Category" value="S3"/>
       <data name="Category" value="Developers"/>
       <data name="Category" value="Data Analysts"/>
       <data name="Category" value="Kudu"/>
     </metadata>
   </prolog>

   <conbody>

     <p> The <codeph>ALTER TABLE</codeph> statement changes the structure or
       properties of an existing Impala table. </p>

     <p>
       In Impala, this is primarily a logical operation that updates the table metadata in the
       metastore database that Impala shares with Hive. Most <codeph>ALTER TABLE</codeph>
       operations do not actually rewrite, move, and so on the actual data files. (The
       <codeph>RENAME TO</codeph> clause is the one exception; it can cause HDFS files to be
       moved to different paths.) When you do an <codeph>ALTER TABLE</codeph> operation, you
       typically need to perform corresponding physical filesystem operations, such as rewriting
       the data files to include extra fields, or converting them to a different file format.
     </p>

     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>

 <codeblock>ALTER TABLE [<varname>old_db_name</varname>.]<varname>old_table_name</varname> RENAME TO [<varname>new_db_name</varname>.]<varname>new_table_name</varname>

 ALTER TABLE <varname>name</varname> ADD [IF NOT EXISTS] COLUMNS (<varname>col_spec</varname>[, <varname>col_spec</varname> ...])
 ALTER TABLE <varname>name</varname> REPLACE COLUMNS (<varname>col_spec</varname>[, <varname>col_spec</varname> ...])

 ALTER TABLE <varname>name</varname> ADD COLUMN [IF NOT EXISTS] <varname>col_spec</varname>
 ALTER TABLE <varname>name</varname> DROP [COLUMN] <varname>column_name</varname>
 ALTER TABLE <varname>name</varname> CHANGE <varname>column_name</varname> <varname>col_spec</varname>

 <ph rev="3.1 IMPALA-6988">ALTER TABLE <varname>name</varname> SET OWNER USER <varname>user_name</varname>
 ALTER TABLE <varname>name</varname> SET OWNER ROLE <varname>role_name</varname>
 </ph>
 <ph rev="2.10.0 IMPALA-4622">-- Kudu tables only.
 ALTER TABLE <varname>name</varname> ALTER [COLUMN] <varname>column_name</varname>
   { SET <varname>kudu_storage_attr</varname> <varname>attr_value</varname>
     | DROP DEFAULT }

 kudu_storage_attr ::= { DEFAULT | BLOCK_SIZE | ENCODING | COMPRESSION }</ph>

 <ph rev="2.10.0 IMPALA-4622">-- Non-Kudu tables only.
 ALTER TABLE <varname>name</varname> ALTER [COLUMN] <varname>column_name</varname>
   SET COMMENT '<varname>comment_text</varname>'</ph>

 ALTER TABLE <varname>name</varname> ADD [IF NOT EXISTS] PARTITION (<varname>partition_spec</varname>)
   <ph rev="IMPALA-4390">[<varname>location_spec</varname>]</ph>
   <ph rev="IMPALA-4390">[<varname>cache_spec</varname>]</ph>
 <ph rev="kudu">ALTER TABLE <varname>name</varname> ADD [IF NOT EXISTS] RANGE PARTITION <varname>kudu_partition_spec</varname></ph>

 ALTER TABLE <varname>name</varname> DROP [IF EXISTS] PARTITION (<varname>partition_spec</varname>)
   <ph rev="2.3.0">[PURGE]</ph>
 <ph rev="kudu">ALTER TABLE <varname>name</varname> DROP [IF EXISTS] RANGE PARTITION <varname>kudu_partition_spec</varname></ph>

 <ph rev="2.3.0 IMPALA-1568">ALTER TABLE <varname>name</varname> RECOVER PARTITIONS</ph>

 ALTER TABLE <varname>name</varname> [PARTITION (<varname>partition_spec</varname>)]
   SET { FILEFORMAT <varname>file_format</varname>
   | ROW FORMAT <varname>row_format</varname>
   | LOCATION '<varname>hdfs_path_of_directory</varname>'
   | TBLPROPERTIES (<varname>table_properties</varname>)
   | SERDEPROPERTIES (<varname>serde_properties</varname>) }

 <ph rev="2.6.0 IMPALA-3369">ALTER TABLE <varname>name</varname> <varname>colname</varname>
   ('<varname>statsKey</varname>'='<varname>val</varname>, ...)

 statsKey ::= numDVs | numNulls | avgSize | maxSize</ph>

 <ph rev="1.4.0">ALTER TABLE <varname>name</varname> [PARTITION (<varname>partition_spec</varname>)] SET { CACHED IN '<varname>pool_name</varname>' <ph rev="2.2.0">[WITH REPLICATION = <varname>integer</varname>]</ph> | UNCACHED }</ph>

 <varname>new_name</varname> ::= [<varname>new_database</varname>.]<varname>new_table_name</varname>

 <varname>col_spec</varname> ::= <varname>col_name</varname> <varname>type_name</varname> <ph rev="2.9.0 IMPALA-4616"> COMMENT '<varname>column-comment</varname>' [<varname>kudu_attributes</varname>]</ph>

 <ph rev="2.9.0 IMPALA-4616"><varname>kudu_attributes</varname> ::= { [NOT] NULL | ENCODING <varname>codec</varname> | COMPRESSION <varname>algorithm</varname> |
   DEFAULT <varname>constant</varname> | BLOCK_SIZE <varname>number</varname> }</ph>

 <varname>partition_spec</varname> ::= <varname>simple_partition_spec</varname> | <ph rev="IMPALA-1654"><varname>complex_partition_spec</varname></ph>

 <varname>simple_partition_spec</varname> ::= <varname>partition_col</varname>=<varname>constant_value</varname>

 <ph rev="IMPALA-1654"><varname>complex_partition_spec</varname> ::= <varname>comparison_expression_on_partition_col</varname></ph>

 <ph rev="kudu"><varname>kudu_partition_spec</varname> ::= <varname>constant</varname> <varname>range_operator</varname> VALUES <varname>range_operator</varname> <varname>constant</varname> | VALUE = <varname>constant</varname></ph>

 <ph rev="IMPALA-4390">cache_spec ::= CACHED IN '<varname>pool_name</varname>' [WITH REPLICATION = <varname>integer</varname>] | UNCACHED</ph>

 <ph rev="IMPALA-4390">location_spec ::= LOCATION '<varname>hdfs_path_of_directory</varname>'</ph>

 <varname>table_properties</varname> ::= '<varname>name</varname>'='<varname>value</varname>'[, '<varname>name</varname>'='<varname>value</varname>' ...]

 <varname>serde_properties</varname> ::= '<varname>name</varname>'='<varname>value</varname>'[, '<varname>name</varname>'='<varname>value</varname>' ...]

 <varname>file_format</varname> ::= { PARQUET | TEXTFILE | RCFILE | SEQUENCEFILE | AVRO }

 <varname>row_format</varname> ::= DELIMITED
   [FIELDS TERMINATED BY '<varname>char</varname>' [ESCAPED BY '<varname>char</varname>']]
   [LINES TERMINATED BY '<varname>char</varname>']</codeblock>

     <p conref="../shared/impala_common.xml#common/ddl_blurb"/>

     <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>

     <p rev="2.3.0">
       In <keyword keyref="impala23_full"/> and higher, the <codeph>ALTER TABLE</codeph>
       statement can change the metadata for tables containing complex types
       (<codeph>ARRAY</codeph>, <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>). For example,
       you can use an <codeph>ADD COLUMNS</codeph>, <codeph>DROP COLUMN</codeph>, or
       <codeph>CHANGE</codeph> clause to modify the table layout for complex type columns.
       Although Impala queries only work for complex type columns in Parquet tables, the complex
       type support in the <codeph>ALTER TABLE</codeph> statement applies to all file formats.
       For example, you can use Impala to update metadata for a staging table in a non-Parquet
       file format where the data is populated by Hive. Or you can use <codeph>ALTER TABLE SET
       FILEFORMAT</codeph> to change the format of an existing table to Parquet so that Impala
       can query it. Remember that changing the file format for a table does not convert the data
       files within the table; you must prepare any Parquet data files containing complex types
       outside Impala, and bring them into the table using <codeph>LOAD DATA</codeph> or updating
       the table's <codeph>LOCATION</codeph> property. See
       <xref
         href="impala_complex_types.xml#complex_types"/> for details about using
       complex types.
     </p>

     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>

     <p>
       Whenever you specify partitions in an <codeph>ALTER TABLE</codeph> statement, through the
       <codeph>PARTITION (<varname>partition_spec</varname>)</codeph> clause, you must include
       all the partitioning columns in the specification.
     </p>

     <p>
       Most of the <codeph>ALTER TABLE</codeph> operations work the same for internal tables
       (managed by Impala) as for external tables (with data files located in arbitrary
       locations). The exception is renaming a table; for an external table, the underlying data
       directory is not renamed or moved.
     </p>

     <p>
       <b>To drop or alter multiple partitions:</b>
     </p>

     <p rev="IMPALA-1654">
       In <keyword keyref="impala28_full"/> and higher, the expression for the partition clause
       with a <codeph>DROP</codeph> or <codeph>SET</codeph> operation can include comparison
       operators such as <codeph>&lt;</codeph>, <codeph>IN</codeph>, or <codeph>BETWEEN</codeph>,
       and Boolean operators such as <codeph>AND</codeph> and <codeph>OR</codeph>.
     </p>

     <p rev="IMPALA-1654">
       For example, you might drop a group of partitions corresponding to a particular date range
       after the data <q>ages out</q>:
     </p>

 <codeblock><![CDATA[
 alter table historical_data drop partition (year < 1995);
 alter table historical_data drop partition (year = 1996 and month between 1 and 6);
 ]]>
 </codeblock>

     <p rev="IMPALA-1654">
       For tables with multiple partition keys columns, you can specify multiple conditions
       separated by commas, and the operation only applies to the partitions that match all the
       conditions (similar to using an <codeph>AND</codeph> clause):
     </p>

 <codeblock><![CDATA[
 alter table historical_data drop partition (year < 1995, last_name like 'A%');
 ]]>
 </codeblock>

     <p rev="IMPALA-1654">
       This technique can also be used to change the file format of groups of partitions, as part
       of an ETL pipeline that periodically consolidates and rewrites the underlying data files
       in a different file format:
     </p>

 <codeblock><![CDATA[
 alter table fast_growing_data partition (year = 2016, month in (10,11,12)) set fileformat parquet;
 ]]>
 </codeblock>

     <note>
       <p rev="IMPALA-1654">
         The extended syntax involving comparison operators and multiple partitions applies to
         the <codeph>SET FILEFORMAT</codeph>, <codeph>SET TBLPROPERTIES</codeph>, <codeph>SET
         SERDEPROPERTIES</codeph>, and <codeph>SET [UN]CACHED</codeph> clauses. You can also use
         this syntax with the <codeph>PARTITION</codeph> clause in the <codeph>COMPUTE
         INCREMENTAL STATS</codeph> statement, and with the <codeph>PARTITION</codeph> clause of
         the <codeph>SHOW FILES</codeph> statement. Some forms of <codeph>ALTER TABLE</codeph>
         still only apply to one partition at a time: the <codeph>SET LOCATION</codeph> and
         <codeph>ADD PARTITION</codeph> clauses. The <codeph>PARTITION</codeph> clauses in the
         <codeph>LOAD DATA</codeph> and <codeph>INSERT</codeph> statements also only apply to one
         partition at a time.
       </p>

       <p>
         A DDL statement that applies to multiple partitions is considered successful (resulting
         in no changes) even if no partitions match the conditions. The results are the same as
         if the <codeph>IF EXISTS</codeph> clause was specified.
       </p>

       <p>
         The performance and scalability of this technique is similar to issuing a sequence of
         single-partition <codeph>ALTER TABLE</codeph> statements in quick succession. To
         minimize bottlenecks due to communication with the metastore database, or causing other
         DDL operations on the same table to wait, test the effects of performing <codeph>ALTER
         TABLE</codeph> statements that affect large numbers of partitions.
       </p>
     </note>

     <p conref="../shared/impala_common.xml#common/s3_blurb"/>

     <p rev="2.6.0 IMPALA-1878">
       You can specify an <codeph>s3a://</codeph> prefix on the <codeph>LOCATION</codeph>
       attribute of a table or partition to make Impala query data from the Amazon S3 filesystem.
       In <keyword
         keyref="impala26_full"/> and higher, Impala automatically handles
       creating or removing the associated folders when you issue <codeph>ALTER TABLE</codeph>
       statements with the <codeph>ADD PARTITION</codeph> or <codeph>DROP PARTITION</codeph>
       clauses.
     </p>

     <p conref="../shared/impala_common.xml#common/s3_ddl"/>

     <p rev="1.4.0">
       <b>HDFS caching (CACHED IN clause):</b>
     </p>

     <p rev="1.4.0">
       If you specify the <codeph>CACHED IN</codeph> clause, any existing or future data files in
       the table directory or the partition subdirectories are designated to be loaded into
       memory with the HDFS caching mechanism. See
       <xref
         href="impala_perf_hdfs_caching.xml#hdfs_caching"/> for details about using
       the HDFS caching feature.
     </p>

     <p
       conref="../shared/impala_common.xml#common/impala_cache_replication_factor"/>

     <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/>

     <p>
       <b>To rename a table (RENAME TO clause):</b>
     </p>

 <!-- Beefing up the syntax in its original location up to, don't need to repeat it here.
 <codeblock>ALTER TABLE <varname>old_name</varname> RENAME TO <varname>new_name</varname>;</codeblock>
 -->

     <p>
       The <codeph>RENAME TO</codeph> clause lets you change the name of an existing table, and
       optionally which database it is located in.
     </p>

     <p>
       For internal tables, this operation physically renames the directory within HDFS that
       contains the data files; the original directory name no longer exists. By qualifying the
       table names with database names, you can use this technique to move an internal table (and
       its associated data directory) from one database to another. For example:
     </p>

 <codeblock>create database d1;
 create database d2;
 create database d3;
 use d1;
 create table mobile (x int);
 use d2;
 -- Move table from another database to the current one.
 alter table d1.mobile rename to mobile;
 use d1;
 -- Move table from one database to another.
 alter table d2.mobile rename to d3.mobile;</codeblock>

     <p rev="3.1 IMPALA-6988">
       <b>To change the owner of a table:</b>
     </p>

 <codeblock>ALTER TABLE <varname>name</varname> SET OWNER USER <varname>user_name;</varname>
 ALTER TABLE <varname>name</varname> SET OWNER ROLE <varname>role_name;</varname>
 </codeblock>

     <p>
       The table owner is originally set to the user who creates the table. When object ownership
       is enabled in Sentry, an owner of a table can have the <codeph>ALL</codeph> with
       <codeph>GRANT</codeph> or <codeph>ALL</codeph> without <codeph>GRANT</codeph> privilege.
       The term <codeph>OWNER</codeph> is used to differentiate between the <codeph>ALL</codeph>
       privilege that is explicitly granted via the <codeph>GRANT</codeph> statement and a
       privilege that is implicitly granted by the <codeph>CREATE TABLE</codeph> statement.
     </p>

     <p>
       Use the <codeph>ALTER TABLE SET OWNER</codeph> to transfer the ownership from the current
       owner to another user or a role.
     </p>

     <p>
       <b>To change the physical location where Impala looks for data files associated with a
       table or partition:</b>
     </p>

 <codeblock>ALTER TABLE <varname>table_name</varname> [PARTITION (<varname>partition_spec</varname>)] SET LOCATION '<varname>hdfs_path_of_directory</varname>';</codeblock>

     <p>
       The path you specify is the full HDFS path where the data files reside, or will be
       created. Impala does not create any additional subdirectory named after the table. Impala
       does not move any data files to this new location or change any data files that might
       already exist in that directory.
     </p>

     <p>
       To set the location for a single partition, include the <codeph>PARTITION</codeph> clause.
       Specify all the same partitioning columns for the table, with a constant value for each,
       to precisely identify the single partition affected by the statement:
     </p>

 <codeblock>create table p1 (s string) partitioned by (month int, day int);
 -- Each ADD PARTITION clause creates a subdirectory in HDFS.
 alter table p1 add partition (month=1, day=1);
 alter table p1 add partition (month=1, day=2);
 alter table p1 add partition (month=2, day=1);
 alter table p1 add partition (month=2, day=2);
 -- Redirect queries, INSERT, and LOAD DATA for one partition
 -- to a specific different directory.
 alter table p1 partition (month=1, day=1) set location '/usr/external_data/new_years_day';
 </codeblock>

     <note conref="../shared/impala_common.xml#common/add_partition_set_location"/>

     <p rev="2.3.0 IMPALA-1568">
       <b>To automatically detect new partition directories added through Hive or HDFS
       operations:</b>
     </p>

     <p rev="2.3.0 IMPALA-1568">
       In <keyword keyref="impala23_full"/> and higher, the <codeph>RECOVER PARTITIONS</codeph>
       clause scans a partitioned table to detect if any new partition directories were added
       outside of Impala, such as by Hive <codeph>ALTER TABLE</codeph> statements or by
       <cmdname>hdfs dfs</cmdname> or <cmdname>hadoop fs</cmdname> commands. The <codeph>RECOVER
       PARTITIONS</codeph> clause automatically recognizes any data files present in these new
       directories, the same as the <codeph>REFRESH</codeph> statement does.
     </p>

     <p rev="2.3.0 IMPALA-1568">
       For example, here is a sequence of examples showing how you might create a partitioned
       table in Impala, create new partitions through Hive, copy data files into the new
       partitions with the <cmdname>hdfs</cmdname> command, and have Impala recognize the new
       partitions and new data:
     </p>

     <p rev="2.3.0 IMPALA-1568">
       In Impala, create the table, and a single partition for demonstration purposes:
     </p>

 <codeblock rev="2.3.0 IMPALA-1568">
 <![CDATA[
 create database recover_partitions;
 use recover_partitions;
 create table t1 (s string) partitioned by (yy int, mm int);
 insert into t1 partition (yy = 2016, mm = 1) values ('Partition exists');
 show files in t1;
 +---------------------------------------------------------------------+------+--------------+
 | Path                                                                | Size | Partition    |
 +---------------------------------------------------------------------+------+--------------+
 | /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=1/data.txt | 17B  | yy=2016/mm=1 |
 +---------------------------------------------------------------------+------+--------------+
 quit;
 ]]>
 </codeblock>

     <p rev="2.3.0 IMPALA-1568">
       In Hive, create some new partitions. In a real use case, you might create the partitions
       and populate them with data as the final stages of an ETL pipeline.
     </p>

 <codeblock rev="2.3.0 IMPALA-1568">
 <![CDATA[
 hive> use recover_partitions;
 OK
 hive> alter table t1 add partition (yy = 2016, mm = 2);
 OK
 hive> alter table t1 add partition (yy = 2016, mm = 3);
 OK
 hive> quit;
 ]]>
 </codeblock>

     <p rev="2.3.0 IMPALA-1568">
       For demonstration purposes, manually copy data (a single row) into these new partitions,
       using manual HDFS operations:
     </p>

 <codeblock rev="2.3.0 IMPALA-1568">
 <![CDATA[
 $ hdfs dfs -ls /user/hive/warehouse/recover_partitions.db/t1/yy=2016/
 Found 3 items
 drwxr-xr-x - impala   hive 0 2016-05-09 16:06 /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=1
 drwxr-xr-x - jrussell hive 0 2016-05-09 16:14 /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=2
 drwxr-xr-x - jrussell hive 0 2016-05-09 16:13 /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=3

 $ hdfs dfs -cp /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=1/data.txt \
   /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=2/data.txt
 $ hdfs dfs -cp /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=1/data.txt \
   /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=3/data.txt
 ]]>
 </codeblock>

 <codeblock rev="2.3.0 IMPALA-1568">
 <![CDATA[
 hive> select * from t1;
 OK
 Partition exists  2016  1
 Partition exists  2016  2
 Partition exists  2016  3
 hive> quit;
 ]]>
 </codeblock>

     <p rev="2.3.0 IMPALA-1568">
       In Impala, initially the partitions and data are not visible. Running <codeph>ALTER
       TABLE</codeph> with the <codeph>RECOVER PARTITIONS</codeph> clause scans the table data
       directory to find any new partition directories, and the data files inside them:
     </p>

 <codeblock rev="2.3.0 IMPALA-1568">
 <![CDATA[
 select * from t1;
 +------------------+------+----+
 | s                | yy   | mm |
 +------------------+------+----+
 | Partition exists | 2016 | 1  |
 +------------------+------+----+

 alter table t1 recover partitions;
 select * from t1;
 +------------------+------+----+
 | s                | yy   | mm |
 +------------------+------+----+
 | Partition exists | 2016 | 1  |
 | Partition exists | 2016 | 3  |
 | Partition exists | 2016 | 2  |
 +------------------+------+----+
 ]]>
 </codeblock>

     <p rev="1.2">
       <b>To change the key-value pairs of the TBLPROPERTIES and SERDEPROPERTIES fields:</b>
     </p>

 <codeblock>ALTER TABLE <varname>table_name</varname> SET TBLPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>'[, ...]);
 ALTER TABLE <varname>table_name</varname> SET SERDEPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>'[, ...]);</codeblock>

     <p>
       The <codeph>TBLPROPERTIES</codeph> clause is primarily a way to associate arbitrary
       user-specified data items with a particular table.
     </p>

     <p>
       The <codeph>SERDEPROPERTIES</codeph> clause sets up metadata defining how tables are read
       or written, needed in some cases by Hive but not used extensively by Impala. You would use
       this clause primarily to change the delimiter in an existing text table or partition, by
       setting the <codeph>'serialization.format'</codeph> and <codeph>'field.delim'</codeph>
       property values to the new delimiter character: The <codeph>SERDEPROPERTIES</codeph>
       clause does not change the existing data in the table. The change only affects the future
       insert into the table.
     </p>

     <p>
       Use the <codeph>DESCRIBE FORMATTED</codeph> statement to see the current values of these
       properties for an existing table. See
       <xref
         href="impala_create_table.xml#create_table"/> for more details about these
       clauses.
     </p>

     <p>
       <b>To manually set or update table or column statistics:</b>
     </p>

     <p>
       Although for most tables the <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL
       STATS</codeph> statement is all you need to keep table and column statistics up to date
       for a table, sometimes for a very large table or one that is updated frequently, the
       length of time to recompute all the statistics might make it impractical to run those
       statements as often as needed. As a workaround, you can use the <codeph>ALTER
       TABLE</codeph> statement to set table statistics at the level of the entire table or a
       single partition, or column statistics at the level of the entire table.
     </p>

     <p> You can set the <codeph>numrows</codeph> value for table statistics by
       changing the <codeph>TBLPROPERTIES</codeph> setting for a table or
       partition. For example: <codeblock conref="../shared/impala_common.xml#common/set_numrows_example"/>
       <codeblock conref="../shared/impala_common.xml#common/set_numrows_partitioned_example"/></p>

     <p rev="2.6.0 IMPALA-3369">
       In <keyword keyref="impala26_full"/> and higher, you can use the <codeph>SET COLUMN
       STATS</codeph> clause to set a specific stats value for a particular column.
     </p>

     <p conref="../shared/impala_common.xml#common/set_column_stats_example"/>

     <p>
       <b>To reorganize columns for a table:</b>
     </p>
     <p>You can add multiple columns at a time using the <codeph>ALTER
         TABLE</codeph> statement. If you specify the <codeph>IF NOT
         EXISTS</codeph> clause, Impala silently ignores the <codeph>ADD</codeph>
       request and does not return an error if a column with the same name exists
       in the table.</p>
     <p>When you replace columns, all the original column definitions are
       discarded. </p>
     <p>You might use these statements if you receive a new set of data files
       with different data types or columns in a different order. The data files
       are retained, so if the new columns are incompatible with the old ones,
       use <codeph>INSERT OVERWRITE</codeph> or <codeph>LOAD DATA
         OVERWRITE</codeph> to replace all the data before issuing any further
       queries.</p>

     <p rev="">
       For example, here is how you might add columns to an existing table. The first
       <codeph>ALTER TABLE</codeph> adds two new columns, and the second <codeph>ALTER
       TABLE</codeph> adds one new column. A single Impala query reads both the old and new data
       files, containing different numbers of columns. For any columns not present in a
       particular data file, all the column values are considered to be <codeph>NULL</codeph>.
     </p>

 <codeblock rev="">
 create table t1 (x int);
 insert into t1 values (1), (2);

 alter table t1 add columns (s string, t timestamp);
 insert into t1 values (3, 'three', now());

 alter table t1 add columns (b boolean);
 insert into t1 values (4, 'four', now(), true);

 select * from t1 order by x;
 +---+-------+-------------------------------+------+
 | x | s     | t                             | b    |
 +---+-------+-------------------------------+------+
 | 1 | NULL  | NULL                          | NULL |
 | 2 | NULL  | NULL                          | NULL |
 | 3 | three | 2016-05-11 11:19:45.054457000 | NULL |
 | 4 | four  | 2016-05-11 11:20:20.260733000 | true |
 +---+-------+-------------------------------+------+
 </codeblock>

     <p>
       You might use the <codeph>CHANGE</codeph> clause to rename a single column, or to treat an
       existing column as a different type than before, such as to switch between treating a
       column as <codeph>STRING</codeph> and <codeph>TIMESTAMP</codeph>, or between
       <codeph>INT</codeph> and <codeph>BIGINT</codeph>. You can only drop a single column at a
       time; to drop multiple columns, issue multiple <codeph>ALTER TABLE</codeph> statements, or
       define the new set of columns with a single <codeph>ALTER TABLE ... REPLACE
       COLUMNS</codeph> statement.
     </p>

     <p rev="">
       The following examples show some safe operations to drop or change columns. Dropping the
       final column in a table lets Impala ignore the data causing any disruption to existing
       data files. Changing the type of a column works if existing data values can be safely
       converted to the new type. The type conversion rules depend on the file format of the
       underlying table. For example, in a text table, the same value can be interpreted as a
       <codeph>STRING</codeph> or a numeric value, while in a binary format such as Parquet, the
       rules are stricter and type conversions only work between certain sizes of integers.
     </p>

 <codeblock rev="">
 create table optional_columns (x int, y int, z int, a1 int, a2 int);
 insert into optional_columns values (1,2,3,0,0), (2,3,4,100,100);

 -- When the last column in the table is dropped, Impala ignores the
 -- values that are no longer needed. (Dropping A1 but leaving A2
 -- would cause problems, as we will see in a subsequent example.)
 alter table optional_columns drop column a2;
 alter table optional_columns drop column a1;

 select * from optional_columns;
 +---+---+---+
 | x | y | z |
 +---+---+---+
 | 1 | 2 | 3 |
 | 2 | 3 | 4 |
 +---+---+---+
 </codeblock>

 <codeblock rev="">
 create table int_to_string (s string, x int);
 insert into int_to_string values ('one', 1), ('two', 2);

 -- What was an INT column will now be interpreted as STRING.
 -- This technique works for text tables but not other file formats.
 -- The second X represents the new name of the column, which we keep the same.
 alter table int_to_string change x x string;

 -- Once the type is changed, we can insert non-integer values into the X column
 -- and treat that column as a string, for example by uppercasing or concatenating.
 insert into int_to_string values ('three', 'trois');
 select s, upper(x) from int_to_string;
 +-------+----------+
 | s     | upper(x) |
 +-------+----------+
 | one   | 1        |
 | two   | 2        |
 | three | TROIS    |
 +-------+----------+
 </codeblock>

     <p rev="">
       Remember that Impala does not actually do any conversion for the underlying data files as
       a result of <codeph>ALTER TABLE</codeph> statements. If you use <codeph>ALTER
       TABLE</codeph> to create a table layout that does not agree with the contents of the
       underlying files, you must replace the files yourself, such as using <codeph>LOAD
       DATA</codeph> to load a new set of data files, or <codeph>INSERT OVERWRITE</codeph> to
       copy from another table and replace the original data.
     </p>

     <p rev="">
       The following example shows what happens if you delete the middle column from a Parquet
       table containing three columns. The underlying data files still contain three columns of
       data. Because the columns are interpreted based on their positions in the data file
       instead of the specific column names, a <codeph>SELECT *</codeph> query now reads the
       first and second columns from the data file, potentially leading to unexpected results or
       conversion errors. For this reason, if you expect to someday drop a column, declare it as
       the last column in the table, where its data can be ignored by queries after the column is
       dropped. Or, re-run your ETL process and create new data files if you drop or change the
       type of a column in a way that causes problems with existing data files.
     </p>

 <codeblock rev="">
 -- Parquet table showing how dropping a column can produce unexpected results.
 create table p1 (s1 string, s2 string, s3 string) stored as parquet;

 insert into p1 values ('one', 'un', 'uno'), ('two', 'deux', 'dos'),
   ('three', 'trois', 'tres');
 select * from p1;
 +-------+-------+------+
 | s1    | s2    | s3   |
 +-------+-------+------+
 | one   | un    | uno  |
 | two   | deux  | dos  |
 | three | trois | tres |
 +-------+-------+------+

 alter table p1 drop column s2;
 -- The S3 column contains unexpected results.
 -- Because S2 and S3 have compatible types, the query reads
 -- values from the dropped S2, because the existing data files
 -- still contain those values as the second column.
 select * from p1;
 +-------+-------+
 | s1    | s3    |
 +-------+-------+
 | one   | un    |
 | two   | deux  |
 | three | trois |
 +-------+-------+
 </codeblock>

 <codeblock rev="">
 -- Parquet table showing how dropping a column can produce conversion errors.
 create table p2 (s1 string, x int, s3 string) stored as parquet;

 insert into p2 values ('one', 1, 'uno'), ('two', 2, 'dos'), ('three', 3, 'tres');
 select * from p2;
 +-------+---+------+
 | s1    | x | s3   |
 +-------+---+------+
 | one   | 1 | uno  |
 | two   | 2 | dos  |
 | three | 3 | tres |
 +-------+---+------+

 alter table p2 drop column x;
 select * from p2;
 WARNINGS:
 File '<varname>hdfs_filename</varname>' has an incompatible Parquet schema for column 'add_columns.p2.s3'.
 Column type: STRING, Parquet schema:
 optional int32 x [i:1 d:1 r:0]

 File '<varname>hdfs_filename</varname>' has an incompatible Parquet schema for column 'add_columns.p2.s3'.
 Column type: STRING, Parquet schema:
 optional int32 x [i:1 d:1 r:0]
 </codeblock>

     <p rev="IMPALA-3092">
       In <keyword keyref="impala26_full"/> and higher, if an Avro table is created without
       column definitions in the <codeph>CREATE TABLE</codeph> statement, and columns are later
       added through <codeph>ALTER TABLE</codeph>, the resulting table is now queryable. Missing
       values from the newly added columns now default to <codeph>NULL</codeph>.
     </p>

     <p>
       <b>To change the file format that Impala expects data to be in, for a table or
       partition:</b>
     </p>

     <p>
       Use an <codeph>ALTER TABLE ... SET FILEFORMAT</codeph> clause. You can include an optional
       <codeph>PARTITION (<varname>col1</varname>=<varname>val1</varname>,
       <varname>col2</varname>=<varname>val2</varname>, ...</codeph> clause so that the file
       format is changed for a specific partition rather than the entire table.
     </p>

     <p>
       Because this operation only changes the table metadata, you must do any conversion of
       existing data using regular Hadoop techniques outside of Impala. Any new data created by
       the Impala <codeph>INSERT</codeph> statement will be in the new format. You cannot specify
       the delimiter for Text files; the data files must be comma-delimited.
 <!-- Although Impala can read Avro tables
         created through Hive, you cannot specify the Avro file format in an Impala
         <codeph>ALTER TABLE</codeph> statement. -->
     </p>

     <p>
       To set the file format for a single partition, include the <codeph>PARTITION</codeph>
       clause. Specify all the same partitioning columns for the table, with a constant value for
       each, to precisely identify the single partition affected by the statement:
     </p>

 <codeblock>create table p1 (s string) partitioned by (month int, day int);
 -- Each ADD PARTITION clause creates a subdirectory in HDFS.
 alter table p1 add partition (month=1, day=1);
 alter table p1 add partition (month=1, day=2);
 alter table p1 add partition (month=2, day=1);
 alter table p1 add partition (month=2, day=2);
 -- Queries and INSERT statements will read and write files
 -- in this format for this specific partition.
 alter table p1 partition (month=2, day=2) set fileformat parquet;
 </codeblock>

     <p>
       <b>To change the row format with different delimiter characters:</b>
     </p>

     <p>
       Use the <codeph>SET ROW FORMAT DELIMITED</codeph> clause to ingest data files that use a
       different delimiter character or a different line end character. When specifying delimiter
       and line end characters with the <codeph>FIELDS TERMINATED BY</codeph>, <codeph>ESCAPED
       BY</codeph>, and <codeph>LINES TERMINATED BY</codeph> clauses, you can use the following:
       <ul>
         <li>
           A regular ASCII character surrounded by single or double quotation marks.
         </li>

         <li>
           An octal sequence, such as <codeph>'\054'</codeph> representing a comma or
           <codeph>'\0'</codeph> for ASCII null (hex 00).
         </li>

         <li>
           Special characters, such as:
           <ul>
             <li>
               <codeph>'\t'</codeph> for tab
             </li>

             <li>
               <codeph>'\n'</codeph> for newline or linefeed
             </li>

             <li>
               <codeph>'\r'</codeph> for carriage return
             </li>
           </ul>
         </li>

         <li>
           An integer in the range '-127'..'128' (with quotation marks but no backslash)
           <p>
             Negative values are subtracted from 256. For example, <codeph>FIELDS TERMINATED BY
             '-2'</codeph> sets the field delimiter to ASCII code 254.
           </p>
         </li>
       </ul>
     </p>

     <p>
       For more examples of text tables, see <xref
         href="impala_txtfile.xml#txtfile"/>.
     </p>

     <p>
       For the <codeph>ESCAPED BY</codeph> clause, choose an escape character that is not used
       anywhere else in the file. The character following the escape character is taken literally
       as part of a field value.
     </p>

     <p>
       Surrounding field values with quotation marks does not help Impala to parse fields with
       embedded delimiter characters as the quotation marks are considered to be part of the
       column value.
     </p>

     <p>
       If you want to use <codeph>\</codeph> as the escape character, specify the clause in
       <cmdname>impala-shell</cmdname> as <codeph>ESCAPED BY '\\'</codeph>.
     </p>

     <p>
       <b>To add or drop partitions for a table</b>, the table must already be partitioned (that
       is, created with a <codeph>PARTITIONED BY</codeph> clause). The partition is a physical
       directory in HDFS, with a name that encodes a particular column value (the <b>partition
       key</b>). The Impala <codeph>INSERT</codeph> statement already creates the partition if
       necessary, so the <codeph>ALTER TABLE ... ADD PARTITION</codeph> is primarily useful for
       importing data by moving or copying existing data files into the HDFS directory
       corresponding to a partition. (You can use the <codeph>LOAD DATA</codeph> statement to
       move files into the partition directory, or <codeph>ALTER TABLE ... PARTITION (...) SET
       LOCATION</codeph> to point a partition at a directory that already contains data files.
     </p>

     <p>
       The <codeph>DROP PARTITION</codeph> clause is used to remove the HDFS directory and
       associated data files for a particular set of partition key values; for example, if you
       always analyze the last 3 months worth of data, at the beginning of each month you might
       drop the oldest partition that is no longer needed. Removing partitions reduces the amount
       of metadata associated with the table and the complexity of calculating the optimal query
       plan, which can simplify and speed up queries on partitioned tables, particularly join
       queries. Here is an example showing the <codeph>ADD PARTITION</codeph> and <codeph>DROP
       PARTITION</codeph> clauses.
     </p>

     <p>
       To avoid errors while adding or dropping partitions whose existence is not certain, add
       the optional <codeph>IF [NOT] EXISTS</codeph> clause between the <codeph>ADD</codeph> or
       <codeph>DROP</codeph> keyword and the <codeph>PARTITION</codeph> keyword. That is, the
       entire clause becomes <codeph>ADD IF NOT EXISTS PARTITION</codeph> or <codeph>DROP IF
       EXISTS PARTITION</codeph>. The following example shows how partitions can be created
       automatically through <codeph>INSERT</codeph> statements, or manually through
       <codeph>ALTER TABLE</codeph> statements. The <codeph>IF [NOT] EXISTS</codeph> clauses let
       the <codeph>ALTER TABLE</codeph> statements succeed even if a new requested partition
       already exists, or a partition to be dropped does not exist.
     </p>

     <p>
       Inserting 2 year values creates 2 partitions:
     </p>

 <codeblock>
 create table partition_t (s string) partitioned by (y int);
 insert into partition_t (s,y) values ('two thousand',2000), ('nineteen ninety',1990);
 show partitions partition_t;
 +-------+-------+--------+------+--------------+-------------------+--------+-------------------+
 | y     | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats |
 +-------+-------+--------+------+--------------+-------------------+--------+-------+
 | 1990  | -1    | 1      | 16B  | NOT CACHED   | NOT CACHED        | TEXT   | false |
 | 2000  | -1    | 1      | 13B  | NOT CACHED   | NOT CACHED        | TEXT   | false |
 | Total | -1    | 2      | 29B  | 0B           |                   |        |       |
 +-------+-------+--------+------+--------------+-------------------+--------+-------+
 </codeblock>

     <p>
       Without the <codeph>IF NOT EXISTS</codeph> clause, an attempt to add a new partition might
       fail:
     </p>

 <codeblock>
 alter table partition_t add partition (y=2000);
 ERROR: AnalysisException: Partition spec already exists: (y=2000).
 </codeblock>

     <p>
       The <codeph>IF NOT EXISTS</codeph> clause makes the statement succeed whether or not there
       was already a partition with the specified key value:
     </p>

 <codeblock>
 alter table partition_t add if not exists partition (y=2000);
 alter table partition_t add if not exists partition (y=2010);
 show partitions partition_t;
 +-------+-------+--------+------+--------------+-------------------+--------+-------------------+
 | y     | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats |
 +-------+-------+--------+------+--------------+-------------------+--------+-------+
 | 1990  | -1    | 1      | 16B  | NOT CACHED   | NOT CACHED        | TEXT   | false |
 | 2000  | -1    | 1      | 13B  | NOT CACHED   | NOT CACHED        | TEXT   | false |
 | 2010  | -1    | 0      | 0B   | NOT CACHED   | NOT CACHED        | TEXT   | false |
 | Total | -1    | 2      | 29B  | 0B           |                   |        |       |
 +-------+-------+--------+------+--------------+-------------------+--------+-------+
 </codeblock>

     <p>
       Likewise, the <codeph>IF EXISTS</codeph> clause lets <codeph>DROP PARTITION</codeph>
       succeed whether or not the partition is already in the table:
     </p>

 <codeblock>
 alter table partition_t drop if exists partition (y=2000);
 alter table partition_t drop if exists partition (y=1950);
 show partitions partition_t;
 +-------+-------+--------+------+--------------+-------------------+--------+-------------------+
 | y     | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | Incremental stats |
 +-------+-------+--------+------+--------------+-------------------+--------+-------+
 | 1990  | -1    | 1      | 16B  | NOT CACHED   | NOT CACHED        | TEXT   | false |
 | 2010  | -1    | 0      | 0B   | NOT CACHED   | NOT CACHED        | TEXT   | false |
 | Total | -1    | 1      | 16B  | 0B           |                   |        |       |
 +-------+-------+--------+------+--------------+-------------------+--------+-------+
 </codeblock>

     <p rev="2.3.0">
       The optional <codeph>PURGE</codeph> keyword, available in
       <keyword keyref="impala23_full"/> and higher, is used with the <codeph>DROP
       PARTITION</codeph> clause to remove associated HDFS data files immediately rather than
       going through the HDFS trashcan mechanism. Use this keyword when dropping a partition if
       it is crucial to remove the data as quickly as possible to free up space, or if there is a
       problem with the trashcan, such as the trash cannot being configured or being in a
       different HDFS encryption zone than the data files.
     </p>

 <!--
         To do: Make example more general by partitioning by year/month/day.
         Then could show inserting into fixed year, variable month and day;
         dropping particular year/month/day partition.
     -->

 <codeblock>-- Create an empty table and define the partitioning scheme.
 create table part_t (x int) partitioned by (month int);
 -- Create an empty partition into which you could copy data files from some other source.
 alter table part_t add partition (month=1);
 -- After changing the underlying data, issue a REFRESH statement to make the data visible in Impala.
 refresh part_t;
 -- Later, do the same for the next month.
 alter table part_t add partition (month=2);

 -- Now you no longer need the older data.
 alter table part_t drop partition (month=1);
 -- If the table was partitioned by month and year, you would issue a statement like:
 -- alter table part_t drop partition (year=2003,month=1);
 -- which would require 12 ALTER TABLE statements to remove a year's worth of data.

 -- If the data files for subsequent months were in a different file format,
 -- you could set a different file format for the new partition as you create it.
 alter table part_t add partition (month=3) set fileformat=parquet;
 </codeblock>

     <p>
       The value specified for a partition key can be an arbitrary constant expression, without
       any references to columns. For example:
     </p>

 <codeblock>alter table time_data add partition (month=concat('Decem','ber'));
 alter table sales_data add partition (zipcode = cast(9021 * 10 as string));</codeblock>

     <note>
       <p>
         An alternative way to reorganize a table and its associated data files is to use
         <codeph>CREATE TABLE</codeph> to create a variation of the original table, then use
         <codeph>INSERT</codeph> to copy the transformed or reordered data to the new table. The
         advantage of <codeph>ALTER TABLE</codeph> is that it avoids making a duplicate copy of
         the data files, allowing you to reorganize huge volumes of data in a space-efficient way
         using familiar Hadoop techniques.
       </p>
     </note>

     <p>
       <b>To switch a table between internal and external:</b>
     </p>

     <p
       conref="../shared/impala_common.xml#common/switch_internal_external_table"/>

     <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>

     <p conref="../shared/impala_common.xml#common/permissions_blurb"/>

     <p rev="">
       Most <codeph>ALTER TABLE</codeph> clauses do not actually read or write any HDFS files,
       and so do not depend on specific HDFS permissions. For example, the <codeph>SET
       FILEFORMAT</codeph> clause does not actually check the file format existing data files or
       convert them to the new format, and the <codeph>SET LOCATION</codeph> clause does not
       require any special permissions on the new location. (Any permission-related failures
       would come later, when you actually query or insert into the table.)
     </p>

 <!-- Haven't rigorously tested all the assertions in the following paragraph. -->

 <!-- Most testing so far has been around RENAME TO clause. -->

     <p>
       In general, <codeph>ALTER TABLE</codeph> clauses that do touch HDFS files and directories
       require the same HDFS permissions as corresponding <codeph>CREATE</codeph>,
       <codeph>INSERT</codeph>, or <codeph>SELECT</codeph> statements. The permissions allow the
       user ID that the <cmdname>impalad</cmdname> daemon runs under, typically the
       <codeph>impala</codeph> user, to read or write files or directories, or (in the case of
       the execute bit) descend into a directory. The <codeph>RENAME TO</codeph> clause requires
       read, write, and execute permission in the source and destination database directories and
       in the table data directory, and read and write permission for the data files within the
       table. The <codeph>ADD PARTITION</codeph> and <codeph>DROP PARTITION</codeph> clauses
       require write and execute permissions for the associated partition directory.
     </p>

     <p conref="../shared/impala_common.xml#common/kudu_blurb"/>

     <p rev="kudu IMPALA-2890">
       Because of the extra constraints and features of Kudu tables, such as the <codeph>NOT
       NULL</codeph> and <codeph>DEFAULT</codeph> attributes for columns, <codeph>ALTER
       TABLE</codeph> has specific requirements related to Kudu tables:
       <ul>
         <li>
           <p>
             In an <codeph>ADD COLUMNS</codeph> operation, you can specify the
             <codeph>NULL</codeph>, <codeph>NOT NULL</codeph>, and <codeph>DEFAULT
             <varname>default_value</varname></codeph> column attributes.
           </p>
         </li>

         <li>
           <p rev="2.9.0 IMPALA-4616">
             In <keyword keyref="impala29_full"/> and higher, you can also specify the
             <codeph>ENCODING</codeph>, <codeph>COMPRESSION</codeph>, and
             <codeph>BLOCK_SIZE</codeph> attributes when adding a column.
           </p>
         </li>

         <li>
           <p>
             If you add a column with a <codeph>NOT NULL</codeph> attribute, it must also have a
             <codeph>DEFAULT</codeph> attribute, so the default value can be assigned to that
             column for all existing rows.
           </p>
         </li>

         <li>
           <p>
             The <codeph>DROP COLUMN</codeph> clause works the same for a Kudu table as for other
             kinds of tables.
           </p>
         </li>

         <li>
           <p>
             Although you can change the name of a column with the <codeph>CHANGE</codeph>
             clause, you cannot change the type of a column in a Kudu table.
           </p>
         </li>

         <li>
           <p>
             You cannot change the nullability of existing columns in a Kudu table.
           </p>
         </li>

         <li>
           <p rev="2.10.0 IMPALA-4622">
             In <keyword keyref="impala210_full"/>, you can change the default value, encoding,
             compression, or block size of existing columns in a Kudu table by using the
             <codeph>SET</codeph> clause.
           </p>
         </li>

         <li>
           <p>
             You cannot use the <codeph>REPLACE COLUMNS</codeph> clause with a Kudu table.
           </p>
         </li>

         <li>
           The <codeph>RENAME TO</codeph> clause for a Kudu table only affects the name stored in
           the metastore database that Impala uses to refer to the table. To change which
           underlying Kudu table is associated with an Impala table name, you must change the
           <codeph>TBLPROPERTIES</codeph> property of the table: <codeph>SET
           TBLPROPERTIES('kudu.table_name'='<varname>kudu_tbl_name</varname>)</codeph>. You can
           only change underlying Kudu tables for the external tables.
         </li>
       </ul>
     </p>

     <p>
       The following are some examples of using the <codeph>ADD COLUMNS</codeph> clause for a
       Kudu table:
     </p>

 <codeblock rev="2.9.0 IMPALA-4616">
 CREATE TABLE t1 ( x INT, PRIMARY KEY (x) )
   PARTITION BY HASH (x) PARTITIONS 16
   STORED AS KUDU

 ALTER TABLE t1 ADD COLUMNS (y STRING ENCODING prefix_encoding);
 ALTER TABLE t1 ADD COLUMNS (z INT DEFAULT 10);
 ALTER TABLE t1 ADD COLUMNS (a STRING NOT NULL DEFAULT '', t TIMESTAMP COMPRESSION default_compression);
 </codeblock>

     <p rev="2.10.0 IMPALA-4622">
       The following are some examples of modifying column defaults and storage attributes for a
       Kudu table:
     </p>

 <codeblock rev="2.10.0 IMPALA-4622">
 create table kt (x bigint primary key, s string default 'yes', t timestamp)
   stored as kudu;

 -- You can change the default value for a column, which affects any rows
 -- inserted after this change is made.
 alter table kt alter column s set default 'no';

 -- You can remove the default value for a column, which affects any rows
 -- inserted after this change is made. If the column is nullable, any
 -- future inserts default to NULL for this column. If the column is marked
 -- NOT NULL, any future inserts must specify a value for the column.
 alter table kt alter column s drop default;

 insert into kt values (1, 'foo', now());
 -- Because of the DROP DEFAULT above, omitting S from the insert
 -- gives it a value of NULL.
 insert into kt (x, t) values (2, now());

 select * from kt;
 +---+------+-------------------------------+
 | x | s    | t                             |
 +---+------+-------------------------------+
 | 2 | NULL | 2017-10-02 00:03:40.652156000 |
 | 1 | foo  | 2017-10-02 00:03:04.346185000 |
 +---+------+-------------------------------+

 -- Other storage-related attributes can also be changed for columns.
 -- These changes take effect for any newly inserted rows, or rows
 -- rearranged due to compaction after deletes or updates.
 alter table kt alter column s set encoding prefix_encoding;
 -- The COLUMN keyword is optional in the syntax.
 alter table kt alter x set block_size 2048;
 alter table kt alter column t set compression zlib;

 desc kt;
 +------+-----------+---------+-------------+----------+---------------+-----------------+---------------------+------------+
 | name | type      | comment | primary_key | nullable | default_value | encoding        | compression         | block_size |
 +------+-----------+---------+-------------+----------+---------------+-----------------+---------------------+------------+
 | x    | bigint    |         | true        | false    |               | AUTO_ENCODING   | DEFAULT_COMPRESSION | 2048       |
 | s    | string    |         | false       | true     |               | PREFIX_ENCODING | DEFAULT_COMPRESSION | 0          |
 | t    | timestamp |         | false       | true     |               | AUTO_ENCODING   | ZLIB                | 0          |
 +------+-----------+---------+-------------+----------+---------------+-----------------+---------------------+------------+
 </codeblock>

     <p rev="kudu">
       Kudu tables all use an underlying partitioning mechanism. The partition syntax is
       different than for non-Kudu tables. You can use the <codeph>ALTER TABLE</codeph> statement
       to add and drop <term>range partitions</term> from a Kudu table. Any new range must not
       overlap with any existing ranges. Dropping a range removes all the associated rows from
       the table. See <xref href="impala_kudu.xml#kudu_partitioning"/> for details.
     </p>

     <p conref="../shared/impala_common.xml#common/related_info"/>

     <p>
       <xref href="impala_tables.xml#tables"/>,
       <xref
         href="impala_create_table.xml#create_table"/>,
       <xref
         href="impala_drop_table.xml#drop_table"/>,
       <xref
         href="impala_partitioning.xml#partitioning"/>,
       <xref
         href="impala_tables.xml#internal_tables"/>,
       <xref
         href="impala_tables.xml#external_tables"/>
     </p>

   </conbody>

 </concept>