| # |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
module Shell
  module Commands
    # Shell command 'scan': prints the rows of a table, optionally
    # constrained by a dictionary of scanner specifications.
    class Scan < Command
      # Returns the usage text shown by `help 'scan'`.
      def help
        <<-EOF
Scan a table; pass table name and optionally a dictionary of scanner
specifications. Scanner specifications may include one or more of:
TIMERANGE, FILTER, LIMIT, STARTROW, STOPROW, ROWPREFIXFILTER, TIMESTAMP,
MAXLENGTH, COLUMNS, CACHE, RAW, VERSIONS, ALL_METRICS, METRICS,
REGION_REPLICA_ID, ISOLATION_LEVEL, READ_TYPE, ALLOW_PARTIAL_RESULTS,
BATCH or MAX_RESULT_SIZE

If no columns are specified, all columns will be scanned.
To scan all members of a column family, leave the qualifier empty as in
'col_family'.

The filter can be specified in two ways:
1. Using a filterString - more information on this is available in the
Filter Language document attached to the HBASE-4176 JIRA
2. Using the entire package name of the filter.

If you wish to see metrics regarding the execution of the scan, the
ALL_METRICS boolean should be set to true. Alternatively, if you would
prefer to see only a subset of the metrics, the METRICS array can be
defined to include the names of only the metrics you care about.

Some examples:

hbase> scan 'hbase:meta'
hbase> scan 'hbase:meta', {COLUMNS => 'info:regioninfo'}
hbase> scan 'ns1:t1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804000, 1303668904000]}
hbase> scan 't1', {REVERSED => true}
hbase> scan 't1', {ALL_METRICS => true}
hbase> scan 't1', {METRICS => ['RPC_RETRIES', 'ROWS_FILTERED']}
hbase> scan 't1', {ROWPREFIXFILTER => 'row2', FILTER => "
(QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"}
hbase> scan 't1', {FILTER =>
org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
hbase> scan 't1', {CONSISTENCY => 'TIMELINE'}
hbase> scan 't1', {ISOLATION_LEVEL => 'READ_UNCOMMITTED'}
hbase> scan 't1', {MAX_RESULT_SIZE => 123456}
For setting the Operation Attributes
hbase> scan 't1', { COLUMNS => ['c1', 'c2'], ATTRIBUTES => {'mykey' => 'myvalue'}}
hbase> scan 't1', { COLUMNS => ['c1', 'c2'], AUTHORIZATIONS => ['PRIVATE','SECRET']}
For experts, there is an additional option -- CACHE_BLOCKS -- which
switches block caching for the scanner on (true) or off (false). By
default it is enabled. Examples:

hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}

Also for experts, there is an advanced option -- RAW -- which instructs the
scanner to return all cells (including delete markers and uncollected deleted
cells). This option cannot be combined with requesting specific COLUMNS.
Disabled by default. Example:

hbase> scan 't1', {RAW => true, VERSIONS => 10}

There is yet another option -- READ_TYPE -- which instructs the scanner to
use a specific read type. Example:

hbase> scan 't1', {READ_TYPE => 'PREAD'}

Besides the default 'toStringBinary' format, 'scan' supports custom formatting
by column. A user can define a FORMATTER by adding it to the column name in
the scan specification. The FORMATTER can be stipulated:

1. either as a org.apache.hadoop.hbase.util.Bytes method name (e.g, toInt, toString)
2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.

Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }

Note that you can specify a FORMATTER by column only (cf:qualifier). You can set a
formatter for all columns (including, all key parts) using the "FORMATTER"
and "FORMATTER_CLASS" options. The default "FORMATTER_CLASS" is
"org.apache.hadoop.hbase.util.Bytes".

hbase> scan 't1', {FORMATTER => 'toString'}
hbase> scan 't1', {FORMATTER_CLASS => 'org.apache.hadoop.hbase.util.Bytes', FORMATTER => 'toString'}

Scan can also be used directly from a table, by first getting a reference to a
table, like such:

hbase> t = get_table 't'
hbase> t.scan

Note in the above situation, you can still provide all the filtering, columns,
options, etc as described above.

EOF
      end

      # Entry point for `scan 'name', {...}`: resolves the table name to a
      # table object, then delegates to #scan below.
      def command(table, args = {})
        resolved = table(table)
        scan(resolved, args)
      end

      # Internal command that actually does the scanning: builds the scan
      # from the args hash, streams each row through the formatter, and
      # prints a footer (plus scan metrics when they were requested).
      def scan(table, args = {})
        formatter.header(%w[ROW COLUMN+CELL])

        scan_object = table._hash_to_scan(args)

        # Record wall-clock timestamps around the scan itself.
        @start_time = Time.now
        row_count, is_stale = table._scan_internal(args, scan_object) do |row, cells|
          formatter.row([row, cells])
        end
        @end_time = Time.now

        formatter.footer(row_count, is_stale)

        # If scan metrics were enabled, print them after the results.
        return if scan_object.nil? || !scan_object.isScanMetricsEnabled
        formatter.scan_metrics(scan_object.getScanMetrics, args['METRICS'])
      end
    end
  end
end
| |
| # Add the method table.scan that calls Scan.scan |
| ::Hbase::Table.add_shell_command('scan') |