#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

module Shell
  module Commands
    # Shell command that scans a table and prints every returned row through
    # the shell's formatter.
    class Scan < Command
      # Returns the usage text shown for this command.
      #
      # The heredoc body is intentionally kept at column 0: `<<-` only allows
      # the terminator to be indented, so any indentation added to the body
      # would become part of the displayed help text.
      def help
        <<-EOF
Scan a table; pass table name and optionally a dictionary of scanner
specifications. Scanner specifications may include one or more of:
TIMERANGE, FILTER, LIMIT, STARTROW, STOPROW, ROWPREFIXFILTER, TIMESTAMP,
MAXLENGTH or COLUMNS, CACHE or RAW, VERSIONS

If no columns are specified, all columns will be scanned.
To scan all members of a column family, leave the qualifier empty as in
'col_family'.

The filter can be specified in two ways:
1. Using a filterString - more information on this is available in the
Filter Language document attached to the HBASE-4176 JIRA
2. Using the entire package name of the filter.

Some examples:

hbase> scan 'hbase:meta'
hbase> scan 'hbase:meta', {COLUMNS => 'info:regioninfo'}
hbase> scan 'ns1:t1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804, 1303668904]}
hbase> scan 't1', {REVERSED => true}
hbase> scan 't1', {ROWPREFIXFILTER => 'row2', FILTER => "
(QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"}
hbase> scan 't1', {FILTER =>
org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
hbase> scan 't1', {CONSISTENCY => 'TIMELINE'}
For setting the Operation Attributes
hbase> scan 't1', { COLUMNS => ['c1', 'c2'], ATTRIBUTES => {'mykey' => 'myvalue'}}
hbase> scan 't1', { COLUMNS => ['c1', 'c2'], AUTHORIZATIONS => ['PRIVATE','SECRET']}
For experts, there is an additional option -- CACHE_BLOCKS -- which
switches block caching for the scanner on (true) or off (false). By
default it is enabled. Examples:

hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}

Also for experts, there is an advanced option -- RAW -- which instructs the
scanner to return all cells (including delete markers and uncollected deleted
cells). This option cannot be combined with requesting specific COLUMNS.
Disabled by default. Example:

hbase> scan 't1', {RAW => true, VERSIONS => 10}

Besides the default 'toStringBinary' format, 'scan' supports custom formatting
by column. A user can define a FORMATTER by adding it to the column name in
the scan specification. The FORMATTER can be stipulated:

1. either as a org.apache.hadoop.hbase.util.Bytes method name (e.g, toInt, toString)
2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.

Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }

Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot
specify a FORMATTER for all columns of a column family.

Scan can also be used directly from a table, by first getting a reference to a
table, like such:

hbase> t = get_table 't'
hbase> t.scan

Note in the above situation, you can still provide all the filtering, columns,
options, etc as described above.

        EOF
      end

      # Entry point used when `scan` is invoked with a table name.
      #
      # table - the table name as given on the shell command line.
      # args  - optional Hash of scanner specifications (see #help).
      #
      # `table(table)` calls the `table` helper method with the local `table`
      # argument; the parentheses are what make Ruby treat it as a method call.
      # NOTE(review): the helper is not defined in this file -- presumably
      # inherited from Command and returning a table object; confirm.
      def command(table, args = {})
        scan(table(table), args)
      end

      # Internal command that actually does the scanning.
      #
      # table - object responding to `_scan_internal` (not a table name).
      # args  - optional Hash of scanner specifications, passed through
      #         untouched to `_scan_internal`.
      def scan(table, args = {})
        # Captured before any output so the footer receives the start time of
        # the whole operation.
        now = Time.now
        formatter.header(["ROW", "COLUMN+CELL"])

        # `_scan_internal` yields one (row, cells) pair per result; its return
        # value is handed to the footer (presumably the number of rows
        # scanned -- verify against Hbase::Table#_scan_internal).
        count = table._scan_internal(args) do |row, cells|
          formatter.row([row, cells])
        end

        formatter.footer(now, count)
      end
    end
  end
end

# Add the method table.scan that calls Scan.scan, so a scan can be started
# directly from a table reference (e.g. `t.scan`).
::Hbase::Table.add_shell_command("scan")