# hbase-shell/src/main/ruby/shell/commands/scan.rb - hbase - Git at Google

 #
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #

 module Shell
   module Commands
     # The 'scan' shell command: prints a ROW / COLUMN+CELL listing of a table
     # through the formatter, optionally followed by scan metrics.
     class Scan < Command
       # Returns the usage text for this command as a single string.
       def help
         <<-EOF
 Scan a table; pass table name and optionally a dictionary of scanner
 specifications.  Scanner specifications may include one or more of:
 TIMERANGE, FILTER, LIMIT, STARTROW, STOPROW, ROWPREFIXFILTER, TIMESTAMP,
 MAXLENGTH, COLUMNS, CACHE, RAW, VERSIONS, ALL_METRICS, METRICS,
 REGION_REPLICA_ID, ISOLATION_LEVEL, READ_TYPE, ALLOW_PARTIAL_RESULTS,
 BATCH or MAX_RESULT_SIZE

 If no columns are specified, all columns will be scanned.
 To scan all members of a column family, leave the qualifier empty as in
 'col_family'.

 The filter can be specified in two ways:
 1. Using a filterString - more information on this is available in the
 Filter Language document attached to the HBASE-4176 JIRA
 2. Using the entire package name of the filter.

 If you wish to see metrics regarding the execution of the scan, the
 ALL_METRICS boolean should be set to true. Alternatively, if you would
 prefer to see only a subset of the metrics, the METRICS array can be
 defined to include the names of only the metrics you care about.

 Some examples:

   hbase> scan 'hbase:meta'
   hbase> scan 'hbase:meta', {COLUMNS => 'info:regioninfo'}
   hbase> scan 'ns1:t1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
   hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
   hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804000, 1303668904000]}
   hbase> scan 't1', {REVERSED => true}
   hbase> scan 't1', {ALL_METRICS => true}
   hbase> scan 't1', {METRICS => ['RPC_RETRIES', 'ROWS_FILTERED']}
   hbase> scan 't1', {ROWPREFIXFILTER => 'row2', FILTER => "
     (QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"}
   hbase> scan 't1', {FILTER =>
     org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
   hbase> scan 't1', {CONSISTENCY => 'TIMELINE'}
   hbase> scan 't1', {ISOLATION_LEVEL => 'READ_UNCOMMITTED'}
   hbase> scan 't1', {MAX_RESULT_SIZE => 123456}
 For setting the Operation Attributes
   hbase> scan 't1', { COLUMNS => ['c1', 'c2'], ATTRIBUTES => {'mykey' => 'myvalue'}}
   hbase> scan 't1', { COLUMNS => ['c1', 'c2'], AUTHORIZATIONS => ['PRIVATE','SECRET']}
 For experts, there is an additional option -- CACHE_BLOCKS -- which
 switches block caching for the scanner on (true) or off (false).  By
 default it is enabled.  Examples:

   hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}

 Also for experts, there is an advanced option -- RAW -- which instructs the
 scanner to return all cells (including delete markers and uncollected deleted
 cells). This option cannot be combined with requesting specific COLUMNS.
 Disabled by default.  Example:

   hbase> scan 't1', {RAW => true, VERSIONS => 10}

 There is yet another option -- READ_TYPE -- which instructs the scanner to
 use a specific read type. Example:

   hbase> scan 't1', {READ_TYPE => 'PREAD'}

 Besides the default 'toStringBinary' format, 'scan' supports custom formatting
 by column.  A user can define a FORMATTER by adding it to the column name in
 the scan specification.  The FORMATTER can be stipulated:

  1. either as a org.apache.hadoop.hbase.util.Bytes method name (e.g, toInt, toString)
  2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.

 Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
   hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
     'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }

 Note that you can specify a FORMATTER by column only (cf:qualifier). You can set a
 formatter for all columns (including, all key parts) using the "FORMATTER"
 and "FORMATTER_CLASS" options. The default "FORMATTER_CLASS" is
 "org.apache.hadoop.hbase.util.Bytes".

   hbase> scan 't1', {FORMATTER => 'toString'}
   hbase> scan 't1', {FORMATTER_CLASS => 'org.apache.hadoop.hbase.util.Bytes', FORMATTER => 'toString'}

 Scan can also be used directly from a table, by first getting a reference to a
 table, like such:

   hbase> t = get_table 't'
   hbase> t.scan

 Note in the above situation, you can still provide all the filtering, columns,
 options, etc as described above.

 EOF
       end

       # Entry point taking a table name: looks the table up through the
       # table(...) helper (defined outside this file) and hands it to #scan.
       #
       # table - value passed to the table(...) helper
       # args  - scanner specification hash, forwarded untouched
       def command(table, args = {})
         target = table(table)
         scan(target, args)
       end

       # Runs the scan against an already-resolved table object and prints it.
       #
       # table - object responding to _hash_to_scan and _scan_internal
       # args  - scanner specification hash; args['METRICS'] is forwarded to
       #         the metrics printer
       #
       # Returns nil when metrics are not printed, otherwise whatever
       # formatter.scan_metrics returns.
       def scan(table, args = {})
         formatter.header(['ROW', 'COLUMN+CELL'])

         scanner = table._hash_to_scan(args)

         # Only the row iteration is bracketed by the two timestamps.
         @start_time = Time.now
         count, is_stale = table._scan_internal(args, scanner) { |row, cells| formatter.row([row, cells]) }
         @end_time = Time.now

         formatter.footer(count, is_stale)

         # Metrics, when enabled on the scan object, go after the footer.
         return if scanner.nil? || !scanner.isScanMetricsEnabled

         formatter.scan_metrics(scanner.getScanMetrics, args['METRICS'])
       end
     end
   end
 end

 # Register 'scan' with Hbase::Table so the command is also available on a
 # table reference: this adds the method table.scan that calls Scan.scan
 # (see the `t.scan` example in the help text above).
 ::Hbase::Table.add_shell_command('scan')
	#
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#

	module Shell
	  module Commands
	    # The 'scan' shell command: prints a ROW / COLUMN+CELL listing of a table
	    # through the formatter, optionally followed by scan metrics.
	    class Scan < Command
	      # Returns the usage text for this command as a single string.
	      def help
	        <<-EOF
	Scan a table; pass table name and optionally a dictionary of scanner
	specifications. Scanner specifications may include one or more of:
	TIMERANGE, FILTER, LIMIT, STARTROW, STOPROW, ROWPREFIXFILTER, TIMESTAMP,
	MAXLENGTH, COLUMNS, CACHE, RAW, VERSIONS, ALL_METRICS, METRICS,
	REGION_REPLICA_ID, ISOLATION_LEVEL, READ_TYPE, ALLOW_PARTIAL_RESULTS,
	BATCH or MAX_RESULT_SIZE

	If no columns are specified, all columns will be scanned.
	To scan all members of a column family, leave the qualifier empty as in
	'col_family'.

	The filter can be specified in two ways:
	1. Using a filterString - more information on this is available in the
	Filter Language document attached to the HBASE-4176 JIRA
	2. Using the entire package name of the filter.

	If you wish to see metrics regarding the execution of the scan, the
	ALL_METRICS boolean should be set to true. Alternatively, if you would
	prefer to see only a subset of the metrics, the METRICS array can be
	defined to include the names of only the metrics you care about.

	Some examples:

	hbase> scan 'hbase:meta'
	hbase> scan 'hbase:meta', {COLUMNS => 'info:regioninfo'}
	hbase> scan 'ns1:t1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
	hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
	hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804000, 1303668904000]}
	hbase> scan 't1', {REVERSED => true}
	hbase> scan 't1', {ALL_METRICS => true}
	hbase> scan 't1', {METRICS => ['RPC_RETRIES', 'ROWS_FILTERED']}
	hbase> scan 't1', {ROWPREFIXFILTER => 'row2', FILTER => "
	(QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))"}
	hbase> scan 't1', {FILTER =>
	org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
	hbase> scan 't1', {CONSISTENCY => 'TIMELINE'}
	hbase> scan 't1', {ISOLATION_LEVEL => 'READ_UNCOMMITTED'}
	hbase> scan 't1', {MAX_RESULT_SIZE => 123456}
	For setting the Operation Attributes
	hbase> scan 't1', { COLUMNS => ['c1', 'c2'], ATTRIBUTES => {'mykey' => 'myvalue'}}
	hbase> scan 't1', { COLUMNS => ['c1', 'c2'], AUTHORIZATIONS => ['PRIVATE','SECRET']}
	For experts, there is an additional option -- CACHE_BLOCKS -- which
	switches block caching for the scanner on (true) or off (false). By
	default it is enabled. Examples:

	hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}

	Also for experts, there is an advanced option -- RAW -- which instructs the
	scanner to return all cells (including delete markers and uncollected deleted
	cells). This option cannot be combined with requesting specific COLUMNS.
	Disabled by default. Example:

	hbase> scan 't1', {RAW => true, VERSIONS => 10}

	There is yet another option -- READ_TYPE -- which instructs the scanner to
	use a specific read type. Example:

	hbase> scan 't1', {READ_TYPE => 'PREAD'}

	Besides the default 'toStringBinary' format, 'scan' supports custom formatting
	by column. A user can define a FORMATTER by adding it to the column name in
	the scan specification. The FORMATTER can be stipulated:

	1. either as a org.apache.hadoop.hbase.util.Bytes method name (e.g, toInt, toString)
	2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.

	Example formatting cf:qualifier1 and cf:qualifier2 both as Integers:
	hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
	'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] }

	Note that you can specify a FORMATTER by column only (cf:qualifier). You can set a
	formatter for all columns (including, all key parts) using the "FORMATTER"
	and "FORMATTER_CLASS" options. The default "FORMATTER_CLASS" is
	"org.apache.hadoop.hbase.util.Bytes".

	hbase> scan 't1', {FORMATTER => 'toString'}
	hbase> scan 't1', {FORMATTER_CLASS => 'org.apache.hadoop.hbase.util.Bytes', FORMATTER => 'toString'}

	Scan can also be used directly from a table, by first getting a reference to a
	table, like such:

	hbase> t = get_table 't'
	hbase> t.scan

	Note in the above situation, you can still provide all the filtering, columns,
	options, etc as described above.

	EOF
	      end

	      # Entry point taking a table name: looks the table up through the
	      # table(...) helper (defined outside this file) and hands it to #scan.
	      #
	      # table - value passed to the table(...) helper
	      # args  - scanner specification hash, forwarded untouched
	      def command(table, args = {})
	        scan(table(table), args)
	      end

	      # Internal command that actually does the scanning and prints the result.
	      #
	      # table - object responding to _hash_to_scan and _scan_internal
	      # args  - scanner specification hash; args['METRICS'] is forwarded to
	      #         the metrics printer
	      def scan(table, args = {})
	        formatter.header(['ROW', 'COLUMN+CELL'])

	        scan = table._hash_to_scan(args)
	        # Only the row iteration is bracketed by the two timestamps.
	        @start_time = Time.now
	        # Block parameters use plain pipes; the backslash-escaped form that was
	        # here is not valid Ruby.
	        count, is_stale = table._scan_internal(args, scan) do |row, cells|
	          formatter.row([row, cells])
	        end
	        @end_time = Time.now

	        formatter.footer(count, is_stale)
	        # If scan metrics were enabled, print them after the results.
	        if !scan.nil? && scan.isScanMetricsEnabled
	          formatter.scan_metrics(scan.getScanMetrics, args['METRICS'])
	        end
	      end
	    end
	  end
	end

	# Register 'scan' with Hbase::Table so the command is also available on a
	# table reference: this adds the method table.scan that calls Scan.scan
	# (see the `t.scan` example in the help text above).
	::Hbase::Table.add_shell_command('scan')