| # File passed to org.jruby.Main by bin/hbase. Pollutes jirb with hbase imports |
| # and hbase commands and then loads jirb. Outputs a banner that tells user |
| # where to find help, shell version, and loads up a custom hirb. |
| |
| # TODO: Add 'debug' support (client-side logs show in shell). Add it as |
| # command-line option and as command. |
| # TODO: Interrupt a table creation or a connection to a bad master. Currently |
| # has to time out. Below we've set down the retries for rpc and hbase but |
| # still can be annoying (And there seem to be times when we'll retry for |
| # ever regardless) |
| # TODO: Add support for listing and manipulating catalog tables, etc. |
| # TODO: Encoding; need to know how to go from ruby String to UTF-8 bytes |
| |
| # Run the java magic include and import basic HBase types that will help ease |
| # hbase hacking. |
| include Java |
| |
| # Some goodies for hirb. Should these be left up to the user's discretion? |
| require 'irb/completion' |
| |
| # Add the $HBASE_HOME/bin directory, the location of this script, to the ruby |
| # load path so I can load up my HBase ruby modules |
| $LOAD_PATH.unshift File.dirname($PROGRAM_NAME) |
| # Require formatter |
| require 'Formatter' |
| |
# See if there are args for this shell. If any, read and then strip from ARGV
# so they don't go through to irb.  Output shell 'usage' if user types '--help'.
# Parsing stops at the first argument that is not a recognised switch: that
# argument is taken to be a script to run and everything after it is left in
# ARGV untouched.
cmdline_help = <<HERE # HERE document output as shell usage
HBase Shell command-line options:
 --format=FORMAT       Formatter for outputting results: console | html. Default: console
 --format-width=WIDTH  Width of table outputs. Default: 110 characters.
 -d | --debug          Set DEBUG log levels.
 -h | --help           Print this usage message and exit.
HERE
found = []
format = 'console'
format_width = 110
script2run = nil
logLevel = org.apache.log4j.Level::ERROR
ARGV.each do |arg|
  if arg =~ /^--format=(.+)/i
    format = $1
    if format =~ /^html$/i
      raise NoMethodError.new("Not yet implemented")
    elsif format !~ /^console$/i
      # Anything other than the (default) console format is rejected.
      raise ArgumentError.new("Unsupported format " + arg)
    end
    found.push(arg)
  elsif arg =~ /^--format-width=(.+)/i
    format_width = $1.to_i
    found.push(arg)
  elsif arg == '-h' || arg == '--help'
    puts cmdline_help
    exit
  elsif arg == '-d' || arg == '--debug'
    logLevel = org.apache.log4j.Level::DEBUG
    $fullBackTrace = true
    puts "Setting DEBUG log level..."
    # Consume the switch here as well; it is a shell option, not an irb one.
    found.push(arg)
  else
    # Presume it a script. Save it off for running later below
    # after we've set up some environment.
    script2run = arg
    found.push(arg)
    # Presume that any other args are meant for the script.
    break
  end
end
# Every argument examined above was consumed in order, so the consumed ones
# are exactly the leading elements of ARGV.  Drop that prefix by position
# rather than by value so equal-valued arguments that follow the script name
# are left alone.
ARGV.slice!(0, found.length)
| |
# Presume console format.
# Formatter takes an :output_stream parameter, if you don't want STDOUT.
@formatter = Formatter::Console.new(:format_width => format_width)
# TODO, etc.  @formatter = Formatter::XHTML.new(STDOUT)

# Set the logging level chosen during argument parsing on the ZooKeeper and
# HBase loggers to avoid verboseness.
%w[org.apache.zookeeper org.apache.hadoop.hbase].each do |logger_name|
  org.apache.log4j.Logger.getLogger(logger_name).setLevel(logLevel)
end
# Require HBase now after setting log levels
require 'HBase'

# Setup the HBase module.  Create a configuration.
# Set explicit retry counts for hbase (7) and ipc (3); a human at the shell
# doesn't want to wait on N retries.
@configuration = org.apache.hadoop.hbase.HBaseConfiguration.new
@configuration.setInt("hbase.client.retries.number", 7)
@configuration.setInt("ipc.client.connect.max.retries", 3)

# Do lazy create of admin because if we are pointed at bad master, it will hang
# shell on startup trying to connect.
@admin = nil
| |
# Promote hbase constants to be top-level constants so they can be used bare
# as keys in 'create', 'alter', etc.  To see constants in IRB, type
# 'Object.constants'.
#
# constants - a list of constant names (Strings or Symbols).
#
# Only names that are entirely uppercase are promoted, and each promoted
# constant's value is simply its own name as a String.  Names matching
# DEFAULT_ are skipped: promoting defaults would flatten all types to String,
# which can be confusing.
#
# Returns the list that was passed in.
def promoteConstants(constants)
  constants.each do |c|
    name = c.to_s
    # The constants to import are all in uppercase
    next unless name == name.upcase
    next if name =~ /DEFAULT_.*/
    # Define the constant directly instead of building source text for eval.
    Object.const_set(name, name.dup)
  end
end
# Promote, in this order, the constants of HColumnDescriptor, HTableDescriptor
# and the HBase ruby module.
[org.apache.hadoop.hbase.HColumnDescriptor,
 org.apache.hadoop.hbase.HTableDescriptor,
 HBase].each do |source|
  promoteConstants(source.constants)
end
| |
| |
| # Start of the hbase shell commands. |
| |
| # General shell methods |
| |
# Print help for the hbase shell surgery tools to standard output.
def tools
  puts <<HERE
HBASE SURGERY TOOLS:
close_region Close a single region. Optionally specify regionserver.
Examples:

hbase> close_region 'REGIONNAME'
hbase> close_region 'REGIONNAME', 'REGIONSERVER_IP:PORT'

compact Compact all regions in passed table or pass a region row
to compact an individual region

disable_region Disable a single region

enable_region Enable a single region. For example:

hbase> enable_region 'REGIONNAME'

flush Flush all regions in passed table or pass a region row to
flush an individual region. For example:

hbase> flush 'TABLENAME'
hbase> flush 'REGIONNAME'

major_compact Run major compaction on passed table or pass a region row
to major compact an individual region

split Split table or pass a region row to split individual region

zk Low level ZooKeeper surgery tools. Type "zk 'help'" for more
information (Yes, you must quote 'help').

zk_dump Dump status of HBase cluster as seen by ZooKeeper.

Above commands are for 'experts'-only as misuse can damage an install
HERE
end
| |
# Print the shell command reference to standard output.
#
# Help used to be a dictionary of name to short and long descriptions emitted
# using Formatters, but it was awkward getting it to show nicely on console;
# a HERE document is used instead.  That means help can only be output on the
# console, which is not an issue at the moment.
# TODO: Add help to the commands themselves rather than keep it distinct
def help
  puts <<HERE
HBASE SHELL COMMANDS:
alter Alter column family schema; pass table name and a dictionary
specifying new column family schema. Dictionaries are described
below in the GENERAL NOTES section. Dictionary must include name
of column family to alter. For example,

To change or add the 'f1' column family in table 't1' from defaults
to instead keep a maximum of 5 cell VERSIONS, do:
hbase> alter 't1', {NAME => 'f1', VERSIONS => 5}

To delete the 'f1' column family in table 't1', do:
hbase> alter 't1', {NAME => 'f1', METHOD => 'delete'}

You can also change table-scope attributes like MAX_FILESIZE
MEMSTORE_FLUSHSIZE and READONLY.

For example, to change the max size of a family to 128MB, do:
hbase> alter 't1', {METHOD => 'table_att', MAX_FILESIZE => '134217728'}

count Count the number of rows in a table. This operation may take a LONG
time (Run '$HADOOP_HOME/bin/hadoop jar hbase.jar rowcount' to run a
counting mapreduce job). Current count is shown every 1000 rows by
default. Count interval may be optionally specified. Examples:

hbase> count 't1'
hbase> count 't1', 100000

create Create table; pass table name, a dictionary of specifications per
column family, and optionally a dictionary of table configuration.
Dictionaries are described below in the GENERAL NOTES section.
Examples:

hbase> create 't1', {NAME => 'f1', VERSIONS => 5}
hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
hbase> # The above in shorthand would be the following:
hbase> create 't1', 'f1', 'f2', 'f3'
hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, \\
BLOCKCACHE => true}

describe Describe the named table: e.g. "hbase> describe 't1'"

delete Put a delete cell value at specified table/row/column and optionally
timestamp coordinates. Deletes must match the deleted cell's
coordinates exactly. When scanning, a delete cell suppresses older
versions. Takes arguments like the 'put' command described below

deleteall Delete all cells in a given row; pass a table name, row, and optionally
a column and timestamp

disable Disable the named table: e.g. "hbase> disable 't1'"

drop Drop the named table. Table must first be disabled. If table has
more than one region, run a major compaction on .META.:

hbase> major_compact ".META."

enable Enable the named table

exists Does the named table exist? e.g. "hbase> exists 't1'"

exit Type "hbase> exit" to leave the HBase Shell

get Get row or cell contents; pass table name, row, and optionally
a dictionary of column(s), timestamp and versions. Examples:

hbase> get 't1', 'r1'
hbase> get 't1', 'r1', {COLUMN => 'c1'}
hbase> get 't1', 'r1', {COLUMN => ['c1', 'c2', 'c3']}
hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1}
hbase> get 't1', 'r1', {COLUMN => 'c1', TIMESTAMP => ts1, \\
VERSIONS => 4}

incr Increments a cell 'value' at specified table/row/column coordinates.
To increment a cell value in table 't1' at row 'r1' under column
'c1' by 1 (can be omitted) or 10 do:

hbase> incr 't1', 'r1', 'c1'
hbase> incr 't1', 'r1', 'c1', 1
hbase> incr 't1', 'r1', 'c1', 10

list List all tables in hbase

put Put a cell 'value' at specified table/row/column and optionally
timestamp coordinates. To put a cell value into table 't1' at
row 'r1' under column 'c1' marked with the time 'ts1', do:

hbase> put 't1', 'r1', 'c1', 'value', ts1

tools Listing of hbase surgery tools

scan Scan a table; pass table name and optionally a dictionary of scanner
specifications. Scanner specifications may include one or more of
the following: LIMIT, STARTROW, STOPROW, TIMESTAMP, or COLUMNS. If
no columns are specified, all columns will be scanned. To scan all
members of a column family, leave the qualifier empty as in
'col_family:'. Examples:

hbase> scan '.META.'
hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, \\
STARTROW => 'xyz'}

For experts, there is an additional option -- CACHE_BLOCKS -- which
switches block caching for the scanner on (true) or off (false). By
default it is enabled. Examples:

hbase> scan 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}

status Show cluster status. Can be 'summary', 'simple', or 'detailed'. The
default is 'summary'. Examples:

hbase> status
hbase> status 'simple'
hbase> status 'summary'
hbase> status 'detailed'

shutdown Shut down the cluster.

truncate Disables, drops and recreates the specified table.

version Output this HBase version

GENERAL NOTES:
Quote all names in the hbase shell such as table and column names. Don't
forget commas delimit command parameters. Type <RETURN> after entering a
command to run it. Dictionaries of configuration used in the creation and
alteration of tables are ruby Hashes. They look like this:

{'key1' => 'value1', 'key2' => 'value2', ...}

They are opened and closed with curley-braces. Key/values are delimited by
the '=>' character combination. Usually keys are predefined constants such as
NAME, VERSIONS, COMPRESSION, etc. Constants do not need to be quoted. Type
'Object.constants' to see a (messy) list of all constants in the environment.

In case you are using binary keys or values and need to enter them into the
shell then use double-quotes to make use of hexadecimal for example:

hbase> get 't1', "key\\x03\\x3f\\xcd"
hbase> get 't1', "key\\003\\023\\011"
hbase> put 't1', "test\\xef\\xff", 'f1:', "\\x01\\x33\\x40"

Using the double-quote notation you can directly use the values output by the
shell for example during a "scan" call.

This HBase shell is the JRuby IRB with the above HBase-specific commands added.
For more on the HBase Shell, see http://wiki.apache.org/hadoop/Hbase/Shell
HERE
end
| |
# Output the HBase version, revision and build date reported by VersionInfo
# on a single line of the form "Version: <version>, r<revision>, <date>".
def version
  info = org.apache.hadoop.hbase.util.VersionInfo
  # Spell the separators out inside one literal; relying on backslash-newline
  # continuations made the spacing depend on the indentation of the source.
  puts "Version: #{info.getVersion()}, r#{info.getRevision()}, #{info.getDate()}"
end
| |
# Shut down the cluster by delegating to the admin object.
def shutdown
  admin.shutdown
end
| |
| # DDL |
| |
# Return the shell's HBase::Admin, building it from @configuration and
# @formatter on first use and reusing the same instance afterwards.
def admin
  @admin ||= HBase::Admin.new(@configuration, @formatter)
end
| |
# Build a handle on the named table.  Nothing is cached: every call creates
# a new HBase::Table from @configuration, the table name and @formatter.
def table(table)
  fresh_handle = HBase::Table.new(@configuration, table, @formatter)
  fresh_handle
end
| |
# Create a table.  The table name is followed by any number of column family
# (or table configuration) specifications, which are handed to the admin
# object as a single array.
def create(table, *args)
  admin.create(table, args)
end
| |
# Drop the named table by delegating to the admin object.
def drop(table)
  admin.drop(table)
end
| |
# Alter the named table; args is the specification passed through unchanged
# to the admin object.
def alter(table, args)
  admin.alter(table, args)
end
| |
| # Administration |
| |
# List tables by delegating to the admin object.
def list
  admin.list
end
| |
# Describe the named table by delegating to the admin object.
def describe(table)
  admin.describe(table)
end
| |
# Enable the named table by delegating to the admin object.
def enable(table)
  admin.enable(table)
end
| |
# Disable the named table by delegating to the admin object.
def disable(table)
  admin.disable(table)
end
| |
# Enable a single region, identified by name, via the admin object.
def enable_region(regionName)
  admin.enable_region(regionName)
end
| |
# Disable a single region, identified by name, via the admin object.
def disable_region(regionName)
  admin.disable_region(regionName)
end
| |
# Ask the admin object whether the named table exists.
def exists(table)
  admin.exists(table)
end
| |
# Truncate the named table by delegating to the admin object.
def truncate(table)
  admin.truncate(table)
end
| |
# Close a single region via the admin object.  The server argument is
# optional and is passed along as nil when omitted.
def close_region(regionName, server = nil)
  admin.close_region(regionName, server)
end
| |
# Show cluster status via the admin object; format defaults to 'summary'.
def status(format = 'summary')
  admin.status(format)
end
| |
# Low level ZooKeeper tools: all arguments are collected into one array and
# handed to the admin object's zk method.
def zk(*args)
  admin.zk(args)
end
| |
# Dump cluster status as seen by ZooKeeper, via the admin object.
def zk_dump
  zk_admin = admin
  zk_admin.zk_dump
end
| |
| # CRUD |
| |
# Get row or cell contents from the named table.  args is an optional
# dictionary (empty by default) passed through to the table's get.
def get(table, row, args = {})
  target = table(table)
  target.get(row, args)
end
| |
# Put a cell value at the given table/row/column.  timestamp is optional and
# is passed along as nil when omitted.
def put(table, row, column, value, timestamp = nil)
  target = table(table)
  target.put(row, column, value, timestamp)
end
| |
# Increment a cell at the given table/row/column.  value is optional and is
# passed along as nil when omitted.
def incr(table, row, column, value = nil)
  target = table(table)
  target.incr(row, column, value)
end
| |
# Scan the named table.  args is an optional dictionary of scanner
# specifications (empty by default) passed through to the table's scan.
def scan(table, args = {})
  target = table(table)
  target.scan(args)
end
| |
# Put a delete at the given table/row/column.  When no timestamp is given,
# HConstants::LATEST_TIMESTAMP is used.
def delete(table, row, column,
           timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
  target = table(table)
  target.delete(row, column, timestamp)
end
| |
# Delete all cells in a row of the named table, optionally narrowed to a
# column and timestamp.  column defaults to nil and timestamp to
# HConstants::LATEST_TIMESTAMP.
def deleteall(table, row, column = nil,
              timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP)
  target = table(table)
  target.deleteall(row, column, timestamp)
end
| |
# Count the rows of the named table.  interval (1000 by default) is passed
# through to the table's count.
def count(table, interval = 1000)
  target = table(table)
  target.count(interval)
end
| |
# Flush the given table or region (by name) via the admin object.
def flush(tableNameOrRegionName)
  admin.flush(tableNameOrRegionName)
end
| |
# Compact the given table or region (by name) via the admin object.
def compact(tableNameOrRegionName)
  admin.compact(tableNameOrRegionName)
end
| |
# Major-compact the given table or region (by name) via the admin object.
def major_compact(tableNameOrRegionName)
  admin.major_compact(tableNameOrRegionName)
end
| |
# Split the given table or region (by name) via the admin object.
def split(tableNameOrRegionName)
  admin.split(tableNameOrRegionName)
end
| |
| |
# If a script was named on the command line, run it before the shell starts.
# The shell still comes up afterwards unless the script calls 'exit',
# 'exit 0' or 'exit errcode'.
if script2run
  load(script2run)
end

# Output a banner message that tells users where to go for help, followed by
# the version line.
puts "HBase Shell; enter 'help<RETURN>' for list of supported commands."
version

require "irb"
| |
module IRB
  # Subclass of Irb so the shell can intercept construction and the echoing
  # of evaluation results.
  class HIRB < Irb
    # Construct the Irb with $stdout temporarily pointed at /dev/null.
    #
    # This is ugly. Our 'help' method provokes the following message on irb
    # construction: 'irb: warn: can't alias help from irb_help.'  Output is
    # silenced during construction just so this message does not come out
    # after we emit the banner.  STDERR is not blanked, so real construction
    # errors remain visible.
    #
    # Any arguments are forwarded unchanged to Irb#initialize, so both
    # HIRB.new and HIRB.new(nil, script) work.
    def initialize(*args)
      saved_stdout = $stdout
      sink = nil
      begin
        sink = File.open("/dev/null", "w")
        $stdout = sink
        super
      ensure
        # Put output back first, then close the sink only if it was actually
        # opened, so a failed open is not masked by a NoMethodError on nil.
        $stdout = saved_stdout
        sink.close if sink
      end
    end

    # Suppress output if last_value is 'nil'.  Otherwise, when user types
    # help, they get an ugly 'nil' after all output.
    def output_value
      super if @context.last_value != nil
    end
  end

  # Start the shell: run the normal IRB setup, brand the session as 'hbase',
  # hide backtraces unless $fullBackTrace is set, then evaluate input with a
  # HIRB instance until :IRB_EXIT is thrown.
  #
  # ap_path - optional application path; when given, $0 is set to its
  #           basename without the ".rb" extension.
  def IRB.start(ap_path = nil)
    $0 = File::basename(ap_path, ".rb") if ap_path

    IRB.setup(ap_path)
    @CONF[:IRB_NAME] = 'hbase'
    @CONF[:AP_NAME] = 'hbase'
    @CONF[:BACK_TRACE_LIMIT] = 0 unless $fullBackTrace

    script = @CONF[:SCRIPT]
    hirb = script ? HIRB.new(nil, script) : HIRB.new

    @CONF[:IRB_RC].call(hirb.context) if @CONF[:IRB_RC]
    @CONF[:MAIN_CONTEXT] = hirb.context

    catch(:IRB_EXIT) do
      hirb.eval_input
    end
  end
end
| |
# Hand control to the interactive shell through the IRB.start override
# defined in the IRB module above.
IRB.start