| //// |
| /** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| //// |
| |
| [[external_apis]] |
| = Apache HBase External APIs |
| :doctype: book |
| :numbered: |
| :toc: left |
| :icons: font |
| :experimental: |
| |
| This chapter covers access to Apache HBase either through non-Java languages or |
| through custom protocols. For information on using the native HBase APIs, refer to |
| link:http://hbase.apache.org/apidocs/index.html[User API Reference] and the |
| <<hbase_apis,HBase APIs>> chapter. |
| |
| == REST |
| |
| Representational State Transfer (REST) was introduced in 2000 in the doctoral |
| dissertation of Roy Fielding, one of the principal authors of the HTTP specification. |
| |
| REST itself is out of the scope of this documentation, but in general, REST allows |
| client-server interactions via an API that is tied to the URL itself. This section |
| discusses how to configure and run the REST server included with HBase, which exposes |
| HBase tables, rows, cells, and metadata as URL-specified resources. |
| There is also a nice series of blogs on |
| link:http://blog.cloudera.com/blog/2013/03/how-to-use-the-apache-hbase-rest-interface-part-1/[How-to: Use the Apache HBase REST Interface] |
| by Jesse Anderson. |
| |
| === Starting and Stopping the REST Server |
| |
| The included REST server can run as a daemon which starts an embedded Jetty |
| servlet container and deploys the servlet into it. Use one of the following commands |
| to start the REST server in the foreground or background. The port is optional, and |
| defaults to 8080. |
| |
| [source, bash] |
| ---- |
| # Foreground |
| $ bin/hbase rest start -p <port> |
| |
| # Background, logging to a file in $HBASE_LOGS_DIR |
| $ bin/hbase-daemon.sh start rest -p <port> |
| ---- |
| |
| To stop the REST server, use Ctrl-C if you were running it in the foreground, or the |
| following command if you were running it in the background. |
| |
| [source, bash] |
| ---- |
| $ bin/hbase-daemon.sh stop rest |
| ---- |
| |
| === Configuring the REST Server and Client |
| |
| For information about configuring the REST server and client for SSL, as well as `doAs` |
| impersonation for the REST server, see <<security.gateway.thrift>> and other portions |
| of the <<security>> chapter. |
| |
| === Using REST Endpoints |
| |
| The following examples use the placeholder server pass:[http://example.com:8000], and |
| the following commands can all be run using `curl` or `wget` commands. You can request |
| plain text (the default), XML, or JSON output by adding no header for plain text, |
| or the header "Accept: text/xml" for XML, "Accept: application/json" for JSON, or |
| "Accept: application/x-protobuf" for protocol buffers. |
| |
| NOTE: Unless specified, use `GET` requests for queries, `PUT` or `POST` requests for |
| creation or mutation, and `DELETE` for deletion. |
| |
| .Cluster-Wide Endpoints |
| [options="header", cols="2m,m,3d,6l"] |
| |=== |
| |Endpoint |
| |HTTP Verb |
| |Description |
| |Example |
| |
| |/version/cluster |
| |GET |
| |Version of HBase running on this cluster |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/version/cluster" |
| |
| |/status/cluster |
| |GET |
| |Cluster status |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/status/cluster" |
| |
| |/ |
| |GET |
| |List of all non-system tables |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/" |
| |
| |=== |
| |
| .Namespace Endpoints |
| [options="header", cols="2m,m,3d,6l"] |
| |=== |
| |Endpoint |
| |HTTP Verb |
| |Description |
| |Example |
| |
| |/namespaces |
| |GET |
| |List all namespaces |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/namespaces/" |
| |
| |/namespaces/_namespace_ |
| |GET |
| |Describe a specific namespace |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/namespaces/special_ns" |
| |
| |/namespaces/_namespace_ |
| |POST |
| |Create a new namespace |
| |curl -vi -X POST \ |
| -H "Accept: text/xml" \ |
| "example.com:8000/namespaces/special_ns" |
| |
| |/namespaces/_namespace_/tables |
| |GET |
| |List all tables in a specific namespace |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/namespaces/special_ns/tables" |
| |
| |/namespaces/_namespace_ |
| |PUT |
| |Alter an existing namespace. Currently not used. |
| |curl -vi -X PUT \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/namespaces/special_ns" |
| |
| |/namespaces/_namespace_ |
| |DELETE |
| |Delete a namespace. The namespace must be empty. |
| |curl -vi -X DELETE \ |
| -H "Accept: text/xml" \ |
| "example.com:8000/namespaces/special_ns" |
| |
| |=== |
| |
| .Table Endpoints |
| [options="header", cols="2m,m,3d,6l"] |
| |=== |
| |Endpoint |
| |HTTP Verb |
| |Description |
| |Example |
| |
| |/_table_/schema |
| |GET |
| |Describe the schema of the specified table. |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/schema" |
| |
| |/_table_/schema |
| |POST |
| |Create a new table, or replace an existing table's schema |
| |curl -vi -X POST \ |
| -H "Accept: text/xml" \ |
| -H "Content-Type: text/xml" \ |
| -d '<?xml version="1.0" encoding="UTF-8"?><TableSchema name="users"><ColumnSchema name="cf" /></TableSchema>' \ |
| "http://example.com:8000/users/schema" |
| |
| |/_table_/schema |
| |PUT |
| |Update an existing table with the provided schema fragment |
| |curl -vi -X PUT \ |
| -H "Accept: text/xml" \ |
| -H "Content-Type: text/xml" \ |
| -d '<?xml version="1.0" encoding="UTF-8"?><TableSchema name="users"><ColumnSchema name="cf" KEEP_DELETED_CELLS="true" /></TableSchema>' \ |
| "http://example.com:8000/users/schema" |
| |
| |/_table_/schema |
| |DELETE |
| |Delete the table. You must use the `/_table_/schema` endpoint, not just `/_table_/`. |
| |curl -vi -X DELETE \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/schema" |
| |
| |/_table_/regions |
| |GET |
| |List the table regions |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/regions" |
| |=== |
| |
| .Endpoints for `Get` Operations |
| [options="header", cols="2m,m,3d,6l"] |
| |=== |
| |Endpoint |
| |HTTP Verb |
| |Description |
| |Example |
| |
| |/_table_/_row_/_column:qualifier_/_timestamp_ |
| |GET |
| |Get the value of a single row. Values are Base-64 encoded. |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/row1" |
| |
| curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/row1/cf:a/1458586888395" |
| |
| |/_table_/_row_/_column:qualifier_ |
| |GET |
| |Get the value of a single column. Values are Base-64 encoded. |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/row1/cf:a" |
| |
| curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/row1/cf:a/" |
| |
| |/_table_/_row_/_column:qualifier_/?v=_number_of_versions_ |
| |GET |
| |Multi-Get a specified number of versions of a given cell. Values are Base-64 encoded. |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/row1/cf:a?v=2" |
| |
| |=== |
| |
| .Endpoints for `Scan` Operations |
| [options="header", cols="2m,m,3d,6l"] |
| |=== |
| |Endpoint |
| |HTTP Verb |
| |Description |
| |Example |
| |
| |/_table_/scanner/ |
| |PUT |
| |Get a Scanner object. Required by all other Scan operations. Adjust the batch parameter |
| to the number of rows the scan should return in a batch. See the next example for |
| adding filters to your scanner. The scanner endpoint URL is returned as the `Location` |
| in the HTTP response. The other examples in this table assume that the scanner endpoint |
| is `\http://example.com:8000/users/scanner/145869072824375522207`. |
| |curl -vi -X PUT \ |
| -H "Accept: text/xml" \ |
| -H "Content-Type: text/xml" \ |
| -d '<Scanner batch="1"/>' \ |
| "http://example.com:8000/users/scanner/" |
| |
| |/_table_/scanner/ |
| |PUT |
| |To supply filters to the Scanner object or configure the |
| Scanner in any other way, you can create a text file and add |
| your filter to the file. For example, to return only rows for |
| which keys start with `u123` and use a batch size |
| of 100, the filter file would look like this: |
| |
| +++ |
| <pre> |
| &lt;Scanner batch="100"&gt; |
| &lt;filter&gt; |
| { |
| "type": "PrefixFilter", |
| "value": "u123" |
| } |
| &lt;/filter&gt; |
| &lt;/Scanner&gt; |
| </pre> |
| +++ |
| |
| Pass the file to the `-d` argument of the `curl` request. |
| |curl -vi -X PUT \ |
| -H "Accept: text/xml" \ |
| -H "Content-Type:text/xml" \ |
| -d @filter.txt \ |
| "http://example.com:8000/users/scanner/" |
| |
| |/_table_/scanner/_scanner-id_ |
| |GET |
| |Get the next batch from the scanner. Cell values are byte-encoded. If the scanner |
| has been exhausted, HTTP status `204` is returned. |
| |curl -vi -X GET \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/scanner/145869072824375522207" |
| |
| |/_table_/scanner/_scanner-id_ |
| |DELETE |
| |Deletes the scanner and frees the resources it used. |
| |curl -vi -X DELETE \ |
| -H "Accept: text/xml" \ |
| "http://example.com:8000/users/scanner/145869072824375522207" |
| |
| |=== |
| |
| .Endpoints for `Put` Operations |
| [options="header", cols="2m,m,3d,6l"] |
| |=== |
| |Endpoint |
| |HTTP Verb |
| |Description |
| |Example |
| |
| |/_table_/_row_key_ |
| |PUT |
| |Write a row to a table. The row, column qualifier, and value must each be Base-64 |
| encoded. To encode a string, use the `base64` command-line utility. To decode the |
| string, use `base64 -d`. The payload is in the `--data` argument, and the `/users/fakerow` |
| value is a placeholder. Insert multiple rows by adding them to the `<CellSet>` |
| element. You can also save the data to be inserted to a file and pass it to the `-d` |
| parameter with syntax like `-d @filename.txt`. |
| |curl -vi -X PUT \ |
| -H "Accept: text/xml" \ |
| -H "Content-Type: text/xml" \ |
| -d '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><CellSet><Row key="cm93NQo="><Cell column="Y2Y6ZQo=">dmFsdWU1Cg==</Cell></Row></CellSet>' \ |
| "http://example.com:8000/users/fakerow" |
| |
| curl -vi -X PUT \ |
| -H "Accept: application/json" \ |
| -H "Content-Type: application/json" \ |
| -d '{"Row":[{"key":"cm93NQo=", "Cell": [{"column":"Y2Y6ZQo=", "$":"dmFsdWU1Cg=="}]}]}' \ |
| "example.com:8000/users/fakerow" |
| |
| |=== |
| [[xml_schema]] |
| === REST XML Schema |
| |
| [source,xml] |
| ---- |
| <schema xmlns="http://www.w3.org/2001/XMLSchema" xmlns:tns="RESTSchema"> |
| |
| <element name="Version" type="tns:Version"></element> |
| |
| <complexType name="Version"> |
| <attribute name="REST" type="string"></attribute> |
| <attribute name="JVM" type="string"></attribute> |
| <attribute name="OS" type="string"></attribute> |
| <attribute name="Server" type="string"></attribute> |
| <attribute name="Jersey" type="string"></attribute> |
| </complexType> |
| |
| <element name="TableList" type="tns:TableList"></element> |
| |
| <complexType name="TableList"> |
| <sequence> |
| <element name="table" type="tns:Table" maxOccurs="unbounded" minOccurs="1"></element> |
| </sequence> |
| </complexType> |
| |
| <complexType name="Table"> |
| <sequence> |
| <element name="name" type="string"></element> |
| </sequence> |
| </complexType> |
| |
| <element name="TableInfo" type="tns:TableInfo"></element> |
| |
| <complexType name="TableInfo"> |
| <sequence> |
| <element name="region" type="tns:TableRegion" maxOccurs="unbounded" minOccurs="1"></element> |
| </sequence> |
| <attribute name="name" type="string"></attribute> |
| </complexType> |
| |
| <complexType name="TableRegion"> |
| <attribute name="name" type="string"></attribute> |
| <attribute name="id" type="int"></attribute> |
| <attribute name="startKey" type="base64Binary"></attribute> |
| <attribute name="endKey" type="base64Binary"></attribute> |
| <attribute name="location" type="string"></attribute> |
| </complexType> |
| |
| <element name="TableSchema" type="tns:TableSchema"></element> |
| |
| <complexType name="TableSchema"> |
| <sequence> |
| <element name="column" type="tns:ColumnSchema" maxOccurs="unbounded" minOccurs="1"></element> |
| </sequence> |
| <attribute name="name" type="string"></attribute> |
| <anyAttribute></anyAttribute> |
| </complexType> |
| |
| <complexType name="ColumnSchema"> |
| <attribute name="name" type="string"></attribute> |
| <anyAttribute></anyAttribute> |
| </complexType> |
| |
| <element name="CellSet" type="tns:CellSet"></element> |
| |
| <complexType name="CellSet"> |
| <sequence> |
| <element name="row" type="tns:Row" maxOccurs="unbounded" minOccurs="1"></element> |
| </sequence> |
| </complexType> |
| |
| <element name="Row" type="tns:Row"></element> |
| |
| <complexType name="Row"> |
| <sequence> |
| <element name="key" type="base64Binary"></element> |
| <element name="cell" type="tns:Cell" maxOccurs="unbounded" minOccurs="1"></element> |
| </sequence> |
| </complexType> |
| |
| <element name="Cell" type="tns:Cell"></element> |
| |
| <complexType name="Cell"> |
| <sequence> |
| <element name="value" maxOccurs="1" minOccurs="1"> |
| <simpleType><restriction base="base64Binary"></restriction> |
| </simpleType> |
| </element> |
| </sequence> |
| <attribute name="column" type="base64Binary" /> |
| <attribute name="timestamp" type="int" /> |
| </complexType> |
| |
| <element name="Scanner" type="tns:Scanner"></element> |
| |
| <complexType name="Scanner"> |
| <sequence> |
| <element name="column" type="base64Binary" minOccurs="0" maxOccurs="unbounded"></element> |
| </sequence> |
| <sequence> |
| <element name="filter" type="string" minOccurs="0" maxOccurs="1"></element> |
| </sequence> |
| <attribute name="startRow" type="base64Binary"></attribute> |
| <attribute name="endRow" type="base64Binary"></attribute> |
| <attribute name="batch" type="int"></attribute> |
| <attribute name="startTime" type="int"></attribute> |
| <attribute name="endTime" type="int"></attribute> |
| </complexType> |
| |
| <element name="StorageClusterVersion" type="tns:StorageClusterVersion" /> |
| |
| <complexType name="StorageClusterVersion"> |
| <attribute name="version" type="string"></attribute> |
| </complexType> |
| |
| <element name="StorageClusterStatus" |
| type="tns:StorageClusterStatus"> |
| </element> |
| |
| <complexType name="StorageClusterStatus"> |
| <sequence> |
| <element name="liveNode" type="tns:Node" |
| maxOccurs="unbounded" minOccurs="0"> |
| </element> |
| <element name="deadNode" type="string" maxOccurs="unbounded" |
| minOccurs="0"> |
| </element> |
| </sequence> |
| <attribute name="regions" type="int"></attribute> |
| <attribute name="requests" type="int"></attribute> |
| <attribute name="averageLoad" type="float"></attribute> |
| </complexType> |
| |
| <complexType name="Node"> |
| <sequence> |
| <element name="region" type="tns:Region" |
| maxOccurs="unbounded" minOccurs="0"> |
| </element> |
| </sequence> |
| <attribute name="name" type="string"></attribute> |
| <attribute name="startCode" type="int"></attribute> |
| <attribute name="requests" type="int"></attribute> |
| <attribute name="heapSizeMB" type="int"></attribute> |
| <attribute name="maxHeapSizeMB" type="int"></attribute> |
| </complexType> |
| |
| <complexType name="Region"> |
| <attribute name="name" type="base64Binary"></attribute> |
| <attribute name="stores" type="int"></attribute> |
| <attribute name="storefiles" type="int"></attribute> |
| <attribute name="storefileSizeMB" type="int"></attribute> |
| <attribute name="memstoreSizeMB" type="int"></attribute> |
| <attribute name="storefileIndexSizeMB" type="int"></attribute> |
| </complexType> |
| |
| </schema> |
| ---- |
| |
| [[protobufs_schema]] |
| === REST Protobufs Schema |
| |
| [source,protobuf] |
| ---- |
| message Version { |
| optional string restVersion = 1; |
| optional string jvmVersion = 2; |
| optional string osVersion = 3; |
| optional string serverVersion = 4; |
| optional string jerseyVersion = 5; |
| } |
| |
| message StorageClusterStatus { |
| message Region { |
| required bytes name = 1; |
| optional int32 stores = 2; |
| optional int32 storefiles = 3; |
| optional int32 storefileSizeMB = 4; |
| optional int32 memstoreSizeMB = 5; |
| optional int32 storefileIndexSizeMB = 6; |
| } |
| message Node { |
| required string name = 1; // name:port |
| optional int64 startCode = 2; |
| optional int32 requests = 3; |
| optional int32 heapSizeMB = 4; |
| optional int32 maxHeapSizeMB = 5; |
| repeated Region regions = 6; |
| } |
| // node status |
| repeated Node liveNodes = 1; |
| repeated string deadNodes = 2; |
| // summary statistics |
| optional int32 regions = 3; |
| optional int32 requests = 4; |
| optional double averageLoad = 5; |
| } |
| |
| message TableList { |
| repeated string name = 1; |
| } |
| |
| message TableInfo { |
| required string name = 1; |
| message Region { |
| required string name = 1; |
| optional bytes startKey = 2; |
| optional bytes endKey = 3; |
| optional int64 id = 4; |
| optional string location = 5; |
| } |
| repeated Region regions = 2; |
| } |
| |
| message TableSchema { |
| optional string name = 1; |
| message Attribute { |
| required string name = 1; |
| required string value = 2; |
| } |
| repeated Attribute attrs = 2; |
| repeated ColumnSchema columns = 3; |
| // optional helpful encodings of commonly used attributes |
| optional bool inMemory = 4; |
| optional bool readOnly = 5; |
| } |
| |
| message ColumnSchema { |
| optional string name = 1; |
| message Attribute { |
| required string name = 1; |
| required string value = 2; |
| } |
| repeated Attribute attrs = 2; |
| // optional helpful encodings of commonly used attributes |
| optional int32 ttl = 3; |
| optional int32 maxVersions = 4; |
| optional string compression = 5; |
| } |
| |
| message Cell { |
| optional bytes row = 1; // unused if Cell is in a CellSet |
| optional bytes column = 2; |
| optional int64 timestamp = 3; |
| optional bytes data = 4; |
| } |
| |
| message CellSet { |
| message Row { |
| required bytes key = 1; |
| repeated Cell values = 2; |
| } |
| repeated Row rows = 1; |
| } |
| |
| message Scanner { |
| optional bytes startRow = 1; |
| optional bytes endRow = 2; |
| repeated bytes columns = 3; |
| optional int32 batch = 4; |
| optional int64 startTime = 5; |
| optional int64 endTime = 6; |
| } |
| ---- |
| |
| == Thrift |
| |
| Documentation about Thrift has moved to <<thrift>>. |
| |
| [[c]] |
| == C/C++ Apache HBase Client |
| |
| Facebook's Chip Turner wrote a pure C/C++ client. |
| link:https://github.com/facebook/native-cpp-hbase-client[Check it out]. |
| |
| [[jdo]] |
| == Using Java Data Objects (JDO) with HBase |
| |
| link:https://db.apache.org/jdo/[Java Data Objects (JDO)] is a standard way to |
| access persistent data in databases, using plain old Java objects (POJO) to |
| represent persistent data. |
| |
| .Dependencies |
| This code example has the following dependencies: |
| |
| . HBase 0.90.x or newer |
| . commons-beanutils.jar (http://commons.apache.org/) |
| . commons-pool-1.5.5.jar (http://commons.apache.org/) |
| . transactional-tableindexed for HBase 0.90 (https://github.com/hbase-trx/hbase-transactional-tableindexed) |
| |
| .Download `hbase-jdo` |
| Download the code from http://code.google.com/p/hbase-jdo/. |
| |
| .JDO Example |
| ==== |
| |
| This example uses JDO to create a table and an index, insert a row into a table, get |
| a row, get a column value, perform a query, and do some additional HBase operations. |
| |
| [source, java] |
| ---- |
| package com.apache.hadoop.hbase.client.jdo.examples; |
| |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.InputStream; |
| import java.util.Hashtable; |
| |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hbase.client.tableindexed.IndexedTable; |
| |
| import com.apache.hadoop.hbase.client.jdo.AbstractHBaseDBO; |
| import com.apache.hadoop.hbase.client.jdo.HBaseBigFile; |
| import com.apache.hadoop.hbase.client.jdo.HBaseDBOImpl; |
| import com.apache.hadoop.hbase.client.jdo.query.DeleteQuery; |
| import com.apache.hadoop.hbase.client.jdo.query.HBaseOrder; |
| import com.apache.hadoop.hbase.client.jdo.query.HBaseParam; |
| import com.apache.hadoop.hbase.client.jdo.query.InsertQuery; |
| import com.apache.hadoop.hbase.client.jdo.query.QSearch; |
| import com.apache.hadoop.hbase.client.jdo.query.SelectQuery; |
| import com.apache.hadoop.hbase.client.jdo.query.UpdateQuery; |
| |
| /** |
| * Hbase JDO Example. |
| * |
| * dependency library. |
| * - commons-beanutils.jar |
| * - commons-pool-1.5.5.jar |
| * - hbase0.90.0-transactionl.jar |
| * |
| * you can expand Delete,Select,Update,Insert Query classes. |
| * |
| */ |
| public class HBaseExample { |
| public static void main(String[] args) throws Exception { |
| AbstractHBaseDBO dbo = new HBaseDBOImpl(); |
| |
| //drop the table if it already exists. |
| if(dbo.isTableExist("user")){ |
| dbo.deleteTable("user"); |
| } |
| |
| //create table |
| dbo.createTableIfNotExist("user",HBaseOrder.DESC,"account"); |
| //dbo.createTableIfNotExist("user",HBaseOrder.ASC,"account"); |
| |
| //create index. |
| String[] cols={"id","name"}; |
| dbo.addIndexExistingTable("user","account",cols); |
| |
| //insert |
| InsertQuery insert = dbo.createInsertQuery("user"); |
| UserBean bean = new UserBean(); |
| bean.setFamily("account"); |
| bean.setAge(20); |
| bean.setEmail("ncanis@gmail.com"); |
| bean.setId("ncanis"); |
| bean.setName("ncanis"); |
| bean.setPassword("1111"); |
| insert.insert(bean); |
| |
| //select 1 row |
| SelectQuery select = dbo.createSelectQuery("user"); |
| UserBean resultBean = (UserBean)select.select(bean.getRow(),UserBean.class); |
| |
| // select column value. |
| String value = (String)select.selectColumn(bean.getRow(),"account","id",String.class); |
| |
| // search with option (QSearch has EQUAL, NOT_EQUAL, LIKE) |
| // select id,password,name,email from account where id='ncanis' limit startRow,20 |
| HBaseParam param = new HBaseParam(); |
| param.setPage(bean.getRow(),20); |
| param.addColumn("id","password","name","email"); |
| param.addSearchOption("id","ncanis",QSearch.EQUAL); |
| select.search("account", param, UserBean.class); |
| |
| // search column value is existing. |
| boolean isExist = select.existColumnValue("account","id","ncanis".getBytes()); |
| |
| // update password. |
| UpdateQuery update = dbo.createUpdateQuery("user"); |
| Hashtable<String, byte[]> colsTable = new Hashtable<String, byte[]>(); |
| colsTable.put("password","2222".getBytes()); |
| update.update(bean.getRow(),"account",colsTable); |
| |
| //delete |
| DeleteQuery delete = dbo.createDeleteQuery("user"); |
| delete.deleteRow(resultBean.getRow()); |
| |
| //////////////////////////////////// |
| // etc |
| |
| // HTable pool with apache commons pool |
| // borrow and release. HBasePoolManager(maxActive, minIdle etc..) |
| IndexedTable table = dbo.getPool().borrow("user"); |
| dbo.getPool().release(table); |
| |
| // upload bigFile by hadoop directly. |
| HBaseBigFile bigFile = new HBaseBigFile(); |
| File file = new File("doc/movie.avi"); |
| FileInputStream fis = new FileInputStream(file); |
| Path rootPath = new Path("/files/"); |
| String filename = "movie.avi"; |
| bigFile.uploadFile(rootPath,filename,fis,true); |
| |
| // receive file stream from hadoop. |
| Path p = new Path(rootPath,filename); |
| InputStream is = bigFile.path2Stream(p,4096); |
| |
| } |
| } |
| ---- |
| ==== |
| |
| [[scala]] |
| == Scala |
| |
| === Setting the Classpath |
| |
| To use Scala with HBase, your CLASSPATH must include HBase's classpath as well as |
| the Scala JARs required by your code. First, use the following command on a server |
| running the HBase RegionServer process, to get HBase's classpath. |
| |
| [source, bash] |
| ---- |
| $ ps aux |grep regionserver| awk -F 'java.library.path=' {'print $2'} | awk {'print $1'} |
| |
| /usr/lib/hadoop/lib/native:/usr/lib/hbase/lib/native/Linux-amd64-64 |
| ---- |
| |
| Set the `$CLASSPATH` environment variable to include the path you found in the previous |
| step, plus the path of `scala-library.jar` and each additional Scala-related JAR needed for |
| your project. |
| |
| [source, bash] |
| ---- |
| $ export CLASSPATH=$CLASSPATH:/usr/lib/hadoop/lib/native:/usr/lib/hbase/lib/native/Linux-amd64-64:/path/to/scala-library.jar |
| ---- |
| |
| === Scala SBT File |
| |
| Your `build.sbt` file needs the following `resolvers` and `libraryDependencies` to work |
| with HBase. |
| |
| ---- |
| resolvers += "Apache HBase" at "https://repository.apache.org/content/repositories/releases" |
| |
| resolvers += "Thrift" at "http://people.apache.org/~rawson/repo/" |
| |
| libraryDependencies ++= Seq( |
| "org.apache.hadoop" % "hadoop-core" % "0.20.2", |
| "org.apache.hbase" % "hbase" % "0.90.4" |
| ) |
| ---- |
| |
| === Example Scala Code |
| |
| This example lists HBase tables, creates a new table, and adds a row to it. |
| |
| [source, scala] |
| ---- |
| import org.apache.hadoop.hbase.HBaseConfiguration |
| import org.apache.hadoop.hbase.client.{Connection,ConnectionFactory,HBaseAdmin,HTable,Put,Get} |
| import org.apache.hadoop.hbase.util.Bytes |
| |
| |
| val conf = new HBaseConfiguration() |
| val connection = ConnectionFactory.createConnection(conf); |
| val admin = connection.getAdmin(); |
| |
| // list the tables |
| val listtables=admin.listTables() |
| listtables.foreach(println) |
| |
| // let's insert some data in 'mytable' and get the row |
| |
| val table = new HTable(conf, "mytable") |
| |
| val theput= new Put(Bytes.toBytes("rowkey1")) |
| |
| theput.add(Bytes.toBytes("ids"),Bytes.toBytes("id1"),Bytes.toBytes("one")) |
| table.put(theput) |
| |
| val theget= new Get(Bytes.toBytes("rowkey1")) |
| val result=table.get(theget) |
| val value=result.value() |
| println(Bytes.toString(value)) |
| ---- |
| |
| [[jython]] |
| == Jython |
| |
| |
| === Setting the Classpath |
| |
| To use Jython with HBase, your CLASSPATH must include HBase's classpath as well as |
| the Jython JARs required by your code. First, use the following command on a server |
| running the HBase RegionServer process, to get HBase's classpath. |
| |
| [source, bash] |
| ---- |
| $ ps aux |grep regionserver| awk -F 'java.library.path=' {'print $2'} | awk {'print $1'} |
| |
| /usr/lib/hadoop/lib/native:/usr/lib/hbase/lib/native/Linux-amd64-64 |
| ---- |
| |
| Set the `$CLASSPATH` environment variable to include the path you found in the previous |
| step, plus the path to `jython.jar` and each additional Jython-related JAR needed for |
| your project. |
| |
| [source, bash] |
| ---- |
| $ export CLASSPATH=$CLASSPATH:/usr/lib/hadoop/lib/native:/usr/lib/hbase/lib/native/Linux-amd64-64:/path/to/jython.jar |
| ---- |
| |
| Start a Jython shell with HBase and Hadoop JARs in the classpath: |
| |
| [source, bash] |
| ---- |
| $ bin/hbase org.python.util.jython |
| ---- |
| |
| === Jython Code Examples |
| |
| .Table Creation, Population, Get, and Delete with Jython |
| ==== |
| The following Jython code example creates a table, populates it with data, fetches |
| the data, and deletes the table. |
| |
| [source,jython] |
| ---- |
| import java.lang |
| from org.apache.hadoop.hbase import HBaseConfiguration, HTableDescriptor, HColumnDescriptor, HConstants, TableName |
| from org.apache.hadoop.hbase.client import HBaseAdmin, HTable, Get |
| from org.apache.hadoop.hbase.io import Cell, RowResult |
| |
| # First get a conf object. This will read in the configuration |
| # that is out in your hbase-*.xml files such as location of the |
| # hbase master node. |
| conf = HBaseConfiguration() |
| |
| # Create a table named 'test' that has two column families, |
| # one named 'content', and the other 'anchor'. The colons |
| # are required for column family names. |
| tablename = TableName.valueOf("test") |
| |
| desc = HTableDescriptor(tablename) |
| desc.addFamily(HColumnDescriptor("content:")) |
| desc.addFamily(HColumnDescriptor("anchor:")) |
| admin = HBaseAdmin(conf) |
| |
| # Drop and recreate if it exists |
| if admin.tableExists(tablename): |
| admin.disableTable(tablename) |
| admin.deleteTable(tablename) |
| admin.createTable(desc) |
| |
| tables = admin.listTables() |
| table = HTable(conf, tablename) |
| |
| # Add content to 'content:' on a row named 'row_x' |
| row = 'row_x' |
| update = Get(row) |
| update.put('content:', 'some content') |
| table.commit(update) |
| |
| # Now fetch the content just added, returns a byte[] |
| data_row = table.get(row, "content:") |
| data = java.lang.String(data_row.value, "UTF8") |
| |
| print "The fetched row contains the value '%s'" % data |
| |
| # Delete the table. |
| admin.disableTable(desc.getName()) |
| admin.deleteTable(desc.getName()) |
| ---- |
| ==== |
| |
| .Table Scan Using Jython |
| ==== |
| This example scans a table and returns the results that match a given family qualifier. |
| |
| [source, jython] |
| ---- |
| # Print all rows that are members of a particular column family |
| # by passing a regex for family qualifier |
| |
| import java.lang |
| |
| from org.apache.hadoop.hbase import HBaseConfiguration |
| from org.apache.hadoop.hbase.client import HTable |
| |
| conf = HBaseConfiguration() |
| |
| table = HTable(conf, "wiki") |
| col = "title:.*$" |
| |
| scanner = table.getScanner([col], "") |
| while 1: |
| result = scanner.next() |
| if not result: |
| break |
| print java.lang.String(result.row), java.lang.String(result.get('title:').value) |
| ---- |
| ==== |