| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.hcatalog.hbase; |
| |
| import java.io.IOException; |
| import java.util.List; |
| |
| import org.apache.hadoop.hbase.client.HTable; |
| import org.apache.hadoop.hbase.client.Result; |
| import org.apache.hadoop.hbase.client.Scan; |
| import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
| import org.apache.hadoop.hbase.mapred.TableSplit; |
| import org.apache.hadoop.hbase.mapreduce.TableInputFormat; |
| import org.apache.hadoop.mapred.HCatMapRedUtil; |
| import org.apache.hadoop.mapred.InputFormat; |
| import org.apache.hadoop.mapred.InputSplit; |
| import org.apache.hadoop.mapred.JobConf; |
| import org.apache.hadoop.mapred.RecordReader; |
| import org.apache.hadoop.mapred.Reporter; |
| import org.apache.hcatalog.common.HCatConstants; |
| import org.apache.hcatalog.common.HCatUtil; |
| import org.apache.hcatalog.mapreduce.InputJobInfo; |
| |
| /** |
| * This class HBaseInputFormat is a wrapper class of TableInputFormat in HBase. |
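*
* <p>A minimal usage sketch (illustrative only: the table name is
* hypothetical, and a serialized {@link InputJobInfo} must also be stored
* under {@link HCatConstants#HCAT_KEY_JOB_INFO} before records can be read):
* <pre>{@code
* JobConf job = new JobConf();
* job.set(TableInputFormat.INPUT_TABLE, "my_table"); // hypothetical table
* InputFormat<ImmutableBytesWritable, Result> format = new HBaseInputFormat();
* InputSplit[] splits = format.getSplits(job, 1);
* }</pre>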
| */ |
| class HBaseInputFormat implements InputFormat<ImmutableBytesWritable, Result> { |
| |
| private final TableInputFormat inputFormat; |
| |
| public HBaseInputFormat() { |
| inputFormat = new TableInputFormat(); |
| } |
| |
| /* |
| * @param instance of InputSplit |
| * |
| * @param instance of TaskAttemptContext |
| * |
| * @return RecordReader |
| * |
| * @throws IOException |
| * |
| * @throws InterruptedException |
| * |
| * @see |
| * org.apache.hadoop.mapreduce.InputFormat#createRecordReader(org.apache |
| * .hadoop.mapreduce.InputSplit, |
| * org.apache.hadoop.mapreduce.TaskAttemptContext) |
| */ |
| @Override |
| public RecordReader<ImmutableBytesWritable, Result> getRecordReader( |
| InputSplit split, JobConf job, Reporter reporter) |
| throws IOException { |
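// Recover the HCatalog job description that was serialized into the
// JobConf when the job was set up.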
| String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO); |
| InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); |
| |
| String tableName = job.get(TableInputFormat.INPUT_TABLE); |
| TableSplit tSplit = (TableSplit) split; |
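// Snapshot-aware reader: rows it returns are filtered against the table
// snapshot carried in the job info.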
| HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job); |
| inputFormat.setConf(job); |
| Scan inputScan = inputFormat.getScan(); |
| // TODO: Make the caching configurable by the user |
| inputScan.setCaching(200); |
| inputScan.setCacheBlocks(false); |
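// Clone the configured scan and restrict it to this split's row range.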
| Scan sc = new Scan(inputScan); |
| sc.setStartRow(tSplit.getStartRow()); |
| sc.setStopRow(tSplit.getEndRow()); |
| recordReader.setScan(sc); |
| recordReader.setHTable(new HTable(job, tableName)); |
| recordReader.init(); |
| return recordReader; |
| } |
| |
| /* |
| * @param jobContext |
| * |
| * @return List of InputSplit |
| * |
| * @throws IOException |
| * |
| * @throws InterruptedException |
| * |
| * @see |
| * org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce |
| * .JobContext) |
| */ |
| @Override |
public InputSplit[] getSplits(JobConf job, int numSplits)
| throws IOException { |
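// Delegate split computation to the underlying mapreduce
// TableInputFormat, then convert the results to mapred splits.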
| inputFormat.setConf(job); |
| return convertSplits(inputFormat.getSplits(HCatMapRedUtil.createJobContext(job, null, |
| Reporter.NULL))); |
| } |
| |
| private InputSplit[] convertSplits(List<org.apache.hadoop.mapreduce.InputSplit> splits) { |
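// Rewrap each mapreduce TableSplit as its mapred equivalent so callers
// of the old API receive the split type they expect.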
| InputSplit[] converted = new InputSplit[splits.size()]; |
| for (int i = 0; i < splits.size(); i++) { |
| org.apache.hadoop.hbase.mapreduce.TableSplit tableSplit = |
| (org.apache.hadoop.hbase.mapreduce.TableSplit) splits.get(i); |
| TableSplit newTableSplit = new TableSplit(tableSplit.getTableName(), |
| tableSplit.getStartRow(), |
| tableSplit.getEndRow(), tableSplit.getRegionLocation()); |
| converted[i] = newTableSplit; |
| } |
| return converted; |
| } |
| |
| } |