/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Writes HFiles. Passed KeyValues must arrive in order.
 * Writes the current time as the sequence id for the file. Sets the
 * major-compacted attribute on the created HFiles. Calling write(null,null)
 * will forcibly roll all HFiles being written.
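 * <p>
 * A minimal sketch of forcing such a roll from inside a task (a hypothetical
 * reducer context is assumed; its key/value types must match this output
 * format):
 * <pre>
 * // emitting a null key and value rolls every open HFile writer
 * context.write(null, null);
 * </pre>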
* <p>
* Using this class as part of a MapReduce job is best done
* using {@link #configureIncrementalLoad(Job, HTable)}.
* @see KeyValueSortReducer
* @deprecated use {@link HFileOutputFormat2} instead.
*/
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
private static final Log LOG = LogFactory.getLog(HFileOutputFormat.class);
  // This constant is public because clients may set it on their Configuration
  // and therefore need to refer to this symbol. It is retained for backwards
  // compatibility; use it only to override the auto-detected data block
  // encoding.
public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;
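
  // A hedged sketch: to bypass auto-detection and force one encoding for
  // every column family written by the job (DataBlockEncoding is imported
  // above; FAST_DIFF is just an example value):
  //
  //   conf.set(HFileOutputFormat.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY,
  //       DataBlockEncoding.FAST_DIFF.name());
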
@Override
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
final TaskAttemptContext context) throws IOException, InterruptedException {
return HFileOutputFormat2.createRecordWriter(context, this.getOutputCommitter(context));
}
/**
* Configure a MapReduce Job to perform an incremental load into the given
* table. This
* <ul>
* <li>Inspects the table to configure a total order partitioner</li>
* <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
* <li>Sets the number of reduce tasks to match the current number of regions</li>
* <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
* <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
* PutSortReducer)</li>
* </ul>
* The user should be sure to set the map output value class to either KeyValue or Put before
* running this function.
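 * <p>
 * A minimal driver sketch (the mapper class, table name, and paths below are
 * illustrative assumptions, not part of this API):
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * Job job = Job.getInstance(conf, "hfile-bulk-load");
 * job.setMapperClass(MyKeyValueMapper.class);   // hypothetical mapper
 * job.setMapOutputKeyClass(ImmutableBytesWritable.class);
 * job.setMapOutputValueClass(KeyValue.class);
 * FileInputFormat.addInputPath(job, new Path("/input"));
 * FileOutputFormat.setOutputPath(job, new Path("/hfiles"));
 * HTable table = new HTable(conf, "my_table");  // hypothetical table
 * HFileOutputFormat.configureIncrementalLoad(job, table);
 * </pre>
 * The generated HFiles can then be moved into the table with the
 * {@code LoadIncrementalHFiles} (completebulkload) tool.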
*/
public static void configureIncrementalLoad(Job job, HTable table)
throws IOException {
HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(),
table.getRegionLocator());
}
/**
* Runs inside the task to deserialize column family to compression algorithm
* map from the configuration.
*
* @param conf to read the serialized values from
* @return a map from column family to the configured compression algorithm
*/
@InterfaceAudience.Private
  static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration conf) {
return HFileOutputFormat2.createFamilyCompressionMap(conf);
}
/**
* Runs inside the task to deserialize column family to bloom filter type
* map from the configuration.
*
* @param conf to read the serialized values from
   * @return a map from column family to the configured bloom filter type
*/
@InterfaceAudience.Private
static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
}
/**
* Runs inside the task to deserialize column family to block size
* map from the configuration.
*
* @param conf to read the serialized values from
* @return a map from column family to the configured block size
*/
@InterfaceAudience.Private
static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
}
/**
* Runs inside the task to deserialize column family to data block encoding
* type map from the configuration.
*
* @param conf to read the serialized values from
   * @return a map from column family to the configured data block encoding
   *         for the family
*/
@InterfaceAudience.Private
static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
Configuration conf) {
return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
}
/**
* Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
   * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
*/
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
throws IOException {
HFileOutputFormat2.configurePartitioner(job, splitPoints);
}
  /**
   * Serialize column family to compression algorithm map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @param table to read the properties from
   * @param conf to persist serialized values into
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @InterfaceAudience.Private
  static void configureCompression(Table table, Configuration conf) throws IOException {
HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());
}
/**
* Serialize column family to block size map to configuration.
* Invoked while configuring the MR job for incremental load.
*
* @param table to read the properties from
* @param conf to persist serialized values into
* @throws IOException
* on failure to read column family descriptors
*/
@InterfaceAudience.Private
static void configureBlockSize(Table table, Configuration conf) throws IOException {
HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);
}
/**
* Serialize column family to bloom type map to configuration.
* Invoked while configuring the MR job for incremental load.
*
* @param table to read the properties from
* @param conf to persist serialized values into
* @throws IOException
* on failure to read column family descriptors
*/
@InterfaceAudience.Private
static void configureBloomType(Table table, Configuration conf) throws IOException {
HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);
}
/**
* Serialize column family to data block encoding map to configuration.
* Invoked while configuring the MR job for incremental load.
*
* @param table to read the properties from
* @param conf to persist serialized values into
* @throws IOException
* on failure to read column family descriptors
*/
@InterfaceAudience.Private
static void configureDataBlockEncoding(Table table,
Configuration conf) throws IOException {
HTableDescriptor tableDescriptor = table.getTableDescriptor();
HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);
}
}