0.90/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScan.java - hbase - Git at Google

 /**
  * Copyright 2007 The Apache Software Foundation
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.hadoop.hbase.mapreduce;

 import java.io.File;
 import java.io.IOException;
 import java.util.Map;
 import java.util.NavigableMap;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;

 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;

 /**
  * Tests various scan start and stop row scenarios. This is set in a scan and
  * tested in a MapReduce job to see if that is handed over and done properly
  * too.
  */
 public class TestTableInputFormatScan {

   static final Log LOG = LogFactory.getLog(TestTableInputFormatScan.class);
   static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

   static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
   static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
   static final String KEY_STARTROW = "startRow";
   static final String KEY_LASTROW = "stpRow";

   private static HTable table = null;

   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
     // switch TIF to log at DEBUG level
     TEST_UTIL.enableDebug(TableInputFormat.class);
     TEST_UTIL.enableDebug(TableInputFormatBase.class);
     // start mini hbase cluster
     TEST_UTIL.startMiniCluster(3);
     // create and fill table
     table = TEST_UTIL.createTable(TABLE_NAME, INPUT_FAMILY);
     TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
     TEST_UTIL.loadTable(table, INPUT_FAMILY);
     // start MR cluster
     TEST_UTIL.startMiniMapReduceCluster();
   }

   @AfterClass
   public static void tearDownAfterClass() throws Exception {
     TEST_UTIL.shutdownMiniMapReduceCluster();
     TEST_UTIL.shutdownMiniCluster();
   }

   @Before
   public void setUp() throws Exception {
     // nothing
   }

   /**
    * @throws java.lang.Exception
    */
   @After
   public void tearDown() throws Exception {
     Configuration c = TEST_UTIL.getConfiguration();
     FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
   }

   /**
    * Pass the key and value to reduce.
    */
   public static class ScanMapper
   extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {

     /**
      * Pass the key and value to reduce.
      *
      * @param key  The key, here "aaa", "aab" etc.
      * @param value  The value is the same as the key.
      * @param context  The task context.
      * @throws IOException When reading the rows fails.
      */
     @Override
     public void map(ImmutableBytesWritable key, Result value,
       Context context)
     throws IOException, InterruptedException {
       if (value.size() != 1) {
         throw new IOException("There should only be one input column");
       }
       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
         cf = value.getMap();
       if(!cf.containsKey(INPUT_FAMILY)) {
         throw new IOException("Wrong input columns. Missing: '" +
           Bytes.toString(INPUT_FAMILY) + "'.");
       }
       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
       LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
         ", value -> " + val);
       context.write(key, key);
     }

   }

   /**
    * Checks the last and first key seen against the scanner boundaries.
    */
   public static class ScanReducer
   extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
                   NullWritable, NullWritable> {

     private String first = null;
     private String last = null;

     protected void reduce(ImmutableBytesWritable key,
         Iterable<ImmutableBytesWritable> values, Context context)
     throws IOException ,InterruptedException {
       int count = 0;
       for (ImmutableBytesWritable value : values) {
         String val = Bytes.toStringBinary(value.get());
         LOG.info("reduce: key[" + count + "] -> " +
           Bytes.toStringBinary(key.get()) + ", value -> " + val);
         if (first == null) first = val;
         last = val;
         count++;
       }
     }

     protected void cleanup(Context context)
     throws IOException, InterruptedException {
       Configuration c = context.getConfiguration();
       String startRow = c.get(KEY_STARTROW);
       String lastRow = c.get(KEY_LASTROW);
       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
       if (startRow != null && startRow.length() > 0) {
         assertEquals(startRow, first);
       }
       if (lastRow != null && lastRow.length() > 0) {
         assertEquals(lastRow, last);
       }
     }

   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanEmptyToEmpty()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan(null, null, null);
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanEmptyToAPP()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan(null, "app", "apo");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanEmptyToBBA()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan(null, "bba", "baz");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanEmptyToBBB()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan(null, "bbb", "bba");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanEmptyToOPP()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan(null, "opp", "opo");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanOBBToOPP()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan("obb", "opp", "opo");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanOBBToQPP()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan("obb", "qpp", "qpo");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanOPPToEmpty()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan("opp", null, "zzz");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanYYXToEmpty()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan("yyx", null, "zzz");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanYYYToEmpty()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan("yyy", null, "zzz");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   @Test
   public void testScanYZYToEmpty()
   throws IOException, InterruptedException, ClassNotFoundException {
     testScan("yzy", null, "zzz");
   }

   /**
    * Tests a MR scan using specific start and stop rows.
    *
    * @throws IOException
    * @throws ClassNotFoundException
    * @throws InterruptedException
    */
   private void testScan(String start, String stop, String last)
   throws IOException, InterruptedException, ClassNotFoundException {
     String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
     "To" + (stop != null ? stop.toUpperCase() : "Empty");
     LOG.info("Before map/reduce startup - job " + jobName);
     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
     Scan scan = new Scan();
     scan.addFamily(INPUT_FAMILY);
     if (start != null) {
       scan.setStartRow(Bytes.toBytes(start));
     }
     c.set(KEY_STARTROW, start != null ? start : "");
     if (stop != null) {
       scan.setStopRow(Bytes.toBytes(stop));
     }
     c.set(KEY_LASTROW, last != null ? last : "");
     LOG.info("scan before: " + scan);
     Job job = new Job(c, jobName);
     TableMapReduceUtil.initTableMapperJob(
       Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
       ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
     job.setReducerClass(ScanReducer.class);
     job.setNumReduceTasks(1); // one to get final "first" and "last" key
     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
     LOG.info("Started " + job.getJobName());
     job.waitForCompletion(true);
     assertTrue(job.isComplete());
     LOG.info("After map/reduce completion - job " + jobName);
   }
 }
	/**
	* Copyright 2007 The Apache Software Foundation
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.hadoop.hbase.mapreduce;

	import java.io.File;
	import java.io.IOException;
	import java.util.Map;
	import java.util.NavigableMap;

	import org.apache.commons.logging.Log;
	import org.apache.commons.logging.LogFactory;
	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileUtil;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.hbase.HBaseTestingUtility;
	import org.apache.hadoop.hbase.client.HTable;
	import org.apache.hadoop.hbase.client.Result;
	import org.apache.hadoop.hbase.client.Scan;
	import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
	import org.apache.hadoop.hbase.util.Bytes;
	import org.apache.hadoop.io.NullWritable;
	import org.apache.hadoop.mapreduce.Job;
	import org.apache.hadoop.mapreduce.Reducer;
	import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
	import org.junit.After;
	import org.junit.AfterClass;
	import org.junit.Before;
	import org.junit.BeforeClass;
	import org.junit.Test;

	import static org.junit.Assert.assertEquals;
	import static org.junit.Assert.assertTrue;

	/**
	* Tests various scan start and stop row scenarios. This is set in a scan and
	* tested in a MapReduce job to see if that is handed over and done properly
	* too.
	*/
	public class TestTableInputFormatScan {

	static final Log LOG = LogFactory.getLog(TestTableInputFormatScan.class);
	static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

	static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
	static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
	static final String KEY_STARTROW = "startRow";
	static final String KEY_LASTROW = "stpRow";

	private static HTable table = null;

	@BeforeClass
	public static void setUpBeforeClass() throws Exception {
	// switch TIF to log at DEBUG level
	TEST_UTIL.enableDebug(TableInputFormat.class);
	TEST_UTIL.enableDebug(TableInputFormatBase.class);
	// start mini hbase cluster
	TEST_UTIL.startMiniCluster(3);
	// create and fill table
	table = TEST_UTIL.createTable(TABLE_NAME, INPUT_FAMILY);
	TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
	TEST_UTIL.loadTable(table, INPUT_FAMILY);
	// start MR cluster
	TEST_UTIL.startMiniMapReduceCluster();
	}

	@AfterClass
	public static void tearDownAfterClass() throws Exception {
	TEST_UTIL.shutdownMiniMapReduceCluster();
	TEST_UTIL.shutdownMiniCluster();
	}

	@Before
	public void setUp() throws Exception {
	// nothing
	}

	/**
	* @throws java.lang.Exception
	*/
	@After
	public void tearDown() throws Exception {
	Configuration c = TEST_UTIL.getConfiguration();
	FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
	}

	/**
	* Pass the key and value to reduce.
	*/
	public static class ScanMapper
	extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {

	/**
	* Pass the key and value to reduce.
	*
	* @param key The key, here "aaa", "aab" etc.
	* @param value The value is the same as the key.
	* @param context The task context.
	* @throws IOException When reading the rows fails.
	*/
	@Override
	public void map(ImmutableBytesWritable key, Result value,
	Context context)
	throws IOException, InterruptedException {
	if (value.size() != 1) {
	throw new IOException("There should only be one input column");
	}
	Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
	cf = value.getMap();
	if(!cf.containsKey(INPUT_FAMILY)) {
	throw new IOException("Wrong input columns. Missing: '" +
	Bytes.toString(INPUT_FAMILY) + "'.");
	}
	String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
	LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
	", value -> " + val);
	context.write(key, key);
	}

	}

	/**
	* Checks the last and first key seen against the scanner boundaries.
	*/
	public static class ScanReducer
	extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
	NullWritable, NullWritable> {

	private String first = null;
	private String last = null;

	protected void reduce(ImmutableBytesWritable key,
	Iterable<ImmutableBytesWritable> values, Context context)
	throws IOException ,InterruptedException {
	int count = 0;
	for (ImmutableBytesWritable value : values) {
	String val = Bytes.toStringBinary(value.get());
	LOG.info("reduce: key[" + count + "] -> " +
	Bytes.toStringBinary(key.get()) + ", value -> " + val);
	if (first == null) first = val;
	last = val;
	count++;
	}
	}

	protected void cleanup(Context context)
	throws IOException, InterruptedException {
	Configuration c = context.getConfiguration();
	String startRow = c.get(KEY_STARTROW);
	String lastRow = c.get(KEY_LASTROW);
	LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
	LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
	if (startRow != null && startRow.length() > 0) {
	assertEquals(startRow, first);
	}
	if (lastRow != null && lastRow.length() > 0) {
	assertEquals(lastRow, last);
	}
	}

	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanEmptyToEmpty()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan(null, null, null);
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanEmptyToAPP()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan(null, "app", "apo");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanEmptyToBBA()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan(null, "bba", "baz");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanEmptyToBBB()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan(null, "bbb", "bba");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanEmptyToOPP()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan(null, "opp", "opo");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanOBBToOPP()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan("obb", "opp", "opo");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanOBBToQPP()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan("obb", "qpp", "qpo");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanOPPToEmpty()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan("opp", null, "zzz");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanYYXToEmpty()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan("yyx", null, "zzz");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanYYYToEmpty()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan("yyy", null, "zzz");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	@Test
	public void testScanYZYToEmpty()
	throws IOException, InterruptedException, ClassNotFoundException {
	testScan("yzy", null, "zzz");
	}

	/**
	* Tests a MR scan using specific start and stop rows.
	*
	* @throws IOException
	* @throws ClassNotFoundException
	* @throws InterruptedException
	*/
	private void testScan(String start, String stop, String last)
	throws IOException, InterruptedException, ClassNotFoundException {
	String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
	"To" + (stop != null ? stop.toUpperCase() : "Empty");
	LOG.info("Before map/reduce startup - job " + jobName);
	Configuration c = new Configuration(TEST_UTIL.getConfiguration());
	Scan scan = new Scan();
	scan.addFamily(INPUT_FAMILY);
	if (start != null) {
	scan.setStartRow(Bytes.toBytes(start));
	}
	c.set(KEY_STARTROW, start != null ? start : "");
	if (stop != null) {
	scan.setStopRow(Bytes.toBytes(stop));
	}
	c.set(KEY_LASTROW, last != null ? last : "");
	LOG.info("scan before: " + scan);
	Job job = new Job(c, jobName);
	TableMapReduceUtil.initTableMapperJob(
	Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
	ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
	job.setReducerClass(ScanReducer.class);
	job.setNumReduceTasks(1); // one to get final "first" and "last" key
	FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
	LOG.info("Started " + job.getJobName());
	job.waitForCompletion(true);
	assertTrue(job.isComplete());
	LOG.info("After map/reduce completion - job " + jobName);
	}
	}