0.90/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java - hbase - Git at Google

 /**
  * Copyright 2010 The Apache Software Foundation
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.hadoop.hbase.regionserver;

 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.filter.Filter;
 import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
 import org.apache.hadoop.hbase.io.TimeRange;
 import org.apache.hadoop.hbase.util.Bytes;

 import java.util.NavigableSet;

 /**
  * A query matcher that is specifically designed for the scan case.
  */
 public class ScanQueryMatcher {
   // Optimization so we can skip lots of compares when we decide to skip
   // to the next row.
   private boolean stickyNextRow;
   private byte[] stopRow;

   protected TimeRange tr;

   protected Filter filter;

   /** Keeps track of deletes */
   protected DeleteTracker deletes;
   protected boolean retainDeletesInOutput;

   /** Keeps track of columns and versions */
   protected ColumnTracker columns;

   /** Key to seek to in memstore and StoreFiles */
   protected KeyValue startKey;

   /** Oldest allowed version stamp for TTL enforcement */
   protected long oldestStamp;

   /** Row comparator for the region this query is for */
   KeyValue.KeyComparator rowComparator;

   /** Row the query is on */
   protected byte [] row;

   /**
    * Constructs a ScanQueryMatcher for a Scan.
    * @param scan
    * @param family
    * @param columns
    * @param ttl
    * @param rowComparator
    */
   public ScanQueryMatcher(Scan scan, byte [] family,
       NavigableSet<byte[]> columns, long ttl,
       KeyValue.KeyComparator rowComparator, int maxVersions,
       boolean retainDeletesInOutput) {
     this.tr = scan.getTimeRange();
     this.oldestStamp = System.currentTimeMillis() - ttl;
     this.rowComparator = rowComparator;
     this.deletes =  new ScanDeleteTracker();
     this.stopRow = scan.getStopRow();
     this.startKey = KeyValue.createFirstOnRow(scan.getStartRow());
     this.filter = scan.getFilter();
     this.retainDeletesInOutput = retainDeletesInOutput;

     // Single branch to deal with two types of reads (columns vs all in family)
     if (columns == null || columns.size() == 0) {
       // use a specialized scan for wildcard column tracker.
       this.columns = new ScanWildcardColumnTracker(maxVersions);
     } else {
       // We can share the ExplicitColumnTracker, diff is we reset
       // between rows, not between storefiles.
       this.columns = new ExplicitColumnTracker(columns,maxVersions);
     }
   }
   public ScanQueryMatcher(Scan scan, byte [] family,
       NavigableSet<byte[]> columns, long ttl,
       KeyValue.KeyComparator rowComparator, int maxVersions) {
       /* By default we will not include deletes */
       /* deletes are included explicitly (for minor compaction) */
       this(scan, family, columns, ttl, rowComparator, maxVersions, false);
   }

   /**
    * Determines if the caller should do one of several things:
    * - seek/skip to the next row (MatchCode.SEEK_NEXT_ROW)
    * - seek/skip to the next column (MatchCode.SEEK_NEXT_COL)
    * - include the current KeyValue (MatchCode.INCLUDE)
    * - ignore the current KeyValue (MatchCode.SKIP)
    * - got to the next row (MatchCode.DONE)
    *
    * @param kv KeyValue to check
    * @return The match code instance.
    */
   public MatchCode match(KeyValue kv) {
     if (filter != null && filter.filterAllRemaining()) {
       return MatchCode.DONE_SCAN;
     }

     byte [] bytes = kv.getBuffer();
     int offset = kv.getOffset();
     int initialOffset = offset;

     int keyLength = Bytes.toInt(bytes, offset, Bytes.SIZEOF_INT);
     offset += KeyValue.ROW_OFFSET;

     short rowLength = Bytes.toShort(bytes, offset, Bytes.SIZEOF_SHORT);
     offset += Bytes.SIZEOF_SHORT;

     int ret = this.rowComparator.compareRows(row, 0, row.length,
         bytes, offset, rowLength);
     if (ret <= -1) {
       return MatchCode.DONE;
     } else if (ret >= 1) {
       // could optimize this, if necessary?
       // Could also be called SEEK_TO_CURRENT_ROW, but this
       // should be rare/never happens.
       return MatchCode.SEEK_NEXT_ROW;
     }

     // optimize case.
     if (this.stickyNextRow)
         return MatchCode.SEEK_NEXT_ROW;

     if (this.columns.done()) {
       stickyNextRow = true;
       return MatchCode.SEEK_NEXT_ROW;
     }

     //Passing rowLength
     offset += rowLength;

     //Skipping family
     byte familyLength = bytes [offset];
     offset += familyLength + 1;

     int qualLength = keyLength + KeyValue.ROW_OFFSET -
       (offset - initialOffset) - KeyValue.TIMESTAMP_TYPE_SIZE;

     long timestamp = kv.getTimestamp();
     if (isExpired(timestamp)) {
       // done, the rest of this column will also be expired as well.
       return getNextRowOrNextColumn(bytes, offset, qualLength);
     }

     byte type = kv.getType();
     if (isDelete(type)) {
       if (tr.withinOrAfterTimeRange(timestamp)) {
         this.deletes.add(bytes, offset, qualLength, timestamp, type);
         // Can't early out now, because DelFam come before any other keys
       }
       if (retainDeletesInOutput) {
         return MatchCode.INCLUDE;
       }
       else {
         return MatchCode.SKIP;
       }
     }

     if (!this.deletes.isEmpty() &&
         deletes.isDeleted(bytes, offset, qualLength, timestamp)) {

       // May be able to optimize the SKIP here, if we matched
       // due to a DelFam, we can skip to next row
       // due to a DelCol, we can skip to next col
       // But it requires more info out of isDelete().
       // needful -> million column challenge.
       return MatchCode.SKIP;
     }

     int timestampComparison = tr.compare(timestamp);
     if (timestampComparison >= 1) {
       return MatchCode.SKIP;
     } else if (timestampComparison <= -1) {
       return getNextRowOrNextColumn(bytes, offset, qualLength);
     }

     /**
      * Filters should be checked before checking column trackers. If we do
      * otherwise, as was previously being done, ColumnTracker may increment its
      * counter for even that KV which may be discarded later on by Filter. This
      * would lead to incorrect results in certain cases.
      */
     if (filter != null) {
       ReturnCode filterResponse = filter.filterKeyValue(kv);
       if (filterResponse == ReturnCode.SKIP) {
         return MatchCode.SKIP;
       } else if (filterResponse == ReturnCode.NEXT_COL) {
         return getNextRowOrNextColumn(bytes, offset, qualLength);
       } else if (filterResponse == ReturnCode.NEXT_ROW) {
         stickyNextRow = true;
         return MatchCode.SEEK_NEXT_ROW;
       } else if (filterResponse == ReturnCode.SEEK_NEXT_USING_HINT) {
         return MatchCode.SEEK_NEXT_USING_HINT;
       }
     }

     MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength, timestamp);
     /*
      * According to current implementation, colChecker can only be
      * SEEK_NEXT_COL, SEEK_NEXT_ROW, SKIP or INCLUDE. Therefore, always return
      * the MatchCode. If it is SEEK_NEXT_ROW, also set stickyNextRow.
      */
     if (colChecker == MatchCode.SEEK_NEXT_ROW) {
       stickyNextRow = true;
     }
     return colChecker;

   }

   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
       int qualLength) {
     if (columns instanceof ExplicitColumnTracker) {
       //We only come here when we know that columns is an instance of
       //ExplicitColumnTracker so we should never have a cast exception
       ((ExplicitColumnTracker)columns).doneWithColumn(bytes, offset,
           qualLength);
       if (columns.getColumnHint() == null) {
         return MatchCode.SEEK_NEXT_ROW;
       } else {
         return MatchCode.SEEK_NEXT_COL;
       }
     } else {
       return MatchCode.SEEK_NEXT_COL;
     }
   }

   public boolean moreRowsMayExistAfter(KeyValue kv) {
     if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
         rowComparator.compareRows(kv.getBuffer(),kv.getRowOffset(),
             kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
       // KV >= STOPROW
       // then NO there is nothing left.
       return false;
     } else {
       return true;
     }
   }

   /**
    * Set current row
    * @param row
    */
   public void setRow(byte [] row) {
     this.row = row;
     reset();
   }

   public void reset() {
     this.deletes.reset();
     this.columns.reset();

     stickyNextRow = false;
   }

   // should be in KeyValue.
   protected boolean isDelete(byte type) {
     return (type != KeyValue.Type.Put.getCode());
   }

   protected boolean isExpired(long timestamp) {
     return (timestamp < oldestStamp);
   }

   /**
    *
    * @return the start key
    */
   public KeyValue getStartKey() {
     return this.startKey;
   }

   public KeyValue getNextKeyHint(KeyValue kv) {
     if (filter == null) {
       return null;
     } else {
       return filter.getNextKeyHint(kv);
     }
   }

   public KeyValue getKeyForNextColumn(KeyValue kv) {
     ColumnCount nextColumn = columns.getColumnHint();
     if (nextColumn == null) {
       return KeyValue.createLastOnRow(
           kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
           kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
           kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
     } else {
       return KeyValue.createFirstOnRow(
           kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
           kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
           nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength());
     }
   }

   public KeyValue getKeyForNextRow(KeyValue kv) {
     return KeyValue.createLastOnRow(
         kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
         null, 0, 0,
         null, 0, 0);
   }

   /**
    * {@link #match} return codes.  These instruct the scanner moving through
    * memstores and StoreFiles what to do with the current KeyValue.
    * <p>
    * Additionally, this contains "early-out" language to tell the scanner to
    * move on to the next File (memstore or Storefile), or to return immediately.
    */
   public static enum MatchCode {
     /**
      * Include KeyValue in the returned result
      */
     INCLUDE,

     /**
      * Do not include KeyValue in the returned result
      */
     SKIP,

     /**
      * Do not include, jump to next StoreFile or memstore (in time order)
      */
     NEXT,

     /**
      * Do not include, return current result
      */
     DONE,

     /**
      * These codes are used by the ScanQueryMatcher
      */

     /**
      * Done with the row, seek there.
      */
     SEEK_NEXT_ROW,
     /**
      * Done with column, seek to next.
      */
     SEEK_NEXT_COL,

     /**
      * Done with scan, thanks to the row filter.
      */
     DONE_SCAN,

     /*
      * Seek to next key which is given as hint.
      */
     SEEK_NEXT_USING_HINT,
   }
 }
	/**
	* Copyright 2010 The Apache Software Foundation
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.hbase.regionserver;

	import org.apache.hadoop.hbase.HConstants;
	import org.apache.hadoop.hbase.KeyValue;
	import org.apache.hadoop.hbase.client.Scan;
	import org.apache.hadoop.hbase.filter.Filter;
	import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
	import org.apache.hadoop.hbase.io.TimeRange;
	import org.apache.hadoop.hbase.util.Bytes;

	import java.util.NavigableSet;

	/**
	* A query matcher that is specifically designed for the scan case.
	*/
	public class ScanQueryMatcher {
	// Optimization so we can skip lots of compares when we decide to skip
	// to the next row.
	private boolean stickyNextRow;
	private byte[] stopRow;

	protected TimeRange tr;

	protected Filter filter;

	/** Keeps track of deletes */
	protected DeleteTracker deletes;
	protected boolean retainDeletesInOutput;

	/** Keeps track of columns and versions */
	protected ColumnTracker columns;

	/** Key to seek to in memstore and StoreFiles */
	protected KeyValue startKey;

	/** Oldest allowed version stamp for TTL enforcement */
	protected long oldestStamp;

	/** Row comparator for the region this query is for */
	KeyValue.KeyComparator rowComparator;

	/** Row the query is on */
	protected byte [] row;

	/**
	* Constructs a ScanQueryMatcher for a Scan.
	* @param scan
	* @param family
	* @param columns
	* @param ttl
	* @param rowComparator
	*/
	public ScanQueryMatcher(Scan scan, byte [] family,
	NavigableSet<byte[]> columns, long ttl,
	KeyValue.KeyComparator rowComparator, int maxVersions,
	boolean retainDeletesInOutput) {
	this.tr = scan.getTimeRange();
	this.oldestStamp = System.currentTimeMillis() - ttl;
	this.rowComparator = rowComparator;
	this.deletes = new ScanDeleteTracker();
	this.stopRow = scan.getStopRow();
	this.startKey = KeyValue.createFirstOnRow(scan.getStartRow());
	this.filter = scan.getFilter();
	this.retainDeletesInOutput = retainDeletesInOutput;

	// Single branch to deal with two types of reads (columns vs all in family)
	if (columns == null \|\| columns.size() == 0) {
	// use a specialized scan for wildcard column tracker.
	this.columns = new ScanWildcardColumnTracker(maxVersions);
	} else {
	// We can share the ExplicitColumnTracker, diff is we reset
	// between rows, not between storefiles.
	this.columns = new ExplicitColumnTracker(columns,maxVersions);
	}
	}
	public ScanQueryMatcher(Scan scan, byte [] family,
	NavigableSet<byte[]> columns, long ttl,
	KeyValue.KeyComparator rowComparator, int maxVersions) {
	/* By default we will not include deletes */
	/* deletes are included explicitly (for minor compaction) */
	this(scan, family, columns, ttl, rowComparator, maxVersions, false);
	}

	/**
	* Determines if the caller should do one of several things:
	* - seek/skip to the next row (MatchCode.SEEK_NEXT_ROW)
	* - seek/skip to the next column (MatchCode.SEEK_NEXT_COL)
	* - include the current KeyValue (MatchCode.INCLUDE)
	* - ignore the current KeyValue (MatchCode.SKIP)
	* - got to the next row (MatchCode.DONE)
	*
	* @param kv KeyValue to check
	* @return The match code instance.
	*/
	public MatchCode match(KeyValue kv) {
	if (filter != null && filter.filterAllRemaining()) {
	return MatchCode.DONE_SCAN;
	}

	byte [] bytes = kv.getBuffer();
	int offset = kv.getOffset();
	int initialOffset = offset;

	int keyLength = Bytes.toInt(bytes, offset, Bytes.SIZEOF_INT);
	offset += KeyValue.ROW_OFFSET;

	short rowLength = Bytes.toShort(bytes, offset, Bytes.SIZEOF_SHORT);
	offset += Bytes.SIZEOF_SHORT;

	int ret = this.rowComparator.compareRows(row, 0, row.length,
	bytes, offset, rowLength);
	if (ret <= -1) {
	return MatchCode.DONE;
	} else if (ret >= 1) {
	// could optimize this, if necessary?
	// Could also be called SEEK_TO_CURRENT_ROW, but this
	// should be rare/never happens.
	return MatchCode.SEEK_NEXT_ROW;
	}

	// optimize case.
	if (this.stickyNextRow)
	return MatchCode.SEEK_NEXT_ROW;

	if (this.columns.done()) {
	stickyNextRow = true;
	return MatchCode.SEEK_NEXT_ROW;
	}

	//Passing rowLength
	offset += rowLength;

	//Skipping family
	byte familyLength = bytes [offset];
	offset += familyLength + 1;

	int qualLength = keyLength + KeyValue.ROW_OFFSET -
	(offset - initialOffset) - KeyValue.TIMESTAMP_TYPE_SIZE;

	long timestamp = kv.getTimestamp();
	if (isExpired(timestamp)) {
	// done, the rest of this column will also be expired as well.
	return getNextRowOrNextColumn(bytes, offset, qualLength);
	}

	byte type = kv.getType();
	if (isDelete(type)) {
	if (tr.withinOrAfterTimeRange(timestamp)) {
	this.deletes.add(bytes, offset, qualLength, timestamp, type);
	// Can't early out now, because DelFam come before any other keys
	}
	if (retainDeletesInOutput) {
	return MatchCode.INCLUDE;
	}
	else {
	return MatchCode.SKIP;
	}
	}

	if (!this.deletes.isEmpty() &&
	deletes.isDeleted(bytes, offset, qualLength, timestamp)) {

	// May be able to optimize the SKIP here, if we matched
	// due to a DelFam, we can skip to next row
	// due to a DelCol, we can skip to next col
	// But it requires more info out of isDelete().
	// needful -> million column challenge.
	return MatchCode.SKIP;
	}

	int timestampComparison = tr.compare(timestamp);
	if (timestampComparison >= 1) {
	return MatchCode.SKIP;
	} else if (timestampComparison <= -1) {
	return getNextRowOrNextColumn(bytes, offset, qualLength);
	}

	/**
	* Filters should be checked before checking column trackers. If we do
	* otherwise, as was previously being done, ColumnTracker may increment its
	* counter for even that KV which may be discarded later on by Filter. This
	* would lead to incorrect results in certain cases.
	*/
	if (filter != null) {
	ReturnCode filterResponse = filter.filterKeyValue(kv);
	if (filterResponse == ReturnCode.SKIP) {
	return MatchCode.SKIP;
	} else if (filterResponse == ReturnCode.NEXT_COL) {
	return getNextRowOrNextColumn(bytes, offset, qualLength);
	} else if (filterResponse == ReturnCode.NEXT_ROW) {
	stickyNextRow = true;
	return MatchCode.SEEK_NEXT_ROW;
	} else if (filterResponse == ReturnCode.SEEK_NEXT_USING_HINT) {
	return MatchCode.SEEK_NEXT_USING_HINT;
	}
	}

	MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength, timestamp);
	/*
	* According to current implementation, colChecker can only be
	* SEEK_NEXT_COL, SEEK_NEXT_ROW, SKIP or INCLUDE. Therefore, always return
	* the MatchCode. If it is SEEK_NEXT_ROW, also set stickyNextRow.
	*/
	if (colChecker == MatchCode.SEEK_NEXT_ROW) {
	stickyNextRow = true;
	}
	return colChecker;

	}

	public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
	int qualLength) {
	if (columns instanceof ExplicitColumnTracker) {
	//We only come here when we know that columns is an instance of
	//ExplicitColumnTracker so we should never have a cast exception
	((ExplicitColumnTracker)columns).doneWithColumn(bytes, offset,
	qualLength);
	if (columns.getColumnHint() == null) {
	return MatchCode.SEEK_NEXT_ROW;
	} else {
	return MatchCode.SEEK_NEXT_COL;
	}
	} else {
	return MatchCode.SEEK_NEXT_COL;
	}
	}

	public boolean moreRowsMayExistAfter(KeyValue kv) {
	if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
	rowComparator.compareRows(kv.getBuffer(),kv.getRowOffset(),
	kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
	// KV >= STOPROW
	// then NO there is nothing left.
	return false;
	} else {
	return true;
	}
	}

	/**
	* Set current row
	* @param row
	*/
	public void setRow(byte [] row) {
	this.row = row;
	reset();
	}

	public void reset() {
	this.deletes.reset();
	this.columns.reset();

	stickyNextRow = false;
	}

	// should be in KeyValue.
	protected boolean isDelete(byte type) {
	return (type != KeyValue.Type.Put.getCode());
	}

	protected boolean isExpired(long timestamp) {
	return (timestamp < oldestStamp);
	}

	/**
	*
	* @return the start key
	*/
	public KeyValue getStartKey() {
	return this.startKey;
	}

	public KeyValue getNextKeyHint(KeyValue kv) {
	if (filter == null) {
	return null;
	} else {
	return filter.getNextKeyHint(kv);
	}
	}

	public KeyValue getKeyForNextColumn(KeyValue kv) {
	ColumnCount nextColumn = columns.getColumnHint();
	if (nextColumn == null) {
	return KeyValue.createLastOnRow(
	kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
	kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
	kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
	} else {
	return KeyValue.createFirstOnRow(
	kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
	kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
	nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength());
	}
	}

	public KeyValue getKeyForNextRow(KeyValue kv) {
	return KeyValue.createLastOnRow(
	kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
	null, 0, 0,
	null, 0, 0);
	}

	/**
	* {@link #match} return codes. These instruct the scanner moving through
	* memstores and StoreFiles what to do with the current KeyValue.
	* <p>
	* Additionally, this contains "early-out" language to tell the scanner to
	* move on to the next File (memstore or Storefile), or to return immediately.
	*/
	public static enum MatchCode {
	/**
	* Include KeyValue in the returned result
	*/
	INCLUDE,

	/**
	* Do not include KeyValue in the returned result
	*/
	SKIP,

	/**
	* Do not include, jump to next StoreFile or memstore (in time order)
	*/
	NEXT,

	/**
	* Do not include, return current result
	*/
	DONE,

	/**
	* These codes are used by the ScanQueryMatcher
	*/

	/**
	* Done with the row, seek there.
	*/
	SEEK_NEXT_ROW,
	/**
	* Done with column, seek to next.
	*/
	SEEK_NEXT_COL,

	/**
	* Done with scan, thanks to the row filter.
	*/
	DONE_SCAN,

	/*
	* Seek to next key which is given as hint.
	*/
	SEEK_NEXT_USING_HINT,
	}
	}