// hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java - hbase - Git at Google

 /*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.hadoop.hbase.filter;

 import java.io.IOException;
 import java.util.List;

 import org.apache.hadoop.hbase.Cell;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;

 /**
  * Interface for row and column filters directly applied within the regionserver.
  *
  * A filter can expect the following call sequence:
  * <ul>
  *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
  *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
  *   <li> {@link #filterRowKey(Cell)}: true means drop this row; false means include.</li>
  *   <li> {@link #filterCell(Cell)}: decides whether to include or exclude this Cell.
  *        See {@link ReturnCode}. </li>
  *   <li> {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the
  *        Cell. </li>
  *   <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be
  *        submitted. </li>
  *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
  *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
  * </ul>
  *
  * Filter instances are created one per region/scan.  This abstract class replaces
  * the old RowFilterInterface.
  *
  * When implementing your own filters, consider inheriting {@link FilterBase} to help
  * you reduce boilerplate.
  *
  * @see FilterBase
  */
 @InterfaceAudience.Public
 public abstract class Filter {
   /**
    * Reversed scan flag; written by {@link #setReversed(boolean)} and read by
    * {@link #isReversed()}.
    */
   protected transient boolean reversed;

   /**
    * Reset the state of the filter between rows.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    */
   public abstract void reset() throws IOException;

   /**
    * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
    * false, each Cell in the row will be passed to {@link #filterCell(Cell)} below.
    * If {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should
    * also return true.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @param firstRowCell The first cell coming in the new row
    * @return true, remove entire row, false, include the row (maybe).
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    */
   public abstract boolean filterRowKey(Cell firstRowCell) throws IOException;

   /**
    * If this returns true, the scan will terminate.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @return true to end scan, false to continue.
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    */
   public abstract boolean filterAllRemaining() throws IOException;

   /**
    * A way to filter based on the column family, column qualifier and/or the column value. The
    * return code is described in {@link ReturnCode}. This allows filters to filter only certain
    * number of columns, then terminate without matching every column.
    *
    * If filterRowKey returns true, filterCell needs to be consistent with it.
    *
    * filterCell can assume that filterRowKey has already been called for the row.
    *
    * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
    * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
    * for the next row.
    *
    * The implementation in this class includes every Cell by returning
    * {@link ReturnCode#INCLUDE}.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @param c the Cell in question
    * @return code as described in {@link ReturnCode}
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    * @see Filter.ReturnCode
    */
   public ReturnCode filterCell(final Cell c) throws IOException {
     return ReturnCode.INCLUDE;
   }

   /**
    * Give the filter a chance to transform the passed Cell. If the Cell is changed a new
    * Cell object must be returned.
    *
    * The transformed Cell is what is eventually returned to the client. Most filters will
    * return the passed Cell unchanged. See
    * {@link org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell)} for an example of a
    * transformation.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @param v the Cell in question
    * @return the transformed Cell (the passed Cell itself when nothing was changed)
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
    */
   public abstract Cell transformCell(final Cell v) throws IOException;

   /**
    * Return codes for {@link Filter#filterCell(Cell)}.
    */
   @InterfaceAudience.Public
   public enum ReturnCode {
     /**
      * Include the Cell
      */
     INCLUDE,
     /**
      * Include the Cell and seek to the next column skipping older versions.
      */
     INCLUDE_AND_NEXT_COL,
     /**
      * Skip this Cell
      */
     SKIP,
     /**
      * Skip this column. Go to the next column in this row.
      */
     NEXT_COL,
     /**
      * Seek to next row in current family. It may still pass a cell whose family is different but
      * row is the same as previous cell to {@link Filter#filterCell(Cell)}, even if we get a
      * NEXT_ROW returned for previous cell. For more details see HBASE-18368. <br>
      * Once reset() method was invoked, then we switch to the next row for all family, and you can
      * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br>
      * Note that filterRow() will still be called. <br>
      */
     NEXT_ROW,
     /**
      * Seek to next key which is given as hint by the filter.
      */
     SEEK_NEXT_USING_HINT,
     /**
      * Include the Cell and done with row, seek to next. See NEXT_ROW.
      */
     INCLUDE_AND_SEEK_NEXT_ROW,
   }

   /**
    * Chance to alter the list of Cells to be submitted. Changes are made directly to the passed
    * list, as this method returns nothing.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @param kvs the list of Cells to be filtered
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    */
   public abstract void filterRowCells(List<Cell> kvs) throws IOException;

   /**
    * Primarily used to check for conflicts with scans (such as scans that do not read a full row
    * at a time).
    *
    * @return True if this filter actively uses filterRowCells(List) or filterRow().
    */
   public abstract boolean hasFilterRow();

   /**
    * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter
    * needs to retain state then return a particular value for this call if they wish to exclude a
    * row if a certain column is missing (for example).
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @return true to exclude row, false to include row.
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    */
   public abstract boolean filterRow() throws IOException;

   /**
    * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
    * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
    * QueryMatcher would call this function to find out which key it must next seek to.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @param currentCell the current Cell (presumably the one for which SEEK_NEXT_USING_HINT was
    *          returned; confirm against the caller)
    * @return the Cell to seek to next; null if the filter is not sure which key to seek to next.
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    */
   public abstract Cell getNextCellHint(final Cell currentCell) throws IOException;

   /**
    * Check that given column family is essential for filter to check row. Most filters always return
    * true here. But some could have more sophisticated logic which could significantly reduce
    * scanning process by not even touching columns until we are 100% sure that its data is needed
    * in result.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @param name the column family to check
    * @return true if the given column family is essential for the filter to check the row
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    */
   public abstract boolean isFamilyEssential(byte[] name) throws IOException;

   /**
    * Serializes this filter into a byte array.
    *
    * Concrete implementers can signal a failure condition in their code by throwing an
    * {@link IOException}.
    *
    * @return The filter serialized using pb
    * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
    */
   public abstract byte[] toByteArray() throws IOException;

   /**
    * Counterpart of {@link #toByteArray()}. This base-class version never produces a filter: it
    * always throws, because the call is expected to be made on a derived type.
    *
    * @param pbBytes A pb serialized {@link Filter} instance
    * @return An instance of {@link Filter} made from <code>pbBytes</code>
    * @throws DeserializationException always, when invoked on this base class
    * @see #toByteArray
    */
   public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
     throw new DeserializationException(
       "parseFrom called on base Filter, but should be called on derived type");
   }

   /**
    * Compares the serialized fields of this filter with those of another one. Used for testing.
    *
    * @param other the filter to compare with
    * @return true if and only if the fields of the filter that are serialized are equal to the
    *         corresponding fields in other.
    */
   abstract boolean areSerializedFieldsEqual(Filter other);

   /**
    * alter the reversed scan flag
    * @param reversed flag
    */
   public void setReversed(boolean reversed) {
     this.reversed = reversed;
   }

   /**
    * Read the reversed scan flag.
    * @return the value last passed to {@link #setReversed(boolean)}, or false if it was never set
    */
   public boolean isReversed() {
     return this.reversed;
   }
 }
	/*
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.hbase.filter;

	import java.io.IOException;
	import java.util.List;

	import org.apache.hadoop.hbase.Cell;
	import org.apache.yetus.audience.InterfaceAudience;
	import org.apache.hadoop.hbase.exceptions.DeserializationException;

	/**
	 * Interface for row and column filters directly applied within the regionserver.
	 *
	 * A filter can expect the following call sequence:
	 * <ul>
	 *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
	 *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
	 *   <li> {@link #filterRowKey(Cell)}: true means drop this row; false means include.</li>
	 *   <li> {@link #filterCell(Cell)}: decides whether to include or exclude this Cell.
	 *        See {@link ReturnCode}. </li>
	 *   <li> {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the
	 *        Cell. </li>
	 *   <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be
	 *        submitted. </li>
	 *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
	 *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
	 * </ul>
	 *
	 * Filter instances are created one per region/scan. This abstract class replaces
	 * the old RowFilterInterface.
	 *
	 * When implementing your own filters, consider inheriting {@link FilterBase} to help
	 * you reduce boilerplate.
	 *
	 * @see FilterBase
	 */
	@InterfaceAudience.Public
	public abstract class Filter {
	  /**
	   * Reversed scan flag; written by {@link #setReversed(boolean)} and read by
	   * {@link #isReversed()}.
	   */
	  protected transient boolean reversed;

	  /**
	   * Reset the state of the filter between rows.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   */
	  public abstract void reset() throws IOException;

	  /**
	   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
	   * false, each Cell in the row will be passed to {@link #filterCell(Cell)} below.
	   * If {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should
	   * also return true.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @param firstRowCell The first cell coming in the new row
	   * @return true, remove entire row, false, include the row (maybe).
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   */
	  public abstract boolean filterRowKey(Cell firstRowCell) throws IOException;

	  /**
	   * If this returns true, the scan will terminate.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @return true to end scan, false to continue.
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   */
	  public abstract boolean filterAllRemaining() throws IOException;

	  /**
	   * A way to filter based on the column family, column qualifier and/or the column value. The
	   * return code is described in {@link ReturnCode}. This allows filters to filter only certain
	   * number of columns, then terminate without matching every column.
	   *
	   * If filterRowKey returns true, filterCell needs to be consistent with it.
	   *
	   * filterCell can assume that filterRowKey has already been called for the row.
	   *
	   * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
	   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
	   * for the next row.
	   *
	   * The implementation in this class includes every Cell by returning
	   * {@link ReturnCode#INCLUDE}.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @param c the Cell in question
	   * @return code as described in {@link ReturnCode}
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   * @see Filter.ReturnCode
	   */
	  public ReturnCode filterCell(final Cell c) throws IOException {
	    return ReturnCode.INCLUDE;
	  }

	  /**
	   * Give the filter a chance to transform the passed Cell. If the Cell is changed a new
	   * Cell object must be returned.
	   *
	   * The transformed Cell is what is eventually returned to the client. Most filters will
	   * return the passed Cell unchanged. See
	   * {@link org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell)} for an example of a
	   * transformation.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @param v the Cell in question
	   * @return the transformed Cell (the passed Cell itself when nothing was changed)
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
	   */
	  public abstract Cell transformCell(final Cell v) throws IOException;

	  /**
	   * Return codes for {@link Filter#filterCell(Cell)}.
	   */
	  @InterfaceAudience.Public
	  public enum ReturnCode {
	    /**
	     * Include the Cell
	     */
	    INCLUDE,
	    /**
	     * Include the Cell and seek to the next column skipping older versions.
	     */
	    INCLUDE_AND_NEXT_COL,
	    /**
	     * Skip this Cell
	     */
	    SKIP,
	    /**
	     * Skip this column. Go to the next column in this row.
	     */
	    NEXT_COL,
	    /**
	     * Seek to next row in current family. It may still pass a cell whose family is different but
	     * row is the same as previous cell to {@link Filter#filterCell(Cell)}, even if we get a
	     * NEXT_ROW returned for previous cell. For more details see HBASE-18368. <br>
	     * Once reset() method was invoked, then we switch to the next row for all family, and you can
	     * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br>
	     * Note that filterRow() will still be called. <br>
	     */
	    NEXT_ROW,
	    /**
	     * Seek to next key which is given as hint by the filter.
	     */
	    SEEK_NEXT_USING_HINT,
	    /**
	     * Include the Cell and done with row, seek to next. See NEXT_ROW.
	     */
	    INCLUDE_AND_SEEK_NEXT_ROW,
	  }

	  /**
	   * Chance to alter the list of Cells to be submitted. Changes are made directly to the passed
	   * list, as this method returns nothing.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @param kvs the list of Cells to be filtered
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   */
	  public abstract void filterRowCells(List<Cell> kvs) throws IOException;

	  /**
	   * Primarily used to check for conflicts with scans (such as scans that do not read a full row
	   * at a time).
	   *
	   * @return True if this filter actively uses filterRowCells(List) or filterRow().
	   */
	  public abstract boolean hasFilterRow();

	  /**
	   * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter
	   * needs to retain state then return a particular value for this call if they wish to exclude a
	   * row if a certain column is missing (for example).
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @return true to exclude row, false to include row.
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   */
	  public abstract boolean filterRow() throws IOException;

	  /**
	   * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
	   * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
	   * QueryMatcher would call this function to find out which key it must next seek to.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @param currentCell the current Cell (presumably the one for which SEEK_NEXT_USING_HINT was
	   *          returned; confirm against the caller)
	   * @return the Cell to seek to next; null if the filter is not sure which key to seek to next.
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   */
	  public abstract Cell getNextCellHint(final Cell currentCell) throws IOException;

	  /**
	   * Check that given column family is essential for filter to check row. Most filters always return
	   * true here. But some could have more sophisticated logic which could significantly reduce
	   * scanning process by not even touching columns until we are 100% sure that its data is needed
	   * in result.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @param name the column family to check
	   * @return true if the given column family is essential for the filter to check the row
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   */
	  public abstract boolean isFamilyEssential(byte[] name) throws IOException;

	  /**
	   * Serializes this filter into a byte array.
	   *
	   * Concrete implementers can signal a failure condition in their code by throwing an
	   * {@link IOException}.
	   *
	   * @return The filter serialized using pb
	   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
	   */
	  public abstract byte[] toByteArray() throws IOException;

	  /**
	   * Counterpart of {@link #toByteArray()}. This base-class version never produces a filter: it
	   * always throws, because the call is expected to be made on a derived type.
	   *
	   * @param pbBytes A pb serialized {@link Filter} instance
	   * @return An instance of {@link Filter} made from <code>pbBytes</code>
	   * @throws DeserializationException always, when invoked on this base class
	   * @see #toByteArray
	   */
	  public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
	    throw new DeserializationException(
	      "parseFrom called on base Filter, but should be called on derived type");
	  }

	  /**
	   * Compares the serialized fields of this filter with those of another one. Used for testing.
	   *
	   * @param other the filter to compare with
	   * @return true if and only if the fields of the filter that are serialized are equal to the
	   *         corresponding fields in other.
	   */
	  abstract boolean areSerializedFieldsEqual(Filter other);

	  /**
	   * alter the reversed scan flag
	   * @param reversed flag
	   */
	  public void setReversed(boolean reversed) {
	    this.reversed = reversed;
	  }

	  /**
	   * Read the reversed scan flag.
	   * @return the value last passed to {@link #setReversed(boolean)}, or false if it was never set
	   */
	  public boolean isReversed() {
	    return this.reversed;
	  }
	}