blob: 8fba32c34294faae8bef69d8b4fbcfb13e7ccd0b [file] [log] [blame]
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
/**
 * Interface for row and column filters directly applied within the regionserver.
 *
 * <p>A filter can expect the following call sequence:
 * <ul>
 * <li> {@link #reset()}: reset the filter state before filtering a new row. </li>
 * <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
 * <li> {@link #filterRowKey(Cell)}: true means drop this row; false means include. </li>
 * <li> {@link #filterCell(Cell)}: decides whether to include or exclude this Cell.
 * See {@link ReturnCode}. </li>
 * <li> {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the
 * Cell. </li>
 * <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be
 * submitted. </li>
 * <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
 * filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
 * </ul>
 *
 * <p>Filter instances are created one per region/scan. This abstract class replaces
 * the old RowFilterInterface.
 *
 * <p>When implementing your own filters, consider inheriting {@link FilterBase} to help
 * you reduce boilerplate.
 *
 * @see FilterBase
 */
@InterfaceAudience.Public
public abstract class Filter {
  /**
   * Reversed scan flag. Written by {@link #setReversed(boolean)} and read by
   * {@link #isReversed()}.
   */
  protected transient boolean reversed;

  /**
   * Reset the state of the filter between rows.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract void reset() throws IOException;

  /**
   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
   * false, each KeyValue in the row will be passed to {@link #filterCell(Cell)} below.
   * If {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should
   * also return true.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param firstRowCell The first cell coming in the new row
   * @return true to remove the entire row, false to include the row (maybe).
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterRowKey(Cell firstRowCell) throws IOException;

  /**
   * If this returns true, the scan will terminate.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @return true to end scan, false to continue.
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterAllRemaining() throws IOException;

  /**
   * A way to filter based on the column family, column qualifier and/or the column value. The
   * return code is described in {@link ReturnCode}. This allows filters to filter only a certain
   * number of columns, then terminate without matching every column.
   *
   * <p>If filterRowKey returns true, filterCell needs to be consistent with it.
   *
   * <p>filterCell can assume that filterRowKey has already been called for the row.
   *
   * <p>If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller
   * calls for the next row.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * <p>This base implementation returns {@link ReturnCode#INCLUDE} for every Cell.
   *
   * @param c the Cell in question
   * @return code as described in {@link ReturnCode}
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   * @see Filter.ReturnCode
   */
  public ReturnCode filterCell(final Cell c) throws IOException {
    return ReturnCode.INCLUDE;
  }

  /**
   * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new
   * Cell object must be returned.
   *
   * <p>The transformed KeyValue is what is eventually returned to the client. Most filters will
   * return the passed KeyValue unchanged. See
   * {@link org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell)} for an example of a
   * transformation.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param v the KeyValue in question
   * @return the changed KeyValue
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
   */
  public abstract Cell transformCell(final Cell v) throws IOException;

  /**
   * Return codes for {@link Filter#filterCell(Cell)}.
   */
  @InterfaceAudience.Public
  public enum ReturnCode {
    /**
     * Include the Cell
     */
    INCLUDE,
    /**
     * Include the Cell and seek to the next column skipping older versions.
     */
    INCLUDE_AND_NEXT_COL,
    /**
     * Skip this Cell
     */
    SKIP,
    /**
     * Skip this column. Go to the next column in this row.
     */
    NEXT_COL,
    /**
     * Seek to next row in current family. It may still pass a cell whose family is different but
     * row is the same as previous cell to {@link Filter#filterCell(Cell)}, even if we get a
     * NEXT_ROW returned for previous cell. For more details see HBASE-18368. <br>
     * Once reset() method was invoked, then we switch to the next row for all family, and you can
     * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br>
     * Note that filterRow() will still be called. <br>
     */
    NEXT_ROW,
    /**
     * Seek to next key which is given as hint by the filter.
     */
    SEEK_NEXT_USING_HINT,
    /**
     * Include KeyValue and done with row, seek to next. See NEXT_ROW.
     */
    INCLUDE_AND_SEEK_NEXT_ROW,
  }

  /**
   * Chance to alter the list of Cells to be submitted. The filter may directly modify the final
   * list that is to be submitted.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param kvs the list of Cells to be filtered
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract void filterRowCells(List<Cell> kvs) throws IOException;

  /**
   * Primarily used to check for conflicts with scans (such as scans that do not read a full row
   * at a time).
   *
   * @return True if this filter actively uses filterRowCells(List) or filterRow().
   */
  public abstract boolean hasFilterRow();

  /**
   * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter
   * needs to retain state then return a particular value for this call if they wish to exclude a
   * row if a certain column is missing (for example).
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @return true to exclude row, false to include row.
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterRow() throws IOException;

  /**
   * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
   * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
   * QueryMatcher would call this function to find out which key it must next seek to.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param currentCell the current Cell
   * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
   *         seek to next.
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract Cell getNextCellHint(final Cell currentCell) throws IOException;

  /**
   * Check that given column family is essential for filter to check row. Most filters always
   * return true here. But some could have more sophisticated logic which could significantly
   * reduce scanning process by not even touching columns until we are 100% sure that its data is
   * needed in result.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param name the column family to check
   * @return true if the given column family is essential for the filter to check the row
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean isFamilyEssential(byte[] name) throws IOException;

  /**
   * Serializes this filter.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @return The filter serialized using pb
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract byte[] toByteArray() throws IOException;

  /**
   * Placeholder on the base class: it always throws, because parsing has to be invoked on the
   * derived filter type rather than on {@link Filter} itself.
   *
   * @param pbBytes A pb serialized {@link Filter} instance
   * @return An instance of {@link Filter} made from <code>pbBytes</code>; this base
   *         implementation never returns normally
   * @throws DeserializationException always, when called on the base {@link Filter}
   * @see #toByteArray
   */
  public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
    throw new DeserializationException(
      "parseFrom called on base Filter, but should be called on derived type");
  }

  /**
   * Compares the serialized fields of this filter with those of another filter. Used for
   * testing.
   *
   * @param other the filter to compare with
   * @return true if and only if the fields of the filter that are serialized are equal to the
   *         corresponding fields in other.
   */
  abstract boolean areSerializedFieldsEqual(Filter other);

  /**
   * Alter the reversed scan flag.
   *
   * @param reversed the new value of the flag
   */
  public void setReversed(boolean reversed) {
    this.reversed = reversed;
  }

  /**
   * Read the reversed scan flag.
   *
   * @return the current value of the reversed scan flag
   */
  public boolean isReversed() {
    return this.reversed;
  }
}