| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.phoenix.hbase.index.covered.filter; |
| |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.hadoop.hbase.Cell; |
| import org.apache.hadoop.hbase.CellUtil; |
| import org.apache.hadoop.hbase.HConstants; |
| import org.apache.hadoop.hbase.KeyValue; |
| import org.apache.hadoop.hbase.KeyValue.Type; |
| import org.apache.hadoop.hbase.KeyValueUtil; |
| import org.apache.hadoop.hbase.client.Put; |
| import org.apache.hadoop.hbase.filter.FilterBase; |
| import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr; |
| |
/**
 * Only allow the 'latest' timestamp of each family:qualifier pair, ensuring that they aren't
 * covered by a previous delete. This is similar to some of the work the ScanQueryMatcher does to
 * ensure correct visibility of keys based on deletes.
 * <p>
 * No actual delete {@link KeyValue}s are allowed to pass through this filter - they are always
 * skipped.
 * <p>
 * Note there is a little bit of conceptually odd behavior (though it matches the HBase
 * specifications) around point deletes ({@link KeyValue} of type {@link Type#Delete}). These
 * deletes only apply to a single {@link KeyValue} at a single point in time - they essentially
 * completely 'cover' the existing {@link Put} at that timestamp. However, they don't 'cover' any
 * other keyvalues at older timestamps. Therefore, if there is a point-delete at ts = 5, and puts
 * at ts = 4, and ts = 5, we will only allow the put at ts = 4.
 * <p>
 * Expects {@link KeyValue}s to arrive in sorted order, with 'Delete' {@link Type} {@link KeyValue}s
 * ({@link Type#DeleteColumn}, {@link Type#DeleteFamily}, {@link Type#Delete}) before their regular
 * {@link Type#Put} counterparts.
 */
public class ApplyAndFilterDeletesFilter extends FilterBase {

  // set once we have run out of families of interest; filterKeyValue then skips every
  // remaining cell until reset() is called for the next row
  private boolean done = false;
  // families to include, kept sorted so getNextFamily can binary-search for seek hints
  List<ImmutableBytesPtr> families;
  // delete markers currently "covering" the cells being scanned in this row
  private final DeleteTracker coveringDelete = new DeleteTracker();
  // hinter selected by the most recent SEEK_NEXT_USING_HINT return; read by getNextCellHint
  private Hinter currentHint;
  private DeleteColumnHinter columnHint = new DeleteColumnHinter();
  private DeleteFamilyHinter familyHint = new DeleteFamilyHinter();

  /**
   * Setup the filter to only include the given families. This allows us to seek intelligently past
   * families we don't care about.
   * @param families column families to include; copied and sorted internally, the caller's set is
   *          not retained
   */
  public ApplyAndFilterDeletesFilter(Set<ImmutableBytesPtr> families) {
    this.families = new ArrayList<ImmutableBytesPtr>(families);
    Collections.sort(this.families);
  }

  /**
   * @return the tracker holding the delete markers currently covering the scan; exposed so callers
   *         can inspect the deletes seen so far
   */
  public DeleteTracker getDeleteTracker() {
    return coveringDelete;
  }

  /**
   * Find the first family of interest that sorts strictly after the given family.
   * @param family family to search from (need not itself be in the configured set)
   * @return the next family in sort order, or <tt>null</tt> if there are no more families of
   *         interest
   */
  private ImmutableBytesPtr getNextFamily(ImmutableBytesPtr family) {
    int index = Collections.binarySearch(families, family);
    //doesn't match exactly, but we can find the right next match
    //this is pretty unlikely, but just in case
    if(index < 0){
      //the actual location of the next match
      index = -index -1;
    }else{
      //its an exact match for a family, so we get the next entry
      index = index +1;
    }
    //now we have the location of the next entry
    if(index >= families.size()){
      return null;
    }
    return families.get(index);
  }

  /**
   * Clear all per-row state: the tracked delete markers and the 'done' flag.
   */
  @Override
  public void reset(){
    this.coveringDelete.reset();
    this.done = false;
  }


  /**
   * Produce the seek hint for the scanner after a SEEK_NEXT_USING_HINT return.
   * <p>
   * NOTE(review): assumes HBase only calls this after filterKeyValue returned
   * SEEK_NEXT_USING_HINT (which sets {@link #currentHint}); otherwise this would NPE — confirm
   * against the HBase Filter contract.
   */
  @Override
  public Cell getNextCellHint(Cell peeked){
    return currentHint.getHint(KeyValueUtil.ensureKeyValue(peeked));
  }

  /**
   * Decide the fate of each cell: delete markers are tracked and skipped, and regular cells are
   * included only when no tracked delete covers them. Relies on cells arriving in KeyValue sort
   * order (deletes before the puts they cover).
   * @param next cell to examine
   * @return SKIP for delete markers and covered puts, SEEK_NEXT_USING_HINT when a whole
   *         family/column is covered, INCLUDE otherwise
   */
  @Override
  public ReturnCode filterKeyValue(Cell next) {
    // we marked ourselves done, but the END_ROW_KEY didn't manage to seek to the very last key
    if (this.done) {
      return ReturnCode.SKIP;
    }

    KeyValue nextKV = KeyValueUtil.ensureKeyValue(next);
    switch (KeyValue.Type.codeToType(next.getTypeByte())) {
    /*
     * DeleteFamily will always sort first because those KVs (we assume) don't have qualifiers (or
     * rather are null). Therefore, we have to keep a hold of all the delete families until we get
     * to a Put entry that is covered by that delete (in which case, we are done with the family).
     */
    case DeleteFamily:
      // track the family to delete. If we are updating the delete, that means we have passed all
      // kvs in the last column, so we can safely ignore the last deleteFamily, and just use this
      // one. In fact, it means that all the previous deletes can be ignored because the family must
      // not match anymore.
      // We could potentially have multiple deleteFamily for the same row and family
      // (e.g. upsert row+family, delete it, upsert again, delete again),
      // in which case we keep the first one since its timestamp dominates
      if (coveringDelete.deleteFamily == null || !CellUtil.matchingFamily(coveringDelete.deleteFamily, nextKV)) {
        this.coveringDelete.reset();
        this.coveringDelete.deleteFamily = nextKV;
      }
      return ReturnCode.SKIP;
    case DeleteColumn:
      // similar to deleteFamily, all the newer deletes/puts would have been seen at this point, so
      // we can safely replace the previously tracked delete column with this more recent one
      this.coveringDelete.pointDelete = null;
      this.coveringDelete.deleteColumn = nextKV;
      return ReturnCode.SKIP;
    case Delete:
      // we are just deleting the single column value at this point.
      // therefore we just skip this entry and go onto the next one. The only caveat is that
      // we should still cover the next entry if this delete applies to the next entry, so we
      // have to keep around a reference to the KV to compare against the next valid entry
      this.coveringDelete.pointDelete = nextKV;
      return ReturnCode.SKIP;
    default:
      // a regular (Put) cell: include it unless one of the tracked deletes covers it

      // no covering deletes
      if (coveringDelete.empty()) {
        return ReturnCode.INCLUDE;
      }

      if (coveringDelete.matchesFamily(nextKV)) {
        // the whole family is covered; hint a seek to the next family of interest
        this.currentHint = familyHint;
        return ReturnCode.SEEK_NEXT_USING_HINT;
      }

      if (coveringDelete.matchesColumn(nextKV)) {
        // hint to the next column
        this.currentHint = columnHint;
        return ReturnCode.SEEK_NEXT_USING_HINT;
      }

      if (coveringDelete.matchesPoint(nextKV)) {
        // only this exact timestamp is covered; older versions of the column may still pass
        return ReturnCode.SKIP;
      }

    }

    // none of the deletes matches, we are done
    return ReturnCode.INCLUDE;
  }

  /**
   * Get the next hint for a given peeked keyvalue
   */
  interface Hinter {
    public abstract KeyValue getHint(KeyValue peek);
  }

  /**
   * Entire family has been deleted, so either seek to the next family, or if none are present in
   * the original set of families to include, seek to the "last possible key"(or rather our best
   * guess) and be done.
   */
  class DeleteFamilyHinter implements Hinter {

    /**
     * @param peeked keyvalue in the family that has been fully deleted
     * @return first possible key of the next family of interest, or {@link KeyValue#LOWESTKEY}
     *         once there are no more families (the enclosing filter's 'done' flag then makes
     *         filterKeyValue skip everything)
     */
    @Override
    public KeyValue getHint(KeyValue peeked) {
      // check to see if we have another column to seek
      ImmutableBytesPtr nextFamily =
          getNextFamily(new ImmutableBytesPtr(peeked.getBuffer(), peeked.getFamilyOffset(),
              peeked.getFamilyLength()));
      if (nextFamily == null) {
        // no known next family, so we can be completely done
        done = true;
        return KeyValue.LOWESTKEY;
      }
      // there is a valid family, so we should seek to that
      return KeyValue.createFirstOnRow(peeked.getRow(), nextFamily.copyBytesIfNecessary(),
        HConstants.EMPTY_BYTE_ARRAY);
    }

  }

  /**
   * Hint the next column-qualifier after the given keyvalue. We can't be smart like in the
   * ScanQueryMatcher since we don't know the columns ahead of time.
   */
  class DeleteColumnHinter implements Hinter {

    /**
     * @param kv keyvalue in the column that has been fully deleted
     * @return last possible key for this row/family/qualifier, so the scanner moves past the
     *         deleted column
     */
    @Override
    public KeyValue getHint(KeyValue kv) {
      return KeyValueUtil.createLastOnRow(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
        kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), kv.getQualifierArray(),
        kv.getQualifierOffset(), kv.getQualifierLength());
    }
  }

  /**
   * Tracks the delete markers (family, column and point deletes) that currently cover the cells
   * being scanned. Each matchesXXX method also discards its marker once the scan has linearly
   * moved past the range the marker can apply to.
   */
  public static class DeleteTracker {

    // most recent DeleteFamily marker seen for the current family
    public KeyValue deleteFamily;
    // most recent DeleteColumn marker seen for the current column
    public KeyValue deleteColumn;
    // point (single-version) Delete marker pending comparison against the next cell
    public KeyValue pointDelete;

    /**
     * Drop all tracked delete markers.
     */
    public void reset() {
      this.deleteFamily = null;
      this.deleteColumn = null;
      this.pointDelete = null;

    }

    /**
     * Check to see if we should skip this {@link KeyValue} based on the family.
     * <p>
     * Internally, also resets the currently tracked "Delete Family" marker we are tracking if the
     * keyvalue is into another family (since CFs sort lexicographically, we can discard the current
     * marker since it must not be applicable to any more kvs in a linear scan).
     * @param next keyvalue to check against the tracked DeleteFamily marker
     * @return <tt>true</tt> if this {@link KeyValue} matches a delete.
     */
    public boolean matchesFamily(KeyValue next) {
      if (deleteFamily == null) {
        return false;
      }
      if (CellUtil.matchingFamily(deleteFamily, next)) {
        // falls within the timestamp range
        if (deleteFamily.getTimestamp() >= next.getTimestamp()) {
          return true;
        }
      } else {
        // only can reset the delete family because we are on to another family
        deleteFamily = null;
      }

      return false;
    }


    /**
     * Check to see if this {@link KeyValue} is covered by the tracked "DeleteColumn" marker.
     * <p>
     * Resets the tracked marker when the keyvalue is in a different column, since the marker
     * cannot apply to any later kvs in a linear scan.
     * @param next keyvalue to check against the tracked DeleteColumn marker
     * @return <tt>true</tt> if the keyvalue is in the same column as the marker and at or below
     *         its timestamp
     */
    public boolean matchesColumn(KeyValue next) {
      if (deleteColumn == null) {
        return false;
      }
      if (CellUtil.matchingFamily(deleteColumn, next) && CellUtil.matchingQualifier(deleteColumn, next)) {
        // falls within the timestamp range
        if (deleteColumn.getTimestamp() >= next.getTimestamp()) {
          return true;
        }
      } else {
        deleteColumn = null;
      }
      return false;
    }

    /**
     * Check to see if this {@link KeyValue} is covered by the tracked point-delete marker.
     * <p>
     * Clears the marker whenever the column matches but the timestamp does not, since a point
     * delete covers exactly one timestamp and cells arrive in descending-timestamp order.
     * @param next keyvalue to check against the tracked point Delete marker
     * @return <tt>true</tt> only when the keyvalue matches the marker's column and exact timestamp
     */
    public boolean matchesPoint(KeyValue next) {
      // point deletes only apply to the exact KV that they reference, so we only need to ensure
      // that the timestamp matches exactly. Because we sort by timestamp first, either the next
      // keyvalue has the exact timestamp or is an older (smaller) timestamp, and we can allow that
      // one.
      if (pointDelete != null && CellUtil.matchingFamily(pointDelete, next)
          && CellUtil.matchingQualifier(pointDelete, next)) {
        if (pointDelete.getTimestamp() == next.getTimestamp()) {
          return true;
        }
        // clear the point delete since the TS must not be matching
        pointDelete = null;
      }
      return false;
    }

    /**
     * @return <tt>true</tt> if no delete has been set
     */
    public boolean empty() {
      return deleteFamily == null && deleteColumn == null && pointDelete == null;
    }
  }
}