| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.search; |
| |
| import java.util.Collection; |
| import java.util.Collections; |
| |
| import org.apache.lucene.util.Accountable; |
| import org.apache.lucene.util.BitUtil; |
| import org.apache.lucene.util.RamUsageEstimator; |
| |
| |
| /** |
| * <code>HashDocSet</code> represents an unordered set of Lucene Document Ids |
| * using a primitive int hash table. It can be a better choice if there are few docs |
| * in the set because it takes up less memory and is faster to iterate and take |
| * set intersections. |
| * |
| * |
| * @since solr 0.9 |
| */ |
| @Deprecated // see SOLR-14256 |
| public final class HashDocSet extends DocSetBase { |
| private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(HashDocSet.class) + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; |
| |
| /** Default load factor to use for HashDocSets. We keep track of the inverse |
| * since multiplication is so much faster than division. The default |
| * is 1.0f / 0.75f |
| */ |
| static float DEFAULT_INVERSE_LOAD_FACTOR = 1.0f /0.75f; |
| |
| // public final static int MAX_SIZE = SolrConfig.config.getInt("//HashDocSet/@maxSize",-1); |
| |
| |
| // lucene docs are numbered from 0, so a neg number must be used for missing. |
| // an alternative to having to init the array to EMPTY at the start is |
| // |
| private final static int EMPTY=-1; |
| private final int[] table; |
| private final int size; |
| private final int mask; |
| |
| public HashDocSet(HashDocSet set) { |
| this.table = set.table.clone(); |
| this.size = set.size; |
| this.mask = set.mask; |
| } |
| |
| /** Create a HashDocSet from a list of *unique* ids */ |
| public HashDocSet(int[] docs, int offset, int len) { |
| this(docs, offset, len, DEFAULT_INVERSE_LOAD_FACTOR); |
| } |
| |
| /** Create a HashDocSet from a list of *unique* ids */ |
| public HashDocSet(int[] docs, int offset, int len, float inverseLoadFactor) { |
| int tsize = Math.max(BitUtil.nextHighestPowerOfTwo(len), 1); |
| if (tsize < len * inverseLoadFactor) { |
| tsize <<= 1; |
| } |
| |
| mask=tsize-1; |
| |
| table = new int[tsize]; |
| // (for now) better then: Arrays.fill(table, EMPTY); |
| // https://issues.apache.org/jira/browse/SOLR-390 |
| for (int i=tsize-1; i>=0; i--) table[i]=EMPTY; |
| |
| int end = offset + len; |
| for (int i=offset; i<end; i++) { |
| put(docs[i]); |
| } |
| |
| size = len; |
| } |
| |
| void put(int doc) { |
| int s = doc & mask; |
| while (table[s]!=EMPTY) { |
| // Adding an odd number to this power-of-two hash table is |
| // guaranteed to do a full traversal, so instead of re-hashing |
| // we jump straight to a "linear" traversal. |
| // The key is that we provide many different ways to do the |
| // traversal (tablesize/2) based on the last hash code (the doc). |
| // Rely on loop invariant code motion to eval ((doc>>7)|1) only once. |
| // otherwise, we would need to pull the first case out of the loop. |
| s = (s + ((doc>>7)|1)) & mask; |
| } |
| table[s]=doc; |
| } |
| |
| @Override |
| public boolean exists(int doc) { |
| int s = doc & mask; |
| for(;;) { |
| int v = table[s]; |
| if (v==EMPTY) return false; |
| if (v==doc) return true; |
| // see put() for algorithm details. |
| s = (s + ((doc>>7)|1)) & mask; |
| } |
| } |
| |
| |
| @Override |
| public int size() { |
| return size; |
| } |
| |
| @Override |
| public DocIterator iterator() { |
| return new DocIterator() { |
| int pos=0; |
| int doc; |
| { goNext(); } |
| |
| @Override |
| public boolean hasNext() { |
| return pos < table.length; |
| } |
| |
| @Override |
| public Integer next() { |
| return nextDoc(); |
| } |
| |
| @Override |
| public void remove() { |
| } |
| |
| void goNext() { |
| while (pos<table.length && table[pos]==EMPTY) pos++; |
| } |
| |
| // modify to return -1 at end of iteration? |
| @Override |
| public int nextDoc() { |
| int doc = table[pos]; |
| pos++; |
| goNext(); |
| return doc; |
| } |
| |
| @Override |
| public float score() { |
| return 0.0f; |
| } |
| }; |
| } |
| |
| @Override |
| public DocSet intersection(DocSet other) { |
| if (other instanceof HashDocSet) { |
| // set "a" to the smallest doc set for the most efficient |
| // intersection. |
| final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other; |
| final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this; |
| |
| int[] result = new int[a.size()]; |
| int resultCount=0; |
| for (int i=0; i<a.table.length; i++) { |
| int id=a.table[i]; |
| if (id >= 0 && b.exists(id)) { |
| result[resultCount++]=id; |
| } |
| } |
| return new HashDocSet(result,0,resultCount); |
| |
| } else { |
| |
| int[] result = new int[size()]; |
| int resultCount=0; |
| for (int i=0; i<table.length; i++) { |
| int id=table[i]; |
| if (id >= 0 && other.exists(id)) { |
| result[resultCount++]=id; |
| } |
| } |
| return new HashDocSet(result,0,resultCount); |
| } |
| |
| } |
| |
| @Override |
| public int intersectionSize(DocSet other) { |
| if (other instanceof HashDocSet) { |
| // set "a" to the smallest doc set for the most efficient |
| // intersection. |
| final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other; |
| final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this; |
| |
| int resultCount=0; |
| for (int i=0; i<a.table.length; i++) { |
| int id=a.table[i]; |
| if (id >= 0 && b.exists(id)) { |
| resultCount++; |
| } |
| } |
| return resultCount; |
| } else { |
| int resultCount=0; |
| for (int i=0; i<table.length; i++) { |
| int id=table[i]; |
| if (id >= 0 && other.exists(id)) { |
| resultCount++; |
| } |
| } |
| return resultCount; |
| } |
| |
| } |
| |
| @Override |
| public boolean intersects(DocSet other) { |
| if (other instanceof HashDocSet) { |
| // set "a" to the smallest doc set for the most efficient |
| // intersection. |
| final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other; |
| final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this; |
| |
| for (int i=0; i<a.table.length; i++) { |
| int id=a.table[i]; |
| if (id >= 0 && b.exists(id)) { |
| return true; |
| } |
| } |
| return false; |
| } else { |
| for (int i=0; i<table.length; i++) { |
| int id=table[i]; |
| if (id >= 0 && other.exists(id)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| } |
| |
| @Override |
| public DocSet andNot(DocSet other) { |
| int[] result = new int[size()]; |
| int resultCount=0; |
| |
| for (int i=0; i<table.length; i++) { |
| int id=table[i]; |
| if (id >= 0 && !other.exists(id)) { |
| result[resultCount++]=id; |
| } |
| } |
| return new HashDocSet(result,0,resultCount); |
| } |
| |
| @Override |
| public DocSet union(DocSet other) { |
| if (other instanceof HashDocSet) { |
| // set "a" to the smallest doc set |
| final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other; |
| final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this; |
| |
| int[] result = new int[a.size()+b.size()]; |
| int resultCount=0; |
| // iterate over the largest table first, adding w/o checking. |
| for (int i=0; i<b.table.length; i++) { |
| int id=b.table[i]; |
| if (id>=0) result[resultCount++]=id; |
| } |
| |
| // now iterate over smaller set, adding all not already in larger set. |
| for (int i=0; i<a.table.length; i++) { |
| int id=a.table[i]; |
| if (id>=0 && !b.exists(id)) result[resultCount++]=id; |
| } |
| |
| return new HashDocSet(result,0,resultCount); |
| } else { |
| return other.union(this); |
| } |
| } |
| |
| @Override |
| public HashDocSet clone() { |
| return new HashDocSet(this); |
| } |
| |
| // don't implement andNotSize() and unionSize() on purpose... they are implemented |
| // in BaseDocSet in terms of intersectionSize(). |
| |
| |
| @Override |
| public long ramBytesUsed() { |
| return BASE_RAM_BYTES_USED + (table.length<<2); |
| } |
| |
| @Override |
| public Collection<Accountable> getChildResources() { |
| return Collections.emptyList(); |
| } |
| } |