| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.spatial.prefix; |
| |
| import java.io.IOException; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.concurrent.ConcurrentHashMap; |
| |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.IndexReaderContext; |
| import org.apache.lucene.search.DoubleValuesSource; |
| import org.apache.lucene.spatial.SpatialStrategy; |
| import org.apache.lucene.spatial.prefix.tree.Cell; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| import org.apache.lucene.spatial.query.SpatialArgs; |
| import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource; |
| import org.apache.lucene.util.Bits; |
| import org.locationtech.spatial4j.shape.Point; |
| import org.locationtech.spatial4j.shape.Shape; |
| |
| /** |
| * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two |
| * subclasses are {@link RecursivePrefixTreeStrategy} and {@link |
| * TermQueryPrefixTreeStrategy}. This strategy is most effective as a fast |
| * approximate spatial search filter. |
| * <p> |
| * <b>Characteristics:</b> |
| * <br> |
| * <ul> |
| * <li>Can index any shape; however only {@link RecursivePrefixTreeStrategy} |
| * can effectively search non-point shapes.</li> |
| * <li>Can index a variable number of shapes per field value. This strategy |
| * can do it via multiple calls to {@link #createIndexableFields(org.locationtech.spatial4j.shape.Shape)} |
| * for a document or by giving it some sort of Shape aggregate (e.g. JTS |
| * WKT MultiPoint). The shape's boundary is approximated to a grid precision. |
| * </li> |
| * <li>Can query with any shape. The shape's boundary is approximated to a grid |
| * precision.</li> |
| * <li>Only {@link org.apache.lucene.spatial.query.SpatialOperation#Intersects} |
| * is supported. If only points are indexed then this is effectively equivalent |
| * to IsWithin.</li> |
| * <li>The strategy supports {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point,double)} |
| * even for multi-valued data, so long as the indexed data is all points; the |
| * behavior is undefined otherwise. However, <em>it will likely be removed in |
| * the future</em> in lieu of using another strategy with a more scalable |
| * implementation. Use of this call is the only |
| * circumstance in which a cache is used. The cache is simple but as such |
| * it doesn't scale to large numbers of points nor is it real-time-search |
| * friendly.</li> |
| * </ul> |
| * <p> |
| * <b>Implementation:</b> |
| * <p> |
| * The {@link SpatialPrefixTree} does most of the work, for example returning |
| * a list of terms representing grids of various sizes for a supplied shape. |
| * An important |
| * configuration item is {@link #setDistErrPct(double)} which balances |
| * shape precision against scalability. See those javadocs. |
| * |
| * @lucene.experimental |
| */ |
| public abstract class PrefixTreeStrategy extends SpatialStrategy { |
| protected final SpatialPrefixTree grid; |
| private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<>(); |
| protected int defaultFieldValuesArrayLen = 2; |
| protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ] |
| protected boolean pointsOnly = false;//if true, there are no leaves |
| |
| public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { |
| super(grid.getSpatialContext(), fieldName); |
| this.grid = grid; |
| } |
| |
| public SpatialPrefixTree getGrid() { |
| return grid; |
| } |
| |
| /** |
| * A memory hint used by {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)} |
| * for how big the initial size of each Document's array should be. The |
| * default is 2. Set this to slightly more than the default expected number |
| * of points per document. |
| */ |
| public void setDefaultFieldValuesArrayLen(int defaultFieldValuesArrayLen) { |
| this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen; |
| } |
| |
| public double getDistErrPct() { |
| return distErrPct; |
| } |
| |
| /** |
| * The default measure of shape precision affecting shapes at index and query |
| * times. Points don't use this as they are always indexed at the configured |
| * maximum precision ({@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree#getMaxLevels()}); |
| * this applies to all other shapes. Specific shapes at index and query time |
| * can use something different than this default value. If you don't set a |
| * default then the default is {@link SpatialArgs#DEFAULT_DISTERRPCT} -- |
| * 2.5%. |
| * |
| * @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct() |
| */ |
| public void setDistErrPct(double distErrPct) { |
| this.distErrPct = distErrPct; |
| } |
| |
| public boolean isPointsOnly() { |
| return pointsOnly; |
| } |
| |
| /** True if only indexed points shall be supported. There are no "leafs" in such a case, except those |
| * at maximum precision. */ |
| public void setPointsOnly(boolean pointsOnly) { |
| this.pointsOnly = pointsOnly; |
| } |
| |
| @Override |
| public Field[] createIndexableFields(Shape shape) { |
| double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx); |
| return createIndexableFields(shape, distErr); |
| } |
| |
| /** |
| * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a |
| * {@link org.apache.lucene.analysis.TokenStream}. |
| */ |
| public Field[] createIndexableFields(Shape shape, double distErr) { |
| int detailLevel = grid.getLevelForDistance(distErr); |
| return createIndexableFields(shape, detailLevel); |
| } |
| |
| public Field[] createIndexableFields(Shape shape, int detailLevel) { |
| //TODO re-use TokenStream LUCENE-5776: Subclass Field, put cell iterator there, override tokenStream() |
| Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null); |
| CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator(); |
| cellToBytesRefIterator.reset(cells); |
| BytesRefIteratorTokenStream tokenStream = new BytesRefIteratorTokenStream(); |
| tokenStream.setBytesRefIterator(cellToBytesRefIterator); |
| Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE); |
| return new Field[]{field}; |
| } |
| |
| public class ShapeTokenStream extends BytesRefIteratorTokenStream { |
| |
| public void setShape(Shape shape) { |
| double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx); |
| int detailLevel = grid.getLevelForDistance(distErr); |
| Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null); |
| CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator(); |
| cellToBytesRefIterator.reset(cells); |
| setBytesRefIterator(cellToBytesRefIterator); |
| } |
| |
| } |
| |
| public ShapeTokenStream tokenStream() { |
| return new ShapeTokenStream(); |
| } |
| |
| protected CellToBytesRefIterator newCellToBytesRefIterator() { |
| //subclasses could return one that never emits leaves, or does both, or who knows. |
| return new CellToBytesRefIterator(); |
| } |
| |
| protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) { |
| if (pointsOnly && !isPointShape(shape)) { |
| throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing"); |
| } |
| return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator |
| } |
| |
| /* Indexed, tokenized, not stored. */ |
| public static final FieldType FIELD_TYPE = new FieldType(); |
| |
| static { |
| FIELD_TYPE.setTokenized(true); |
| FIELD_TYPE.setOmitNorms(true); |
| FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); |
| FIELD_TYPE.freeze(); |
| } |
| |
| @Override |
| public DoubleValuesSource makeDistanceValueSource(Point queryPoint, double multiplier) { |
| PointPrefixTreeFieldCacheProvider p = provider.get( getFieldName() ); |
| if( p == null ) { |
| synchronized (this) {//double checked locking idiom is okay since provider is threadsafe |
| p = provider.get( getFieldName() ); |
| if (p == null) { |
| p = new PointPrefixTreeFieldCacheProvider(grid, getFieldName(), defaultFieldValuesArrayLen); |
| provider.put(getFieldName(),p); |
| } |
| } |
| } |
| |
| return new ShapeFieldCacheDistanceValueSource(ctx, p, queryPoint, multiplier); |
| } |
| |
| /** |
| * Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a so-called |
| * "heatmap". |
| * |
| * @see HeatmapFacetCounter#calcFacets(PrefixTreeStrategy, IndexReaderContext, Bits, Shape, int, int) |
| */ |
| public HeatmapFacetCounter.Heatmap calcFacets(IndexReaderContext context, Bits topAcceptDocs, |
| Shape inputShape, final int facetLevel, int maxCells) throws IOException { |
| return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells); |
| } |
| |
| /** |
| * Returns true if the {@code shape} is a {@link Point}. For custom spatial contexts, it may make sense to |
| * have certain other shapes return true. |
| * @lucene.experimental |
| */ |
| protected boolean isPointShape(Shape shape) { |
| return shape instanceof Point; |
| } |
| } |