// blob: f36c270d7cf56a80cf20904807f1b5eaa2734184 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.spi.ImageReaderSpi;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageInputStreamImpl;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.AbstractList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.spatial.prefix.HeatmapFacetCounter;
import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.AbstractSpatialPrefixTreeFieldType;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.RptWithGeometrySpatialField;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.DistanceUnits;
import org.apache.solr.util.SpatialUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Shape;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* JSON Facet API request for a 2D spatial summary of a rectangular region.
*
* @see HeatmapFacetCounter
* @version 7.5.0
*/
@SuppressWarnings("WeakerAccess")
public class FacetHeatmap extends FacetRequest {
// note: much of this code was moved from SpatialHeatmapFacets (SimpleFacets API)
// The *_PARAM constants below are the keys looked up in the facet's argument map by Parser.parse().
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_GEOM */
public static final String GEOM_PARAM = "geom";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_LEVEL */
public static final String LEVEL_PARAM = "gridLevel";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_DIST_ERR_PCT */
public static final String DIST_ERR_PCT_PARAM = "distErrPct";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_DIST_ERR */
public static final String DIST_ERR_PARAM = "distErr";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_MAX_CELLS */
public static final String MAX_CELLS_PARAM = "maxCells";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_FORMAT */
public static final String FORMAT_PARAM = "format";
/** Value of {@link #FORMAT_PARAM} selecting PNG-encoded counts; shard responses always use this format. */
public static final String FORMAT_PNG = "png";
/** Value of {@link #FORMAT_PARAM} selecting counts as a list of rows of integers; used when no format is given. */
public static final String FORMAT_INTS2D = "ints2D";
//note: if we change or add more formats, remember to update the javadoc on the format param
//TODO for more format ideas, see formatCountsVal()
/** Default distErrPct handed to {@code SpatialArgs.resolveDistErr} when the grid level has to be derived. */
public static final double DEFAULT_DIST_ERR_PCT = 0.15;
// Logger of this class; used for debug output about PNG encoding size and timing.
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
 * Parses the argument map of a heatmap facet into a {@link FacetHeatmap} request.
 * <p>
 * The parse resolves the spatial strategy of the target field, the bounding shape, the grid level
 * (given directly, or derived from {@code distErr} / {@code distErrPct}), the cell limit and the
 * output format. Invalid arguments are reported by throwing the exception built by {@code err(...)}.
 */
static class Parser extends FacetParser<FacetHeatmap> {
@SuppressWarnings({"rawtypes"})
Parser(FacetParser parent, String key) {
super(parent, key);
}
/**
 * Builds the facet request from its arguments.
 *
 * @param argsObj the facet arguments; must be a {@link Map}, anything else is rejected
 * @return the parsed request, which is also stored in {@code this.facet}
 */
public FacetHeatmap parse(Object argsObj) {
assert facet == null;
if (!(argsObj instanceof Map)) {
throw err("Missing heatmap arguments");
}
@SuppressWarnings("unchecked")
Map<String, Object> argsMap = (Map<String, Object>) argsObj;
String fieldName = getField(argsMap);
//get the strategy from the field type
final SchemaField schemaField = getSchema().getField(fieldName);
final FieldType type = schemaField.getType();
final PrefixTreeStrategy strategy;
final DistanceUnits distanceUnits;
// note: having two instanceof conditions is not ideal, versus one. If we start needing to add more then refactor.
if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
@SuppressWarnings({"rawtypes"})
AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
distanceUnits = rptType.getDistanceUnits();
} else if (type instanceof RptWithGeometrySpatialField) {
// this field type wraps a prefix-tree strategy; the heatmap is computed on that index strategy
RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
distanceUnits = rptSdvType.getDistanceUnits();
} else {
//FYI we support the term query one too but few people use that one
throw err("heatmap field needs to be of type " + SpatialRecursivePrefixTreeFieldType.class + " or " + RptWithGeometrySpatialField.class);
}
final SpatialContext ctx = strategy.getSpatialContext();
//get the bbox (query Rectangle)
// when no geom is given, the whole world (per the spatial context) is used
String geomStr = getString(argsMap, GEOM_PARAM, null);
final Shape boundsShape = geomStr == null ? ctx.getWorldBounds() : SpatialUtils.parseGeomSolrException(geomStr, ctx);
//get the grid level (possibly indirectly via distErr or distErrPct)
final int gridLevel;
final Long gridLevelObj = getLongOrNull(argsMap, LEVEL_PARAM, false);
final int maxGridLevel = strategy.getGrid().getMaxLevels();
if (gridLevelObj != null) {
// an explicit grid level wins; it only has to be within 1..maxGridLevel
gridLevel = gridLevelObj.intValue();
if (gridLevel <= 0 || gridLevel > maxGridLevel) {
throw err(LEVEL_PARAM +" should be > 0 and <= " + maxGridLevel);
}
} else {
//SpatialArgs has utility methods to resolve a 'distErr' from optionally set distErr & distErrPct. Arguably that
// should be refactored to feel less weird than using it like this.
// NOTE(review): the null check on boundsShape below looks redundant unless parseGeomSolrException can return null — confirm
SpatialArgs spatialArgs = new SpatialArgs(SpatialOperation.Intersects/*ignored*/,
boundsShape == null ? ctx.getWorldBounds() : boundsShape);
final Double distErrObj = getDoubleOrNull(argsMap, DIST_ERR_PARAM, false);
if (distErrObj != null) {
// convert distErr units based on configured units
spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
}
spatialArgs.setDistErrPct(getDoubleOrNull(argsMap, DIST_ERR_PCT_PARAM, false));
double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
if (distErr <= 0) {
throw err(DIST_ERR_PCT_PARAM + " or " + DIST_ERR_PARAM
+ " should be > 0 or instead provide " + LEVEL_PARAM + "=" + maxGridLevel
+ " if you insist on maximum detail");
}
//The SPT (grid) can lookup a grid level satisfying an error distance constraint
gridLevel = strategy.getGrid().getLevelForDistance(distErr);
}
// NOTE(review): the long is narrowed to int here without a range check — confirm huge values cannot reach this point
final int maxCells = (int) getLong(argsMap, MAX_CELLS_PARAM, 100_000);// will throw later if exceeded
// only the two known formats are accepted; ints2D is the default
final String format = getString(argsMap, FORMAT_PARAM, FORMAT_INTS2D);
if (!format.equals(FORMAT_INTS2D) && !format.equals(FORMAT_PNG)) {
throw err("format should be " + FORMAT_INTS2D + " or " + FORMAT_PNG);
}
this.facet = new FacetHeatmap(argsMap, strategy, boundsShape, gridLevel, maxCells, format);
parseCommonParams(argsObj); // e.g. domain change
return this.facet;
}
}//class Parser
// Immutable state of the request, fixed at parse time.
/** The arguments this facet was parsed from; returned as the facet description. */
private final Map<String, Object> argsMap;
/** Prefix-tree strategy of the faceted field. */
private final PrefixTreeStrategy strategy;
/** Region handed to the heatmap computation. */
private final Shape boundsShape;
/** Grid level handed to the heatmap computation and echoed in the response. */
private final int gridLevel;
/** Cell limit handed to the heatmap computation. */
private final int maxCells;
/** Requested output format of the counts. */
private final String format;

/**
 * Creates the request from already resolved values; the arguments are stored as given,
 * without copying or further validation.
 */
FacetHeatmap(
    Map<String, Object> argsMap,
    PrefixTreeStrategy strategy,
    Shape boundsShape,
    int gridLevel,
    int maxCells,
    String format) {
  // what to compute on
  this.strategy = strategy;
  this.boundsShape = boundsShape;
  // how detailed and how large the grid may be
  this.gridLevel = gridLevel;
  this.maxCells = maxCells;
  // how to report it
  this.format = format;
  this.argsMap = argsMap;
}
//TODO perhaps all FacetRequest objs should have this?
/**
 * Describes this facet by its original arguments.
 *
 * @return the very map this request was constructed with (not a copy)
 */
@Override
public Map<String, Object> getFacetDescription() {
  return this.argsMap;
}
/**
 * Creates the processor that computes this heatmap for the given facet context.
 *
 * @param fcontext the context the facet is evaluated in
 * @return a new processor bound to this request
 */
@Override
@SuppressWarnings({"rawtypes"})
public FacetProcessor createFacetProcessor(FacetContext fcontext) {
  final FacetProcessor heatmapProcessor = new FacetHeatmapProcessor(fcontext);
  return heatmapProcessor;
}
// don't use an anonymous class since the getSimpleName() isn't friendly in debug output
/**
 * Computes the heatmap over the facet's domain and fills in the response: the grid level, the
 * grid dimensions, the covered region and the counts in the effective format.
 */
@SuppressWarnings({"rawtypes"})
private class FacetHeatmapProcessor extends FacetProcessor {

  @SuppressWarnings({"unchecked"})
  public FacetHeatmapProcessor(FacetContext fcontext) {
    super(fcontext, FacetHeatmap.this);
  }

  /**
   * Runs the heatmap computation and populates {@code response}.
   *
   * @throws SolrException with BAD_REQUEST when the computation rejects its arguments
   */
  @Override
  @SuppressWarnings({"unchecked"})
  public void process() throws IOException {
    super.process(); // handles domain changes

    //Compute!
    final HeatmapFacetCounter.Heatmap heatmap;
    try {
      heatmap = HeatmapFacetCounter.calcFacets(
          strategy,
          fcontext.searcher.getTopReaderContext(),
          getTopAcceptDocs(fcontext.base, fcontext.searcher), // turn DocSet into Bits
          boundsShape,
          gridLevel,
          maxCells);
    } catch (IllegalArgumentException e) {//e.g. too many cells
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
    }

    //Populate response
    response = new SimpleOrderedMap<>();
    response.add("gridLevel", gridLevel);
    response.add("columns", heatmap.columns);
    response.add("rows", heatmap.rows);
    response.add("minX", heatmap.region.getMinX());
    response.add("maxX", heatmap.region.getMaxX());
    response.add("minY", heatmap.region.getMinY());
    response.add("maxY", heatmap.region.getMaxY());

    //A shard request will always be a PNG
    final String effectiveFormat;
    if (fcontext.isShard()) {
      effectiveFormat = FORMAT_PNG;
    } else {
      effectiveFormat = FacetHeatmap.this.format;
    }
    final Object countsVal = formatCountsVal(
        effectiveFormat, heatmap.columns, heatmap.rows, heatmap.counts, fcontext.getDebugInfo());
    response.add("counts_" + effectiveFormat, countsVal);
    // note: we do not call processStats or processSubs as it's not supported yet
  }

  //TODO this is a general utility that should go elsewhere? DocSetUtil? Then should DocSetBase.getBits go away?
  /**
   * Expresses a DocSet as top-level accept-docs Bits.
   *
   * @return {@code null} when the set has as many docs as the searcher reports (match everything),
   *     otherwise Bits sized to the searcher's maxDoc
   */
  private Bits getTopAcceptDocs(DocSet docSet, SolrIndexSearcher searcher) throws IOException {
    if (docSet.size() == searcher.numDocs()) {
      return null; // means match everything (all live docs). This can speedup things a lot.
    }
    if (docSet.size() == 0) {
      return new Bits.MatchNoBits(searcher.maxDoc()); // can speedup things a lot
    }
    if (docSet instanceof BitDocSet) {
      return ((BitDocSet) docSet).getBits();
    }
    // TODO DocSetBase.getBits ought to be at DocSet level? Though it doesn't know maxDoc but it could?
    final FixedBitSet accepted = new FixedBitSet(searcher.maxDoc());
    final DocIterator docs = docSet.iterator();
    while (docs.hasNext()) {
      accepted.set(docs.nextDoc());
    }
    return accepted;
  }
}
/**
 * Converts the raw counts into the response value for the given format.
 *
 * @param format one of {@link #FORMAT_INTS2D} or {@link #FORMAT_PNG}
 * @param columns number of grid columns
 * @param rows number of grid rows
 * @param counts the cell counts; may be null
 * @param debugInfo receives PNG timing info when the PNG format is produced; may be null
 * @return {@code null} when {@code counts} is null or holds no count above zero, otherwise the
 *     formatted counts
 * @throws SolrException with BAD_REQUEST for any other format
 */
private static Object formatCountsVal(String format, int columns, int rows, int[] counts, FacetDebugInfo debugInfo) {
  if (counts == null) {
    return null;
  }
  // Skip ahead to the first positive count; running off the end means there is nothing to report.
  int firstPositive = 0;
  while (firstPositive < counts.length && counts[firstPositive] <= 0) {
    firstPositive++;
  }
  if (firstPositive == counts.length) {
    return null;
  }

  if (format.equals(FORMAT_INTS2D)) {
    //A List of List of Integers. Good for small heatmaps and ease of consumption
    return asInts2D(columns, rows, counts);
  } else if (format.equals(FORMAT_PNG)) {
    //A PNG graphic; compressed. Good for large & dense heatmaps; hard to consume.
    return asPngBytes(columns, rows, counts, debugInfo);
  }
  //TODO case UTFGRID https://github.com/mapbox/utfgrid-spec
  //TODO case skipList: //A sequence of values; negative values are actually how many 0's to insert.
  // Good for small or large but sparse heatmaps.
  //TODO auto choose png or skipList; use skipList when < ~25% full or <= ~512 cells
  // remember to augment error list below when we add more formats.
  throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown format: " + format);
}
/**
 * Creates the merger that combines per-shard heatmap results: the PNG counts of every shard are
 * summed cell by cell, while all other entries are taken from the first shard result merged.
 */
@Override
public FacetMerger createFacetMerger(Object prototype) {
  return new FacetMerger() {
    NamedList<Object> mergedResult; // except counts, which we add in when done
    int[] counts;

    // note: there appears to be no mechanism to modify the shard requests in this API. If we could, we'd
    // change the format to png. Instead, we have the facet processor recognize it's a shard request and ignore
    // the requested format, which seems like a hack.

    @SuppressWarnings("unchecked")
    @Override
    public void merge(Object facetResult, Context mcontext) {
      final NamedList<Object> shardResult = (NamedList<Object>) facetResult;
      // pull the PNG out of the shard result so only the summed counts get reported at the end
      final byte[] shardPng = (byte[]) shardResult.remove("counts_" + FORMAT_PNG);
      counts = addPngToIntArray(shardPng, counts);
      if (mergedResult != null) {
        return;
      }
      mergedResult = shardResult;
    }

    @Override
    public void finish(Context mcontext) {
      //nothing to do; we have no sub-facets
    }

    @Override
    public Object getMergedResult() {
      final String countsKey = "counts_" + format;
      final int columns = (Integer) mergedResult.get("columns");
      final int rows = (Integer) mergedResult.get("rows");
      final Object countsVal = formatCountsVal(format, columns, rows, counts, null);//TODO where debugInfo?
      mergedResult.add(countsKey, countsVal);
      return mergedResult;
    }
  };
}
/**
 * Adds the counts encoded in a PNG to an accumulator array.
 *
 * @param pngBytes the PNG to decode; when null the accumulator is returned untouched
 * @param counts the accumulator, or null to have one allocated with one slot per pixel
 * @return the accumulator holding the added counts
 */
@VisibleForTesting
static int[] addPngToIntArray(byte[] pngBytes, int[] counts) {
  if (pngBytes == null) {
    return counts;
  }

  //read PNG
  final BufferedImage decoded = PngHelper.readImage(pngBytes);
  final int width = decoded.getWidth();
  final int height = decoded.getHeight();

  if (counts != null) {
    assert counts.length == width * height;
  } else {
    counts = new int[width * height];
  }

  // Cells are stored column after column, so a running index visits them in storage order.
  int cell = 0;
  for (int col = 0; col < width; col++) {
    for (int row = 0; row < height; row++) {
      counts[cell] += PngHelper.getCountAtColumnRow(decoded, height, col, row);
      cell++;
    }
  }
  return counts;
}
@VisibleForTesting
static List<List<Integer>> asInts2D(final int columns, final int rows, final int[] counts) {
  // Exposes the flat counts array as a list of rows, each row being a list of per-column counts.
  // The cell at (column, y) is read from counts[column * rows + y], with y counted bottom-up.
  // A row in which no count is > 0 is represented by null instead of a list.
  // An index outside 0..rows-1 (outer list) or 0..columns-1 (row list) throws
  // IndexOutOfBoundsException.
  //Returns a view versus returning a copy. This saves memory.
  //The data is oriented naturally for human/developer viewing: one row at a time top-down
  return new AbstractList<List<Integer>>() {
    @Override
    public List<Integer> get(final int rowIdx) {//top-down remember; the heatmap.counts is bottom up
      // Reject bad indexes explicitly. Without this check a negative rowIdx yields y >= rows, and the
      // index arithmetic below would silently read cells belonging to the neighbouring column.
      if (rowIdx < 0 || rowIdx >= rows) {
        throw new IndexOutOfBoundsException("Row index: " + rowIdx + ", rows: " + rows);
      }
      final int y = rows - rowIdx - 1;//flip direction for 'y'
      //check if all zeroes and return null if so
      boolean hasNonZero = false;
      for (int c = 0; c < columns; c++) {
        if (counts[c * rows + y] > 0) {
          hasNonZero = true;
          break;
        }
      }
      if (!hasNonZero) {
        return null;
      }
      return new AbstractList<Integer>() {
        @Override
        public Integer get(int columnIdx) {
          if (columnIdx < 0 || columnIdx >= columns) {
            throw new IndexOutOfBoundsException("Column index: " + columnIdx + ", columns: " + columns);
          }
          return counts[columnIdx * rows + y];
        }

        @Override
        public int size() {
          return columns;
        }
      };
    }

    @Override
    public int size() {
      return rows;
    }
  };
}
/**
 * Encodes the counts as a PNG with one pixel per cell.
 *
 * @param columns number of grid columns (image width)
 * @param rows number of grid rows (image height)
 * @param counts the cell counts, read at index {@code column * rows + row}
 * @param debugInfo when not null, receives the encoding time in milliseconds
 * @return the PNG bytes
 */
@VisibleForTesting
static byte[] asPngBytes(final int columns, final int rows, final int[] counts, FacetDebugInfo debugInfo) {
  final long beganAtNanos = System.nanoTime();

  final BufferedImage png = PngHelper.newImage(columns, rows);
  for (int col = 0; col < columns; col++) {
    final int columnStart = col * rows;
    for (int row = 0; row < rows; row++) {
      PngHelper.writeCountAtColumnRow(png, rows, col, row, counts[columnStart + row]);
    }
  }
  final byte[] encoded = PngHelper.writeImage(png);

  final long elapsedNanos = System.nanoTime() - beganAtNanos;
  final long elapsedMs = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
  if (log.isDebugEnabled()) {
    log.debug("heatmap nativeSize={} pngSize={} pngTime={}", (counts.length * 4), encoded.length, elapsedMs);
  }
  if (debugInfo != null) {
    debugInfo.putInfoItem("heatmap png timing", elapsedMs);
  }
  return encoded;
}
/**
 * Helper that (ab)uses PNG images as a compressed container for the heatmap's integer counts:
 * every pixel carries one count, stored in the pixel's 4 bytes.
 */
@VisibleForTesting
static class PngHelper {

  /** Provider from which a fresh PNG {@link ImageReader} is created for every image read. */
  static final ImageReaderSpi imageReaderSpi;//thread-safe

  static {
    final Iterator<ImageReader> imageReaders = ImageIO.getImageReadersByFormatName("png");
    if (!imageReaders.hasNext()) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't find png image reader, neaded for heatmaps!");
    }
    ImageReader imageReader = imageReaders.next();
    imageReaderSpi = imageReader.getOriginatingProvider();
  }

  /**
   * Decodes a PNG held entirely in memory.
   *
   * @param bytes the complete PNG file content
   * @return the first (and only expected) image of the PNG
   * @throws SolrException with SERVER_ERROR (and the I/O failure as its cause) when decoding fails
   */
  static BufferedImage readImage(final byte[] bytes) {
    // Wrap ImageInputStream around the bytes. We could use MemoryCacheImageInputStream but it will
    // cache the data which is quite unnecessary given we have it all in-memory already.
    ImageInputStream imageInputStream = new ImageInputStreamImpl() {
      //TODO re-use this instance; superclass has 8KB buffer.

      @Override
      public int read() throws IOException {
        checkClosed();
        bitOffset = 0;
        if (streamPos >= bytes.length) {
          return -1;
        } else {
          // Mask to 0..255 as the read() contract demands. Returning the signed byte would hand
          // back negative values for bytes >= 0x80, and 0xFF would be mistaken for end of stream.
          return bytes[(int) streamPos++] & 0xFF;
        }
      }

      @Override
      public int read(byte[] b, int off, int len) throws IOException {
        checkClosed();
        bitOffset = 0;
        if (streamPos >= bytes.length) {
          return -1;
        } else {
          int copyLen = Math.min(len, bytes.length - (int) streamPos);
          System.arraycopy(bytes, (int) streamPos, b, off, copyLen);
          streamPos += copyLen;
          return copyLen;
        }
      }

      @Override
      public long length() {
        return bytes.length;
      }

      @Override
      public boolean isCached() {
        return true;
      }

      @Override
      public boolean isCachedMemory() {
        return true;
      }
    };
    try {
      //TODO can/should we re-use an imageReader instance on FacetInfo?
      ImageReader imageReader = imageReaderSpi.createReaderInstance();
      imageReader.setInput(imageInputStream,
          false,//forwardOnly
          true);//ignoreMetadata
      return imageReader.read(0);//read first & only image
    } catch (IOException e) {
      // keep the original exception as the cause so the stack trace isn't lost
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Problem reading png heatmap: " + e, e);
    }
  }

  /**
   * Encodes the image as PNG.
   *
   * @param image the image to encode
   * @return the PNG file content
   * @throws SolrException with SERVER_ERROR when no PNG writer is available or encoding fails
   */
  static byte[] writeImage(BufferedImage image) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(
        // initialize to roughly 1/4th the size a native int would take per-pixel
        image.getWidth() * image.getHeight() + 1024
    );
    try {
      // ImageIO.write signals a missing writer through its return value rather than an exception;
      // ignoring it would silently produce an empty byte array.
      if (!ImageIO.write(image, FORMAT_PNG, baos)) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't find png image writer, needed for heatmaps!");
      }
    } catch (IOException e) {
      // keep the original exception as the cause so the stack trace isn't lost
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "While generating PNG: " + e, e);
    }
    //too bad we can't access the raw byte[]; this copies to a new one
    return baos.toByteArray();
  }

  // We abuse the image for storing integers (4 bytes), and so we need a 4-byte ABGR.
  // first (low) byte is blue, next byte is green, next byte red, and last (high) byte is alpha.
  static BufferedImage newImage(int columns, int rows) {
    return new BufferedImage(columns, rows, BufferedImage.TYPE_4BYTE_ABGR);
  }

  // 'y' dimension goes top-down, so invert.
  // Alpha channel is high byte; 0 means transparent. So XOR those bits with '1' so that we need
  // to have counts > 16M before the picture starts to fade
  static void writeCountAtColumnRow(BufferedImage image, int rows, int c, int r, int val) {
    image.setRGB(c, rows - 1 - r, val ^ 0xFF_00_00_00);
  }

  // Inverse of writeCountAtColumnRow: same 'y' inversion, and the same XOR restores the count.
  static int getCountAtColumnRow(BufferedImage image, int rows, int c, int r) {
    return image.getRGB(c, rows - 1 - r) ^ 0xFF_00_00_00;
  }
}
}