| package org.apache.lucene.facet.range; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| /** Counts how many times each range was seen; |
| * per-hit it's just a binary search ({@link #add}) |
| * against the elementary intervals, and in the end we |
| * rollup back to the original ranges. */ |
| |
| final class LongRangeCounter { |
| |
| final LongRangeNode root; |
| final long[] boundaries; |
| final int[] leafCounts; |
| |
| // Used during rollup |
| private int leafUpto; |
| private int missingCount; |
| |
| public LongRangeCounter(LongRange[] ranges) { |
| // Maps all range inclusive endpoints to int flags; 1 |
| // = start of interval, 2 = end of interval. We need to |
| // track the start vs end case separately because if a |
| // given point is both, then it must be its own |
| // elementary interval: |
| Map<Long,Integer> endsMap = new HashMap<Long,Integer>(); |
| |
| endsMap.put(Long.MIN_VALUE, 1); |
| endsMap.put(Long.MAX_VALUE, 2); |
| |
| for(LongRange range : ranges) { |
| Integer cur = endsMap.get(range.minIncl); |
| if (cur == null) { |
| endsMap.put(range.minIncl, 1); |
| } else { |
| endsMap.put(range.minIncl, cur.intValue() | 1); |
| } |
| cur = endsMap.get(range.maxIncl); |
| if (cur == null) { |
| endsMap.put(range.maxIncl, 2); |
| } else { |
| endsMap.put(range.maxIncl, cur.intValue() | 2); |
| } |
| } |
| |
| List<Long> endsList = new ArrayList<Long>(endsMap.keySet()); |
| Collections.sort(endsList); |
| |
| // Build elementaryIntervals (a 1D Venn diagram): |
| List<InclusiveRange> elementaryIntervals = new ArrayList<InclusiveRange>(); |
| int upto0 = 1; |
| long v = endsList.get(0); |
| long prev; |
| if (endsMap.get(v) == 3) { |
| elementaryIntervals.add(new InclusiveRange(v, v)); |
| prev = v+1; |
| } else { |
| prev = v; |
| } |
| |
| while (upto0 < endsList.size()) { |
| v = endsList.get(upto0); |
| int flags = endsMap.get(v); |
| //System.out.println(" v=" + v + " flags=" + flags); |
| if (flags == 3) { |
| // This point is both an end and a start; we need to |
| // separate it: |
| if (v > prev) { |
| elementaryIntervals.add(new InclusiveRange(prev, v-1)); |
| } |
| elementaryIntervals.add(new InclusiveRange(v, v)); |
| prev = v+1; |
| } else if (flags == 1) { |
| // This point is only the start of an interval; |
| // attach it to next interval: |
| if (v > prev) { |
| elementaryIntervals.add(new InclusiveRange(prev, v-1)); |
| } |
| prev = v; |
| } else { |
| assert flags == 2; |
| // This point is only the end of an interval; attach |
| // it to last interval: |
| elementaryIntervals.add(new InclusiveRange(prev, v)); |
| prev = v+1; |
| } |
| //System.out.println(" ints=" + elementaryIntervals); |
| upto0++; |
| } |
| |
| // Build binary tree on top of intervals: |
| root = split(0, elementaryIntervals.size(), elementaryIntervals); |
| |
| // Set outputs, so we know which range to output for |
| // each node in the tree: |
| for(int i=0;i<ranges.length;i++) { |
| root.addOutputs(i, ranges[i]); |
| } |
| |
| // Set boundaries (ends of each elementary interval): |
| boundaries = new long[elementaryIntervals.size()]; |
| for(int i=0;i<boundaries.length;i++) { |
| boundaries[i] = elementaryIntervals.get(i).end; |
| } |
| |
| leafCounts = new int[boundaries.length]; |
| |
| //System.out.println("ranges: " + Arrays.toString(ranges)); |
| //System.out.println("intervals: " + elementaryIntervals); |
| //System.out.println("boundaries: " + Arrays.toString(boundaries)); |
| //System.out.println("root:\n" + root); |
| } |
| |
| public void add(long v) { |
| //System.out.println("add v=" + v); |
| |
| // NOTE: this works too, but it's ~6% slower on a simple |
| // test with a high-freq TermQuery w/ range faceting on |
| // wikimediumall: |
| /* |
| int index = Arrays.binarySearch(boundaries, v); |
| if (index < 0) { |
| index = -index-1; |
| } |
| leafCounts[index]++; |
| */ |
| |
| // Binary search to find matched elementary range; we |
| // are guaranteed to find a match because the last |
| // boundary is Long.MAX_VALUE: |
| |
| int lo = 0; |
| int hi = boundaries.length - 1; |
| while (true) { |
| int mid = (lo + hi) >>> 1; |
| //System.out.println(" cycle lo=" + lo + " hi=" + hi + " mid=" + mid + " boundary=" + boundaries[mid] + " to " + boundaries[mid+1]); |
| if (v <= boundaries[mid]) { |
| if (mid == 0) { |
| leafCounts[0]++; |
| return; |
| } else { |
| hi = mid - 1; |
| } |
| } else if (v > boundaries[mid+1]) { |
| lo = mid + 1; |
| } else { |
| leafCounts[mid+1]++; |
| //System.out.println(" incr @ " + (mid+1) + "; now " + leafCounts[mid+1]); |
| return; |
| } |
| } |
| } |
| |
| /** Fills counts corresponding to the original input |
| * ranges, returning the missing count (how many hits |
| * didn't match any ranges). */ |
| public int fillCounts(int[] counts) { |
| //System.out.println(" rollup"); |
| missingCount = 0; |
| leafUpto = 0; |
| rollup(root, counts, false); |
| return missingCount; |
| } |
| |
| private int rollup(LongRangeNode node, int[] counts, boolean sawOutputs) { |
| int count; |
| sawOutputs |= node.outputs != null; |
| if (node.left != null) { |
| count = rollup(node.left, counts, sawOutputs); |
| count += rollup(node.right, counts, sawOutputs); |
| } else { |
| // Leaf: |
| count = leafCounts[leafUpto]; |
| leafUpto++; |
| if (!sawOutputs) { |
| // This is a missing count (no output ranges were |
| // seen "above" us): |
| missingCount += count; |
| } |
| } |
| if (node.outputs != null) { |
| for(int rangeIndex : node.outputs) { |
| counts[rangeIndex] += count; |
| } |
| } |
| //System.out.println(" rollup node=" + node.start + " to " + node.end + ": count=" + count); |
| return count; |
| } |
| |
| private static LongRangeNode split(int start, int end, List<InclusiveRange> elementaryIntervals) { |
| if (start == end-1) { |
| // leaf |
| InclusiveRange range = elementaryIntervals.get(start); |
| return new LongRangeNode(range.start, range.end, null, null, start); |
| } else { |
| int mid = (start + end) >>> 1; |
| LongRangeNode left = split(start, mid, elementaryIntervals); |
| LongRangeNode right = split(mid, end, elementaryIntervals); |
| return new LongRangeNode(left.start, right.end, left, right, -1); |
| } |
| } |
| |
| private static final class InclusiveRange { |
| public final long start; |
| public final long end; |
| |
| public InclusiveRange(long start, long end) { |
| assert end >= start; |
| this.start = start; |
| this.end = end; |
| } |
| |
| @Override |
| public String toString() { |
| return start + " to " + end; |
| } |
| } |
| |
| /** Holds one node of the segment tree. */ |
| public static final class LongRangeNode { |
| final LongRangeNode left; |
| final LongRangeNode right; |
| |
| // Our range, inclusive: |
| final long start; |
| final long end; |
| |
| // If we are a leaf, the index into elementary ranges that |
| // we point to: |
| final int leafIndex; |
| |
| // Which range indices to output when a query goes |
| // through this node: |
| List<Integer> outputs; |
| |
| public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right, int leafIndex) { |
| this.start = start; |
| this.end = end; |
| this.left = left; |
| this.right = right; |
| this.leafIndex = leafIndex; |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(); |
| toString(sb, 0); |
| return sb.toString(); |
| } |
| |
| static void indent(StringBuilder sb, int depth) { |
| for(int i=0;i<depth;i++) { |
| sb.append(" "); |
| } |
| } |
| |
| /** Recursively assigns range outputs to each node. */ |
| void addOutputs(int index, LongRange range) { |
| if (start >= range.minIncl && end <= range.maxIncl) { |
| // Our range is fully included in the incoming |
| // range; add to our output list: |
| if (outputs == null) { |
| outputs = new ArrayList<Integer>(); |
| } |
| outputs.add(index); |
| } else if (left != null) { |
| assert right != null; |
| // Recurse: |
| left.addOutputs(index, range); |
| right.addOutputs(index, range); |
| } |
| } |
| |
| void toString(StringBuilder sb, int depth) { |
| indent(sb, depth); |
| if (left == null) { |
| assert right == null; |
| sb.append("leaf: " + start + " to " + end); |
| } else { |
| sb.append("node: " + start + " to " + end); |
| } |
| if (outputs != null) { |
| sb.append(" outputs="); |
| sb.append(outputs); |
| } |
| sb.append('\n'); |
| |
| if (left != null) { |
| assert right != null; |
| left.toString(sb, depth+1); |
| right.toString(sb, depth+1); |
| } |
| } |
| } |
| } |