lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermDistinctValuesCollector.java - lucene-solr - Git at Google

 package org.apache.lucene.search.grouping.term;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.grouping.AbstractDistinctValuesCollector;
 import org.apache.lucene.search.grouping.SearchGroup;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SentinelIntSet;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;

 /**
  * A term based implementation of {@link org.apache.lucene.search.grouping.AbstractDistinctValuesCollector} that relies
  * on {@link SortedDocValues} to count the distinct values per group.
  *
  * <p>For every group handed to the constructor, the collector gathers the set of distinct
  * {@code countField} values found on the collected documents of that group. Within a segment
  * both fields are compared by ordinal; a value is only resolved to a {@link BytesRef} when its
  * ordinal has not been recorded yet for the group.
  *
  * @lucene.experimental
  */
 public class TermDistinctValuesCollector extends AbstractDistinctValuesCollector<TermDistinctValuesCollector.GroupCount> {

   private final String groupField;
   private final String countField;

   /** One entry per group passed to the constructor; this list is what {@link #getGroups()} returns. */
   private final List<GroupCount> groups;

   /** Group ordinals of the tracked groups that are present in the current segment. */
   private final SentinelIntSet ordSet;

   /** Indexed by the slot returned from {@link #ordSet}; holds the group registered in that slot. */
   private final GroupCount[] groupCounts;

   private SortedDocValues groupFieldTermIndex;
   private SortedDocValues countFieldTermIndex;

   /**
    * Constructs {@link TermDistinctValuesCollector} instance.
    *
    * @param groupField The field to group by
    * @param countField The field to count distinct values for
    * @param groups The top N groups, collected during the first phase search
    */
   public TermDistinctValuesCollector(String groupField, String countField, Collection<SearchGroup<BytesRef>> groups) {
     this.groupField = groupField;
     this.countField = countField;
     this.groups = new ArrayList<>(groups.size());
     for (SearchGroup<BytesRef> group : groups) {
       this.groups.add(new GroupCount(group.groupValue));
     }
     // NOTE(review): -2 is presumably the "empty slot" marker of the set; it never collides with
     // the ordinals put into it by doSetNextReader (-1 or >= 0).
     ordSet = new SentinelIntSet(groups.size(), -2);
     // Sized like the set's backing array so that any slot returned by ordSet is a valid index.
     groupCounts = new GroupCount[ordSet.keys.length];
   }

   /**
    * Records the {@code countField} value of {@code doc} for the group the document belongs to,
    * provided that group is one of the tracked groups present in the current segment.
    *
    * @param doc segment-relative document id
    * @throws IOException declared by the overridden method
    */
   @Override
   public void collect(int doc) throws IOException {
     int slot = ordSet.find(groupFieldTermIndex.getOrd(doc));
     if (slot < 0) {
       // The document's group ordinal was not registered for this segment.
       return;
     }

     GroupCount gc = groupCounts[slot];
     int countOrd = countFieldTermIndex.getOrd(doc);
     if (doesNotContainOrd(countOrd, gc.ords)) {
       if (countOrd == -1) {
         // Ordinal -1 is mapped to the null value, mirroring the null handling in doSetNextReader.
         gc.uniqueValues.add(null);
       } else {
         BytesRef br = new BytesRef();
         countFieldTermIndex.lookupOrd(countOrd, br);
         gc.uniqueValues.add(br);
       }

       // Append the new ordinal and restore sorted order for the next binary search.
       gc.ords = Arrays.copyOf(gc.ords, gc.ords.length + 1);
       gc.ords[gc.ords.length - 1] = countOrd;
       if (gc.ords.length > 1) {
         Arrays.sort(gc.ords);
       }
     }
   }

   /**
    * Tells whether {@code ord} is absent from {@code ords}.
    *
    * @param ord the ordinal to look for
    * @param ords the ordinals recorded so far; must be sorted ascending when it has more than one element
    * @return {@code true} if {@code ord} does not occur in {@code ords}
    */
   private boolean doesNotContainOrd(int ord, int[] ords) {
     if (ords.length == 0) {
       return true;
     } else if (ords.length == 1) {
       return ord != ords[0];
     }
     return Arrays.binarySearch(ords, ord) < 0;
   }

   /**
    * Returns the per-group results.
    *
    * @return the list built in the constructor, one {@link GroupCount} per supplied group
    */
   @Override
   public List<GroupCount> getGroups() {
     return groups;
   }

   /**
    * Switches to the doc values of the next segment and re-maps every tracked group, and the
    * distinct values it has gathered so far, to the ordinals of that segment.
    *
    * @param context the segment that is about to be collected
    * @throws IOException if the doc values cannot be obtained
    */
   @Override
   protected void doSetNextReader(AtomicReaderContext context) throws IOException {
     groupFieldTermIndex = DocValues.getSorted(context.reader(), groupField);
     countFieldTermIndex = DocValues.getSorted(context.reader(), countField);
     ordSet.clear();
     for (GroupCount group : groups) {
       int groupOrd = group.groupValue == null ? -1 : groupFieldTermIndex.lookupTerm(group.groupValue);
       if (group.groupValue != null && groupOrd < 0) {
         // The group's term does not occur in this segment, so none of its documents can match.
         continue;
       }

       groupCounts[ordSet.put(groupOrd)] = group;
       group.ords = new int[group.uniqueValues.size()];
       // -2 is a filler that differs from every ordinal stored below (-1 or >= 0); it occupies
       // the positions of values that have no ordinal in this segment.
       Arrays.fill(group.ords, -2);
       int i = 0;
       for (BytesRef value : group.uniqueValues) {
         int countOrd = value == null ? -1 : countFieldTermIndex.lookupTerm(value);
         if (value == null || countOrd >= 0) {
           group.ords[i++] = countOrd;
         }
       }
       // The ordinals were written in the iteration order of uniqueValues, followed by the
       // fillers. collect() binary-searches this array, which requires ascending order.
       Arrays.sort(group.ords);
     }
   }

   /** Holds distinct values for a single group.
    *
    * @lucene.experimental */
   public static class GroupCount extends AbstractDistinctValuesCollector.GroupCount<BytesRef> {

     /**
      * Ordinals, in the current segment, of the values gathered for this group, kept sorted in
      * ascending order. Rebuilt on every segment in which the group is present.
      */
     int[] ords;

     GroupCount(BytesRef groupValue) {
       super(groupValue);
     }
   }

 }
	package org.apache.lucene.search.grouping.term;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import org.apache.lucene.index.AtomicReaderContext;
	import org.apache.lucene.index.DocValues;
	import org.apache.lucene.index.SortedDocValues;
	import org.apache.lucene.search.LeafCollector;
	import org.apache.lucene.search.grouping.AbstractDistinctValuesCollector;
	import org.apache.lucene.search.grouping.SearchGroup;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.SentinelIntSet;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Arrays;
	import java.util.Collection;
	import java.util.List;

	/**
	* A term based implementation of {@link org.apache.lucene.search.grouping.AbstractDistinctValuesCollector} that relies
	* on {@link SortedDocValues} to count the distinct values per group.
	*
	* @lucene.experimental
	*/
	public class TermDistinctValuesCollector extends AbstractDistinctValuesCollector<TermDistinctValuesCollector.GroupCount> {

	private final String groupField;
	private final String countField;
	private final List<GroupCount> groups;
	private final SentinelIntSet ordSet;
	private final GroupCount groupCounts[];

	private SortedDocValues groupFieldTermIndex;
	private SortedDocValues countFieldTermIndex;

	/**
	* Constructs {@link TermDistinctValuesCollector} instance.
	*
	* @param groupField The field to group by
	* @param countField The field to count distinct values for
	* @param groups The top N groups, collected during the first phase search
	*/
	public TermDistinctValuesCollector(String groupField, String countField, Collection<SearchGroup<BytesRef>> groups) {
	this.groupField = groupField;
	this.countField = countField;
	this.groups = new ArrayList<>(groups.size());
	for (SearchGroup<BytesRef> group : groups) {
	this.groups.add(new GroupCount(group.groupValue));
	}
	ordSet = new SentinelIntSet(groups.size(), -2);
	groupCounts = new GroupCount[ordSet.keys.length];
	}

	@Override
	public void collect(int doc) throws IOException {
	int slot = ordSet.find(groupFieldTermIndex.getOrd(doc));
	if (slot < 0) {
	return;
	}

	GroupCount gc = groupCounts[slot];
	int countOrd = countFieldTermIndex.getOrd(doc);
	if (doesNotContainOrd(countOrd, gc.ords)) {
	if (countOrd == -1) {
	gc.uniqueValues.add(null);
	} else {
	BytesRef br = new BytesRef();
	countFieldTermIndex.lookupOrd(countOrd, br);
	gc.uniqueValues.add(br);
	}

	gc.ords = Arrays.copyOf(gc.ords, gc.ords.length + 1);
	gc.ords[gc.ords.length - 1] = countOrd;
	if (gc.ords.length > 1) {
	Arrays.sort(gc.ords);
	}
	}
	}

	private boolean doesNotContainOrd(int ord, int[] ords) {
	if (ords.length == 0) {
	return true;
	} else if (ords.length == 1) {
	return ord != ords[0];
	}
	return Arrays.binarySearch(ords, ord) < 0;
	}

	@Override
	public List<GroupCount> getGroups() {
	return groups;
	}

	@Override
	protected void doSetNextReader(AtomicReaderContext context) throws IOException {
	groupFieldTermIndex = DocValues.getSorted(context.reader(), groupField);
	countFieldTermIndex = DocValues.getSorted(context.reader(), countField);
	ordSet.clear();
	for (GroupCount group : groups) {
	int groupOrd = group.groupValue == null ? -1 : groupFieldTermIndex.lookupTerm(group.groupValue);
	if (group.groupValue != null && groupOrd < 0) {
	continue;
	}

	groupCounts[ordSet.put(groupOrd)] = group;
	group.ords = new int[group.uniqueValues.size()];
	Arrays.fill(group.ords, -2);
	int i = 0;
	for (BytesRef value : group.uniqueValues) {
	int countOrd = value == null ? -1 : countFieldTermIndex.lookupTerm(value);
	if (value == null \|\| countOrd >= 0) {
	group.ords[i++] = countOrd;
	}
	}
	}
	}

	/** Holds distinct values for a single group.
	*
	* @lucene.experimental */
	public static class GroupCount extends AbstractDistinctValuesCollector.GroupCount<BytesRef> {

	int[] ords;

	GroupCount(BytesRef groupValue) {
	super(groupValue);
	}
	}

	}