lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.facet.sortedset;

 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;

 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.OrdinalMap;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Accountables;
 import org.apache.lucene.util.BytesRef;

 /**
  * Default implementation of {@link SortedSetDocValuesReaderState}. You must ensure the original
  * {@link IndexReader} passed to the constructor is not closed whenever you use this class!
  *
  * <p>The constructor walks every top-level ordinal of the field once and records, per dimension,
  * the inclusive range of ordinals whose labels belong to that dimension. Only two-level paths
  * (dim/value) are accepted.
  */
 public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState {

   /** Name of the indexed field the doc values are pulled from. */
   private final String field;

   /** Number of unique values of the field, captured by the constructor. */
   private final int valueCount;

   /** {@link IndexReader} passed to the constructor. */
   public final IndexReader reader;

   /** Ordinal maps keyed by field name; every access in this class synchronizes on this map. */
   private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<>();

   /** Dimension name mapped to its range of ordinals; filled in by the constructor. */
   private final Map<String,OrdRange> prefixToOrdRange = new HashMap<>();

   /** Creates this, pulling doc values from the default {@link
    *  FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */
   public DefaultSortedSetDocValuesReaderState(IndexReader reader) throws IOException {
     this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
   }

   /**
    * Creates this, pulling doc values from the specified field.
    *
    * @param reader reader to pull the doc values from
    * @param field name of the indexed field to read
    * @throws IOException if reading the doc values fails
    * @throws IllegalArgumentException if {@link #getDocValues()} returns {@code null} for the
    *         field, if the field has more than {@link Integer#MAX_VALUE} unique values, or if a
    *         label does not split into exactly two path components
    */
   public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
     this.field = field;
     this.reader = reader;

     // We need this to create thread-safe MultiSortedSetDV
     // per collector:
     SortedSetDocValues dv = getDocValues();
     if (dv == null) {
       throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
     }
     // Read the count once; it is checked, reported and narrowed below.
     final long totalValues = dv.getValueCount();
     if (totalValues > Integer.MAX_VALUE) {
       throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + totalValues);
     }
     valueCount = (int) totalValues;

     // TODO: we can make this more efficient if eg we can be
     // "involved" when OrdinalMap is being created?  Ie see
     // each term/ord it's assigning as it goes...
     String lastDim = null;
     int startOrd = -1;

     // TODO: this approach can work for full hierarchy?;
     // TaxoReader can't do this since ords are not in
     // "sorted order" ... but we should generalize this to
     // support arbitrary hierarchy:
     for (int ord = 0; ord < valueCount; ord++) {
       final BytesRef term = dv.lookupOrd(ord);
       final String label = term.utf8ToString();
       final String[] components = FacetsConfig.stringToPath(label);
       if (components.length != 2) {
         throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + label);
       }
       final String dim = components[0];
       if (!dim.equals(lastDim)) {
         if (lastDim != null) {
           // A new dimension starts here, so the previous one ended at the ordinal before it.
           prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord-1));
         }
         startOrd = ord;
         lastDim = dim;
       }
     }

     // Close the range of the final dimension (there is none when the field has no values).
     if (lastDim != null) {
       prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount-1));
     }
   }

   /**
    * Return the memory usage of this object in bytes. Negative values are illegal.
    *
    * <p>Only the cached {@link OrdinalMap} instances are summed.
    */
   @Override
   public long ramBytesUsed() {
     synchronized (cachedOrdMaps) {
       long bytes = 0;
       for (OrdinalMap map : cachedOrdMaps.values()) {
         bytes += map.ramBytesUsed();
       }

       return bytes;
     }
   }

   /**
    * Returns nested resources of this class.
    * The result should be a point-in-time snapshot (to avoid race conditions).
    * @see Accountables
    */
   @Override
   public Collection<Accountable> getChildResources() {
     synchronized (cachedOrdMaps) {
       return Accountables.namedAccountables("DefaultSortedSetDocValuesReaderState", cachedOrdMaps);
     }
   }

   @Override
   public String toString() {
     return "DefaultSortedSetDocValuesReaderState(field=" + field + " reader=" + reader + ")";
   }

   /**
    * Return top-level doc values.
    *
    * <p>While no ordinal map is cached for the field, the values come straight from
    * {@link MultiDocValues#getSortedSetValues}; the ordinal map of a multi-segment result is
    * cached only when its owner is this reader's cache key. Once a map is cached, each call
    * assembles a new {@link MultiSortedSetDocValues} over the per-leaf doc values and that map.
    *
    * @return the doc values; {@code null} if {@link MultiDocValues#getSortedSetValues} returned
    *         {@code null}, or if a leaf declares the field with a doc values type other than
    *         {@link DocValuesType#SORTED_SET}
    * @throws IOException if reading the doc values fails
    */
   @Override
   public SortedSetDocValues getDocValues() throws IOException {
     // TODO: this is dup'd from slow composite reader wrapper ... can we factor it out to share?
     OrdinalMap map = null;
     // TODO: why are we lazy about this?  It's better if ctor pays the cost, not first query?  Oh, but we
     // call this method from ctor, ok.  Also, we only ever store one entry in the map (for key=field) so
     // why are we using a map?
     synchronized (cachedOrdMaps) {
       map = cachedOrdMaps.get(field);
       if (map == null) {
         // uncached, or not a multi dv
         SortedSetDocValues dv = MultiDocValues.getSortedSetValues(reader, field);
         if (dv instanceof MultiSortedSetDocValues) {
           map = ((MultiSortedSetDocValues) dv).mapping;
           IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
           if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
             cachedOrdMaps.put(field, map);
           }
         }
         return dv;
       }
     }

     assert map != null;
     final int size = reader.leaves().size();
     final SortedSetDocValues[] values = new SortedSetDocValues[size];
     final int[] starts = new int[size+1];
     long cost = 0;
     int leafIndex = 0;
     // The leaves are fetched once for the whole loop; the per-leaf reader gets its own name so
     // that it does not hide the top-level reader field.
     for (LeafReaderContext context : reader.leaves()) {
       final LeafReader leafReader = context.reader();
       final FieldInfo fieldInfo = leafReader.getFieldInfos().fieldInfo(field);
       if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
         return null;
       }
       SortedSetDocValues v = leafReader.getSortedSetDocValues(field);
       if (v == null) {
         // Keep a slot for every leaf, even one without values for the field.
         v = DocValues.emptySortedSet();
       }
       values[leafIndex] = v;
       starts[leafIndex] = context.docBase;
       cost += v.cost();
       leafIndex++;
     }
     starts[size] = reader.maxDoc();
     return new MultiSortedSetDocValues(values, starts, map, cost);
   }

   /**
    * Returns mapping from prefix to {@link OrdRange}.
    *
    * <p>The returned map is the internal instance, not a copy.
    */
   @Override
   public Map<String,OrdRange> getPrefixToOrdRange() {
     return prefixToOrdRange;
   }

   /**
    * Returns the {@link OrdRange} for this dimension.
    *
    * @param dim dimension name
    * @return the range recorded for {@code dim}, or {@code null} if none was recorded
    */
   @Override
   public OrdRange getOrdRange(String dim) {
     return prefixToOrdRange.get(dim);
   }

   /** Indexed field we are reading. */
   @Override
   public String getField() {
     return field;
   }

   /** Returns the {@link IndexReader} passed to the constructor. */
   @Override
   public IndexReader getReader() {
     return reader;
   }

   /** Number of unique labels. */
   @Override
   public int getSize() {
     return valueCount;
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.facet.sortedset;

	import java.io.IOException;
	import java.util.Arrays;
	import java.util.Collection;
	import java.util.HashMap;
	import java.util.Map;

	import org.apache.lucene.facet.FacetsConfig;
	import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
	import org.apache.lucene.index.DocValues;
	import org.apache.lucene.index.DocValuesType;
	import org.apache.lucene.index.FieldInfo;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.LeafReader;
	import org.apache.lucene.index.LeafReaderContext;
	import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
	import org.apache.lucene.index.MultiDocValues;
	import org.apache.lucene.index.OrdinalMap;
	import org.apache.lucene.index.SortedSetDocValues;
	import org.apache.lucene.util.Accountable;
	import org.apache.lucene.util.Accountables;
	import org.apache.lucene.util.BytesRef;

	/**
	 * Default implementation of {@link SortedSetDocValuesReaderState}. You must ensure the original
	 * {@link IndexReader} passed to the constructor is not closed whenever you use this class!
	 *
	 * <p>The constructor walks every top-level ordinal of the field once and records, per dimension,
	 * the inclusive range of ordinals whose labels belong to that dimension. Only two-level paths
	 * (dim/value) are accepted.
	 */
	public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState {

	  /** Name of the indexed field the doc values are pulled from. */
	  private final String field;

	  /** Number of unique values of the field, captured by the constructor. */
	  private final int valueCount;

	  /** {@link IndexReader} passed to the constructor. */
	  public final IndexReader reader;

	  /** Ordinal maps keyed by field name; every access in this class synchronizes on this map. */
	  private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<>();

	  /** Dimension name mapped to its range of ordinals; filled in by the constructor. */
	  private final Map<String,OrdRange> prefixToOrdRange = new HashMap<>();

	  /** Creates this, pulling doc values from the default {@link
	   * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */
	  public DefaultSortedSetDocValuesReaderState(IndexReader reader) throws IOException {
	    this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
	  }

	  /**
	   * Creates this, pulling doc values from the specified field.
	   *
	   * @param reader reader to pull the doc values from
	   * @param field name of the indexed field to read
	   * @throws IOException if reading the doc values fails
	   * @throws IllegalArgumentException if {@link #getDocValues()} returns {@code null} for the
	   *         field, if the field has more than {@link Integer#MAX_VALUE} unique values, or if a
	   *         label does not split into exactly two path components
	   */
	  public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
	    this.field = field;
	    this.reader = reader;

	    // We need this to create thread-safe MultiSortedSetDV
	    // per collector:
	    SortedSetDocValues dv = getDocValues();
	    if (dv == null) {
	      throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
	    }
	    // Read the count once; it is checked, reported and narrowed below.
	    final long totalValues = dv.getValueCount();
	    if (totalValues > Integer.MAX_VALUE) {
	      throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + totalValues);
	    }
	    valueCount = (int) totalValues;

	    // TODO: we can make this more efficient if eg we can be
	    // "involved" when OrdinalMap is being created? Ie see
	    // each term/ord it's assigning as it goes...
	    String lastDim = null;
	    int startOrd = -1;

	    // TODO: this approach can work for full hierarchy?;
	    // TaxoReader can't do this since ords are not in
	    // "sorted order" ... but we should generalize this to
	    // support arbitrary hierarchy:
	    for (int ord = 0; ord < valueCount; ord++) {
	      final BytesRef term = dv.lookupOrd(ord);
	      final String label = term.utf8ToString();
	      final String[] components = FacetsConfig.stringToPath(label);
	      if (components.length != 2) {
	        throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + label);
	      }
	      final String dim = components[0];
	      if (!dim.equals(lastDim)) {
	        if (lastDim != null) {
	          // A new dimension starts here, so the previous one ended at the ordinal before it.
	          prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord-1));
	        }
	        startOrd = ord;
	        lastDim = dim;
	      }
	    }

	    // Close the range of the final dimension (there is none when the field has no values).
	    if (lastDim != null) {
	      prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount-1));
	    }
	  }

	  /**
	   * Return the memory usage of this object in bytes. Negative values are illegal.
	   *
	   * <p>Only the cached {@link OrdinalMap} instances are summed.
	   */
	  @Override
	  public long ramBytesUsed() {
	    synchronized (cachedOrdMaps) {
	      long bytes = 0;
	      for (OrdinalMap map : cachedOrdMaps.values()) {
	        bytes += map.ramBytesUsed();
	      }

	      return bytes;
	    }
	  }

	  /**
	   * Returns nested resources of this class.
	   * The result should be a point-in-time snapshot (to avoid race conditions).
	   * @see Accountables
	   */
	  @Override
	  public Collection<Accountable> getChildResources() {
	    synchronized (cachedOrdMaps) {
	      return Accountables.namedAccountables("DefaultSortedSetDocValuesReaderState", cachedOrdMaps);
	    }
	  }

	  @Override
	  public String toString() {
	    return "DefaultSortedSetDocValuesReaderState(field=" + field + " reader=" + reader + ")";
	  }

	  /**
	   * Return top-level doc values.
	   *
	   * <p>While no ordinal map is cached for the field, the values come straight from
	   * {@link MultiDocValues#getSortedSetValues}; the ordinal map of a multi-segment result is
	   * cached only when its owner is this reader's cache key. Once a map is cached, each call
	   * assembles a new {@link MultiSortedSetDocValues} over the per-leaf doc values and that map.
	   *
	   * @return the doc values; {@code null} if {@link MultiDocValues#getSortedSetValues} returned
	   *         {@code null}, or if a leaf declares the field with a doc values type other than
	   *         {@link DocValuesType#SORTED_SET}
	   * @throws IOException if reading the doc values fails
	   */
	  @Override
	  public SortedSetDocValues getDocValues() throws IOException {
	    // TODO: this is dup'd from slow composite reader wrapper ... can we factor it out to share?
	    OrdinalMap map = null;
	    // TODO: why are we lazy about this? It's better if ctor pays the cost, not first query? Oh, but we
	    // call this method from ctor, ok. Also, we only ever store one entry in the map (for key=field) so
	    // why are we using a map?
	    synchronized (cachedOrdMaps) {
	      map = cachedOrdMaps.get(field);
	      if (map == null) {
	        // uncached, or not a multi dv
	        SortedSetDocValues dv = MultiDocValues.getSortedSetValues(reader, field);
	        if (dv instanceof MultiSortedSetDocValues) {
	          map = ((MultiSortedSetDocValues) dv).mapping;
	          IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
	          if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
	            cachedOrdMaps.put(field, map);
	          }
	        }
	        return dv;
	      }
	    }

	    assert map != null;
	    final int size = reader.leaves().size();
	    final SortedSetDocValues[] values = new SortedSetDocValues[size];
	    final int[] starts = new int[size+1];
	    long cost = 0;
	    int leafIndex = 0;
	    // The leaves are fetched once for the whole loop; the per-leaf reader gets its own name so
	    // that it does not hide the top-level reader field.
	    for (LeafReaderContext context : reader.leaves()) {
	      final LeafReader leafReader = context.reader();
	      final FieldInfo fieldInfo = leafReader.getFieldInfos().fieldInfo(field);
	      if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
	        return null;
	      }
	      SortedSetDocValues v = leafReader.getSortedSetDocValues(field);
	      if (v == null) {
	        // Keep a slot for every leaf, even one without values for the field.
	        v = DocValues.emptySortedSet();
	      }
	      values[leafIndex] = v;
	      starts[leafIndex] = context.docBase;
	      cost += v.cost();
	      leafIndex++;
	    }
	    starts[size] = reader.maxDoc();
	    return new MultiSortedSetDocValues(values, starts, map, cost);
	  }

	  /**
	   * Returns mapping from prefix to {@link OrdRange}.
	   *
	   * <p>The returned map is the internal instance, not a copy.
	   */
	  @Override
	  public Map<String,OrdRange> getPrefixToOrdRange() {
	    return prefixToOrdRange;
	  }

	  /**
	   * Returns the {@link OrdRange} for this dimension.
	   *
	   * @param dim dimension name
	   * @return the range recorded for {@code dim}, or {@code null} if none was recorded
	   */
	  @Override
	  public OrdRange getOrdRange(String dim) {
	    return prefixToOrdRange.get(dim);
	  }

	  /** Indexed field we are reading. */
	  @Override
	  public String getField() {
	    return field;
	  }

	  /** Returns the {@link IndexReader} passed to the constructor. */
	  @Override
	  public IndexReader getReader() {
	    return reader;
	  }

	  /** Number of unique labels. */
	  @Override
	  public int getSize() {
	    return valueCount;
	  }

	}