// lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.search.suggest;

 import java.io.IOException;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiBits;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;


 /**
  * <p>
  * Dictionary with terms, weights, payload (optional) and contexts (optional)
  * information taken from stored/indexed fields in a Lucene index.
  * </p>
  * <b>NOTE:</b>
  *  <ul>
  *    <li>
  *      The term field has to be stored; if it is missing, the document is skipped.
  *      A document with several values for the term field yields one entry per value,
  *      all of them sharing that document's weight, payload and contexts.
  *    </li>
  *    <li>
  *      The payload and contexts fields are optional and are not required to be stored.
  *      When a payload field is configured but a document has no value for it, an empty
  *      payload is used; when a contexts field is configured but a document has no value
  *      for it, an empty context set is used.
  *    </li>
  *    <li>
  *      The weight field can be stored or can be a {@link NumericDocValues}.
  *      If the weight field is not defined, the value of the weight is <code>0</code>
  *    </li>
  *  </ul>
  */
 public class DocumentDictionary implements Dictionary {

   /** {@link IndexReader} to load documents from */
   protected final IndexReader reader;

   /** Field to read payload from */
   protected final String payloadField;
   /** Field to read contexts from */
   protected final String contextsField;
   /** Field to read the terms from */
   private final String field;
   /** Field to read the weights from; may be <code>null</code> */
   private final String weightField;

   /**
    * Creates a new dictionary with the contents of the fields named <code>field</code>
    * for the terms and <code>weightField</code> for the weights that will be used for
    * the corresponding terms.
    */
   public DocumentDictionary(IndexReader reader, String field, String weightField) {
     this(reader, field, weightField, null);
   }

   /**
    * Creates a new dictionary with the contents of the fields named <code>field</code>
    * for the terms, <code>weightField</code> for the weights that will be used for
    * the corresponding terms and <code>payloadField</code> for the corresponding payloads
    * for the entry.
    */
   public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField) {
     this(reader, field, weightField, payloadField, null);
   }

   /**
    * Creates a new dictionary with the contents of the fields named <code>field</code>
    * for the terms, <code>weightField</code> for the weights that will be used for
    * the corresponding terms, <code>payloadField</code> for the corresponding payloads
    * for the entry and <code>contextsField</code> for associated contexts.
    */
   public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField, String contextsField) {
     this.reader = reader;
     this.field = field;
     this.weightField = weightField;
     this.payloadField = payloadField;
     this.contextsField = contextsField;
   }

   /**
    * Returns a fresh iterator over the entries of this dictionary. Payloads and contexts
    * are enabled exactly when the corresponding field name is non-null.
    */
   @Override
   public InputIterator getEntryIterator() throws IOException {
     return new DocumentInputIterator(payloadField!=null, contextsField!=null);
   }

   /** Implements {@link InputIterator} from stored fields. */
   protected class DocumentInputIterator implements InputIterator {

     /** Id of the last document to visit (<code>maxDoc - 1</code>), i.e. <code>-1</code> when maxDoc is 0. */
     private final int docCount;
     /** Names of the stored fields that are loaded for every visited document. */
     private final Set<String> relevantFields;
     private final boolean hasPayloads;
     private final boolean hasContexts;
     /** Live-docs bits used to skip documents; <code>null</code> disables the check. */
     private final Bits liveDocs;
     private int currentDocId = -1;
     private long currentWeight = 0;
     private BytesRef currentPayload = null;
     private Set<BytesRef> currentContexts;
     /** Doc-values source for the weight; <code>null</code> when no weight field is configured. */
     private final NumericDocValues weightValues;
     /** Term field values of the current document. */
     IndexableField[] currentDocFields = new IndexableField[0];
     /** Index into {@link #currentDocFields} of the next value to emit. */
     int nextFieldsPosition = 0;

     /**
      * Creates an iterator over term, weight, payload and contexts fields from the lucene
      * index. Setting <code>hasPayloads</code> to false implies an iterator without
      * payloads; setting <code>hasContexts</code> to false implies an iterator without
      * contexts.
      */
     public DocumentInputIterator(boolean hasPayloads, boolean hasContexts) throws IOException {
       this.hasPayloads = hasPayloads;
       this.hasContexts = hasContexts;
       docCount = reader.maxDoc() - 1;
       weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField) : null;
       liveDocs = (reader.leaves().size() > 0) ? MultiBits.getLiveDocs(reader) : null;
       relevantFields = getRelevantFields(field, weightField, payloadField, contextsField);
     }

     @Override
     public long weight() {
       return currentWeight;
     }

     /**
      * Returns the next term, or <code>null</code> once every document has been visited.
      * Each usable value of the term field produces one entry; {@link #weight()},
      * {@link #payload()} and {@link #contexts()} then describe the document that the
      * returned term was read from.
      */
     @Override
     public BytesRef next() throws IOException {
       while (true) {
         // Emit the pending term values of the current document, skipping values
         // that have neither a binary nor a string representation.
         while (nextFieldsPosition < currentDocFields.length) {
           BytesRef term = toBytesRef(currentDocFields[nextFieldsPosition++]);
           if (term != null) {
             return term;
           }
         }

         if (currentDocId == docCount) {
           // Iterated over all the documents.
           return null;
         }

         currentDocId++;
         if (liveDocs != null && !liveDocs.get(currentDocId)) {
           continue;
         }

         Document doc = reader.document(currentDocId, relevantFields);

         currentDocFields = doc.getFields(field);
         nextFieldsPosition = 0;
         if (currentDocFields.length == 0) { // no values in this document
           continue;
         }

         // Publish the per-document state BEFORE any value is emitted. Doing it only
         // after the first value turned out to be usable would let the remaining values
         // of this document be returned with the previous document's weight, payload
         // and contexts whenever the first value is unusable.
         currentPayload = hasPayloads ? readPayload(doc) : null;
         if (hasContexts) {
           currentContexts = readContexts(doc);
         } else {
           currentContexts = Collections.emptySet();
         }
         currentWeight = getWeight(doc, currentDocId);
         // Loop around: the drain loop above returns this document's first usable value.
       }
     }

     /**
      * Converts a field value to a {@link BytesRef}, preferring the binary value over the
      * string value. Returns <code>null</code> when the field has neither.
      */
     private BytesRef toBytesRef(IndexableField value) {
       BytesRef binary = value.binaryValue();
       if (binary != null) {
         return binary;
       }
       String text = value.stringValue();
       return (text != null) ? new BytesRef(text) : null;
     }

     /**
      * Reads the payload of <code>doc</code>. In case that the iterator has payloads
      * configured, an empty value is used instead of <code>null</code> when the document
      * has no usable payload.
      */
     private BytesRef readPayload(Document doc) {
       IndexableField payload = doc.getField(payloadField);
       BytesRef value = (payload != null) ? toBytesRef(payload) : null;
       return (value != null) ? value : new BytesRef();
     }

     /** Collects every usable value of the contexts field of <code>doc</code> into a new set. */
     private Set<BytesRef> readContexts(Document doc) {
       Set<BytesRef> contexts = new HashSet<>();
       for (IndexableField contextField : doc.getFields(contextsField)) {
         BytesRef context = toBytesRef(contextField);
         if (context != null) {
           contexts.add(context);
         }
       }
       return contexts;
     }

     @Override
     public BytesRef payload() {
       return currentPayload;
     }

     @Override
     public boolean hasPayloads() {
       return hasPayloads;
     }

     /**
      * Returns the value of the <code>weightField</code> for the current document.
      * Retrieves the value for the <code>weightField</code> if it's stored (using <code>doc</code>)
      * or if it's indexed as {@link NumericDocValues} (using <code>docId</code>) for the document.
      * If no value is found, then the weight is 0.
      */
     protected long getWeight(Document doc, int docId) throws IOException {
       IndexableField weight = doc.getField(weightField);
       if (weight != null) { // found weight as stored
         Number stored = weight.numericValue();
         return (stored != null) ? stored.longValue() : 0;
       }
       if (weightValues == null) { // fall back
         return 0;
       }
       // found weight as NumericDocValue; only move the doc-values iterator forward
       if (weightValues.docID() < docId) {
         weightValues.advance(docId);
       }
       // a position other than docId means this document has no doc value: missing
       return (weightValues.docID() == docId) ? weightValues.longValue() : 0;
     }

     /** Returns the non-null names among <code>fields</code> as a set. */
     private Set<String> getRelevantFields(String... fields) {
       Set<String> relevantFields = new HashSet<>();
       for (String relevantField : fields) {
         if (relevantField != null) {
           relevantFields.add(relevantField);
         }
       }
       return relevantFields;
     }

     /**
      * Returns the contexts of the current document, or <code>null</code> when this
      * iterator was created without contexts.
      */
     @Override
     public Set<BytesRef> contexts() {
       return hasContexts ? currentContexts : null;
     }

     @Override
     public boolean hasContexts() {
       return hasContexts;
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.search.suggest;

	import java.io.IOException;
	import java.util.Collections;
	import java.util.HashSet;
	import java.util.Set;

	import org.apache.lucene.document.Document;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.IndexableField;
	import org.apache.lucene.index.MultiBits;
	import org.apache.lucene.index.MultiDocValues;
	import org.apache.lucene.index.NumericDocValues;
	import org.apache.lucene.search.spell.Dictionary;
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;



	/**
	 * <p>
	 * Dictionary with terms, weights, payload (optional) and contexts (optional)
	 * information taken from stored/indexed fields in a Lucene index.
	 * </p>
	 * <b>NOTE:</b>
	 * <ul>
	 * <li>
	 * The term field has to be stored; if it is missing, the document is skipped.
	 * A document with several values for the term field yields one entry per value,
	 * all of them sharing that document's weight, payload and contexts.
	 * </li>
	 * <li>
	 * The payload and contexts fields are optional and are not required to be stored.
	 * When a payload field is configured but a document has no value for it, an empty
	 * payload is used; when a contexts field is configured but a document has no value
	 * for it, an empty context set is used.
	 * </li>
	 * <li>
	 * The weight field can be stored or can be a {@link NumericDocValues}.
	 * If the weight field is not defined, the value of the weight is <code>0</code>
	 * </li>
	 * </ul>
	 */
	public class DocumentDictionary implements Dictionary {

	  /** {@link IndexReader} to load documents from */
	  protected final IndexReader reader;

	  /** Field to read payload from */
	  protected final String payloadField;
	  /** Field to read contexts from */
	  protected final String contextsField;
	  /** Field to read the terms from */
	  private final String field;
	  /** Field to read the weights from; may be <code>null</code> */
	  private final String weightField;

	  /**
	   * Creates a new dictionary with the contents of the fields named <code>field</code>
	   * for the terms and <code>weightField</code> for the weights that will be used for
	   * the corresponding terms.
	   */
	  public DocumentDictionary(IndexReader reader, String field, String weightField) {
	    this(reader, field, weightField, null);
	  }

	  /**
	   * Creates a new dictionary with the contents of the fields named <code>field</code>
	   * for the terms, <code>weightField</code> for the weights that will be used for
	   * the corresponding terms and <code>payloadField</code> for the corresponding payloads
	   * for the entry.
	   */
	  public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField) {
	    this(reader, field, weightField, payloadField, null);
	  }

	  /**
	   * Creates a new dictionary with the contents of the fields named <code>field</code>
	   * for the terms, <code>weightField</code> for the weights that will be used for
	   * the corresponding terms, <code>payloadField</code> for the corresponding payloads
	   * for the entry and <code>contextsField</code> for associated contexts.
	   */
	  public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField, String contextsField) {
	    this.reader = reader;
	    this.field = field;
	    this.weightField = weightField;
	    this.payloadField = payloadField;
	    this.contextsField = contextsField;
	  }

	  /**
	   * Returns a fresh iterator over the entries of this dictionary. Payloads and contexts
	   * are enabled exactly when the corresponding field name is non-null.
	   */
	  @Override
	  public InputIterator getEntryIterator() throws IOException {
	    return new DocumentInputIterator(payloadField!=null, contextsField!=null);
	  }

	  /** Implements {@link InputIterator} from stored fields. */
	  protected class DocumentInputIterator implements InputIterator {

	    /** Id of the last document to visit (<code>maxDoc - 1</code>), i.e. <code>-1</code> when maxDoc is 0. */
	    private final int docCount;
	    /** Names of the stored fields that are loaded for every visited document. */
	    private final Set<String> relevantFields;
	    private final boolean hasPayloads;
	    private final boolean hasContexts;
	    /** Live-docs bits used to skip documents; <code>null</code> disables the check. */
	    private final Bits liveDocs;
	    private int currentDocId = -1;
	    private long currentWeight = 0;
	    private BytesRef currentPayload = null;
	    private Set<BytesRef> currentContexts;
	    /** Doc-values source for the weight; <code>null</code> when no weight field is configured. */
	    private final NumericDocValues weightValues;
	    /** Term field values of the current document. */
	    IndexableField[] currentDocFields = new IndexableField[0];
	    /** Index into {@link #currentDocFields} of the next value to emit. */
	    int nextFieldsPosition = 0;

	    /**
	     * Creates an iterator over term, weight, payload and contexts fields from the lucene
	     * index. Setting <code>hasPayloads</code> to false implies an iterator without
	     * payloads; setting <code>hasContexts</code> to false implies an iterator without
	     * contexts.
	     */
	    public DocumentInputIterator(boolean hasPayloads, boolean hasContexts) throws IOException {
	      this.hasPayloads = hasPayloads;
	      this.hasContexts = hasContexts;
	      docCount = reader.maxDoc() - 1;
	      weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField) : null;
	      liveDocs = (reader.leaves().size() > 0) ? MultiBits.getLiveDocs(reader) : null;
	      relevantFields = getRelevantFields(field, weightField, payloadField, contextsField);
	    }

	    @Override
	    public long weight() {
	      return currentWeight;
	    }

	    /**
	     * Returns the next term, or <code>null</code> once every document has been visited.
	     * Each usable value of the term field produces one entry; {@link #weight()},
	     * {@link #payload()} and {@link #contexts()} then describe the document that the
	     * returned term was read from.
	     */
	    @Override
	    public BytesRef next() throws IOException {
	      while (true) {
	        // Emit the pending term values of the current document, skipping values
	        // that have neither a binary nor a string representation.
	        while (nextFieldsPosition < currentDocFields.length) {
	          BytesRef term = toBytesRef(currentDocFields[nextFieldsPosition++]);
	          if (term != null) {
	            return term;
	          }
	        }

	        if (currentDocId == docCount) {
	          // Iterated over all the documents.
	          return null;
	        }

	        currentDocId++;
	        if (liveDocs != null && !liveDocs.get(currentDocId)) {
	          continue;
	        }

	        Document doc = reader.document(currentDocId, relevantFields);

	        currentDocFields = doc.getFields(field);
	        nextFieldsPosition = 0;
	        if (currentDocFields.length == 0) { // no values in this document
	          continue;
	        }

	        // Publish the per-document state BEFORE any value is emitted. Doing it only
	        // after the first value turned out to be usable would let the remaining values
	        // of this document be returned with the previous document's weight, payload
	        // and contexts whenever the first value is unusable.
	        currentPayload = hasPayloads ? readPayload(doc) : null;
	        if (hasContexts) {
	          currentContexts = readContexts(doc);
	        } else {
	          currentContexts = Collections.emptySet();
	        }
	        currentWeight = getWeight(doc, currentDocId);
	        // Loop around: the drain loop above returns this document's first usable value.
	      }
	    }

	    /**
	     * Converts a field value to a {@link BytesRef}, preferring the binary value over the
	     * string value. Returns <code>null</code> when the field has neither.
	     */
	    private BytesRef toBytesRef(IndexableField value) {
	      BytesRef binary = value.binaryValue();
	      if (binary != null) {
	        return binary;
	      }
	      String text = value.stringValue();
	      return (text != null) ? new BytesRef(text) : null;
	    }

	    /**
	     * Reads the payload of <code>doc</code>. In case that the iterator has payloads
	     * configured, an empty value is used instead of <code>null</code> when the document
	     * has no usable payload.
	     */
	    private BytesRef readPayload(Document doc) {
	      IndexableField payload = doc.getField(payloadField);
	      BytesRef value = (payload != null) ? toBytesRef(payload) : null;
	      return (value != null) ? value : new BytesRef();
	    }

	    /** Collects every usable value of the contexts field of <code>doc</code> into a new set. */
	    private Set<BytesRef> readContexts(Document doc) {
	      Set<BytesRef> contexts = new HashSet<>();
	      for (IndexableField contextField : doc.getFields(contextsField)) {
	        BytesRef context = toBytesRef(contextField);
	        if (context != null) {
	          contexts.add(context);
	        }
	      }
	      return contexts;
	    }

	    @Override
	    public BytesRef payload() {
	      return currentPayload;
	    }

	    @Override
	    public boolean hasPayloads() {
	      return hasPayloads;
	    }

	    /**
	     * Returns the value of the <code>weightField</code> for the current document.
	     * Retrieves the value for the <code>weightField</code> if it's stored (using <code>doc</code>)
	     * or if it's indexed as {@link NumericDocValues} (using <code>docId</code>) for the document.
	     * If no value is found, then the weight is 0.
	     */
	    protected long getWeight(Document doc, int docId) throws IOException {
	      IndexableField weight = doc.getField(weightField);
	      if (weight != null) { // found weight as stored
	        Number stored = weight.numericValue();
	        return (stored != null) ? stored.longValue() : 0;
	      }
	      if (weightValues == null) { // fall back
	        return 0;
	      }
	      // found weight as NumericDocValue; only move the doc-values iterator forward
	      if (weightValues.docID() < docId) {
	        weightValues.advance(docId);
	      }
	      // a position other than docId means this document has no doc value: missing
	      return (weightValues.docID() == docId) ? weightValues.longValue() : 0;
	    }

	    /** Returns the non-null names among <code>fields</code> as a set. */
	    private Set<String> getRelevantFields(String... fields) {
	      Set<String> relevantFields = new HashSet<>();
	      for (String relevantField : fields) {
	        if (relevantField != null) {
	          relevantFields.add(relevantField);
	        }
	      }
	      return relevantFields;
	    }

	    /**
	     * Returns the contexts of the current document, or <code>null</code> when this
	     * iterator was created without contexts.
	     */
	    @Override
	    public Set<BytesRef> contexts() {
	      return hasContexts ? currentContexts : null;
	    }

	    @Override
	    public boolean hasContexts() {
	      return hasContexts;
	    }
	  }
	}