lucene/core/src/java/org/apache/lucene/index/MultiFields.java - lucene-solr - Git at Google

 package org.apache.lucene.index;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.concurrent.ConcurrentHashMap;

 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.MultiBits;
 import org.apache.lucene.util.ReaderUtil.Gather;  // for javadocs
 import org.apache.lucene.util.ReaderUtil;

 /**
  * Exposes flex API, merged from flex API of sub-segments.
  * This is useful when you're interacting with an {@link
  * IndexReader} implementation that consists of sequential
  * sub-readers (eg {@link DirectoryReader} or {@link
  * MultiReader}).
  *
  * <p><b>NOTE</b>: for multi readers, you'll get better
  * performance by gathering the sub readers using {@link
  * ReaderUtil#gatherSubReaders} and then operate per-reader,
  * instead of using this class.
  *
  * @lucene.experimental
  */

 public final class MultiFields extends Fields {
   private final Fields[] subs;
   private final ReaderUtil.Slice[] subSlices;
   private final Map<String,Terms> terms = new ConcurrentHashMap<String,Terms>();

   /** Returns a single {@link Fields} instance for this
    *  reader, merging fields/terms/docs/positions on the
    *  fly.  This method will return null if the reader
    *  has no postings.
    *
    *  <p><b>NOTE</b>: this is a slow way to access postings.
    *  It's better to get the sub-readers (using {@link
    *  Gather}) and iterate through them
    *  yourself. */
   public static Fields getFields(IndexReader r) throws IOException {
     if (r instanceof AtomicReader) {
       // already an atomic reader
       return ((AtomicReader) r).fields();
     }
     assert r instanceof CompositeReader;
     final IndexReader[] subs = ((CompositeReader) r).getSequentialSubReaders();
     if (subs.length == 0) {
       // no fields
       return null;
     } else {
       final List<Fields> fields = new ArrayList<Fields>();
       final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();

       new ReaderUtil.Gather(r) {
         @Override
         protected void add(int base, AtomicReader r) throws IOException {
           final Fields f = r.fields();
           if (f != null) {
             fields.add(f);
             slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
           }
         }
       }.run();

       if (fields.isEmpty()) {
         return null;
       } else if (fields.size() == 1) {
         return fields.get(0);
       } else {
         return new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                        slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
       }
     }
   }

   public static Bits getLiveDocs(IndexReader r) {
     if (r.hasDeletions()) {
       final List<Bits> liveDocs = new ArrayList<Bits>();
       final List<Integer> starts = new ArrayList<Integer>();

       try {
         final int maxDoc = new ReaderUtil.Gather(r) {
             @Override
             protected void add(int base, AtomicReader r) throws IOException {
               // record all liveDocs, even if they are null
               liveDocs.add(r.getLiveDocs());
               starts.add(base);
             }
           }.run();
         starts.add(maxDoc);
       } catch (IOException ioe) {
         // should not happen
         throw new RuntimeException(ioe);
       }

       assert liveDocs.size() > 0;
       if (liveDocs.size() == 1) {
         // Only one actual sub reader -- optimize this case
         return liveDocs.get(0);
       } else {
         return new MultiBits(liveDocs, starts, true);
       }
     } else {
       return null;
     }
   }

   /**  This method may return null if the field does not exist.*/
   public static Terms getTerms(IndexReader r, String field) throws IOException {
     final Fields fields = getFields(r);
     if (fields == null) {
       return null;
     } else {
       return fields.terms(field);
     }
   }

   /** Returns {@link DocsEnum} for the specified field &
    *  term.  This may return null if the term does not
    *  exist. */
   public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
     assert field != null;
     assert term != null;
     final Terms terms = getTerms(r, field);
     if (terms != null) {
       final TermsEnum termsEnum = terms.iterator(null);
       if (termsEnum.seekExact(term, true)) {
         return termsEnum.docs(liveDocs, null, needsFreqs);
       }
     }
     return null;
   }

   /** Returns {@link DocsAndPositionsEnum} for the specified
    *  field & term.  This may return null if the term does
    *  not exist or positions were not indexed. */
   public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
     assert field != null;
     assert term != null;
     final Terms terms = getTerms(r, field);
     if (terms != null) {
       final TermsEnum termsEnum = terms.iterator(null);
       if (termsEnum.seekExact(term, true)) {
         return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
       }
     }
     return null;
   }

   public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
     this.subs = subs;
     this.subSlices = subSlices;
   }

   @Override
   public FieldsEnum iterator() throws IOException {

     final List<FieldsEnum> fieldsEnums = new ArrayList<FieldsEnum>();
     final List<ReaderUtil.Slice> fieldsSlices = new ArrayList<ReaderUtil.Slice>();
     for(int i=0;i<subs.length;i++) {
       fieldsEnums.add(subs[i].iterator());
       fieldsSlices.add(subSlices[i]);
     }
     if (fieldsEnums.size() == 0) {
       return FieldsEnum.EMPTY;
     } else {
       return new MultiFieldsEnum(this,
                                  fieldsEnums.toArray(FieldsEnum.EMPTY_ARRAY),
                                  fieldsSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
     }
   }

   @Override
   public Terms terms(String field) throws IOException {

     Terms result = terms.get(field);
     if (result != null)
       return result;


     // Lazy init: first time this field is requested, we
     // create & add to terms:
     final List<Terms> subs2 = new ArrayList<Terms>();
     final List<ReaderUtil.Slice> slices2 = new ArrayList<ReaderUtil.Slice>();

     // Gather all sub-readers that share this field
     for(int i=0;i<subs.length;i++) {
       final Terms terms = subs[i].terms(field);
       if (terms != null) {
         subs2.add(terms);
         slices2.add(subSlices[i]);
       }
     }
     if (subs2.size() == 0) {
       result = null;
       // don't cache this case with an unbounded cache, since the number of fields that don't exist
       // is unbounded.
     } else {
       result = new MultiTerms(subs2.toArray(Terms.EMPTY_ARRAY),
           slices2.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
       terms.put(field, result);
     }

     return result;
   }

   @Override
   public int getUniqueFieldCount() {
     return -1;
   }

   public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
     final Terms terms = getTerms(r, field);
     if (terms != null) {
       final TermsEnum termsEnum = terms.iterator(null);
       if (termsEnum.seekExact(text, true)) {
         return termsEnum.totalTermFreq();
       }
     }
     return 0;
   }

   /** Call this to get the (merged) FieldInfos for a
    *  composite reader */
   public static FieldInfos getMergedFieldInfos(IndexReader reader) {
     final List<AtomicReader> subReaders = new ArrayList<AtomicReader>();
     ReaderUtil.gatherSubReaders(subReaders, reader);
     final FieldInfos fieldInfos = new FieldInfos();
     for(AtomicReader subReader : subReaders) {
       fieldInfos.add(subReader.getFieldInfos());
     }
     return fieldInfos;
   }

   public static Collection<String> getIndexedFields(IndexReader reader) {
     final Collection<String> fields = new HashSet<String>();
     for(FieldInfo fieldInfo : getMergedFieldInfos(reader)) {
       if (fieldInfo.isIndexed) {
         fields.add(fieldInfo.name);
       }
     }
     return fields;
   }
 }
	package org.apache.lucene.index;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.List;
	import java.util.Map;
	import java.util.Collection;
	import java.util.HashSet;
	import java.util.concurrent.ConcurrentHashMap;

	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.MultiBits;
	import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs
	import org.apache.lucene.util.ReaderUtil;

	/**
	* Exposes flex API, merged from flex API of sub-segments.
	* This is useful when you're interacting with an {@link
	* IndexReader} implementation that consists of sequential
	* sub-readers (eg {@link DirectoryReader} or {@link
	* MultiReader}).
	*
	* <p><b>NOTE</b>: for multi readers, you'll get better
	* performance by gathering the sub readers using {@link
	* ReaderUtil#gatherSubReaders} and then operate per-reader,
	* instead of using this class.
	*
	* @lucene.experimental
	*/

	public final class MultiFields extends Fields {
	private final Fields[] subs;
	private final ReaderUtil.Slice[] subSlices;
	private final Map<String,Terms> terms = new ConcurrentHashMap<String,Terms>();

	/** Returns a single {@link Fields} instance for this
	* reader, merging fields/terms/docs/positions on the
	* fly. This method will return null if the reader
	* has no postings.
	*
	* <p><b>NOTE</b>: this is a slow way to access postings.
	* It's better to get the sub-readers (using {@link
	* Gather}) and iterate through them
	* yourself. */
	public static Fields getFields(IndexReader r) throws IOException {
	if (r instanceof AtomicReader) {
	// already an atomic reader
	return ((AtomicReader) r).fields();
	}
	assert r instanceof CompositeReader;
	final IndexReader[] subs = ((CompositeReader) r).getSequentialSubReaders();
	if (subs.length == 0) {
	// no fields
	return null;
	} else {
	final List<Fields> fields = new ArrayList<Fields>();
	final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();

	new ReaderUtil.Gather(r) {
	@Override
	protected void add(int base, AtomicReader r) throws IOException {
	final Fields f = r.fields();
	if (f != null) {
	fields.add(f);
	slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
	}
	}
	}.run();

	if (fields.isEmpty()) {
	return null;
	} else if (fields.size() == 1) {
	return fields.get(0);
	} else {
	return new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
	slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
	}
	}
	}

	public static Bits getLiveDocs(IndexReader r) {
	if (r.hasDeletions()) {
	final List<Bits> liveDocs = new ArrayList<Bits>();
	final List<Integer> starts = new ArrayList<Integer>();

	try {
	final int maxDoc = new ReaderUtil.Gather(r) {
	@Override
	protected void add(int base, AtomicReader r) throws IOException {
	// record all liveDocs, even if they are null
	liveDocs.add(r.getLiveDocs());
	starts.add(base);
	}
	}.run();
	starts.add(maxDoc);
	} catch (IOException ioe) {
	// should not happen
	throw new RuntimeException(ioe);
	}

	assert liveDocs.size() > 0;
	if (liveDocs.size() == 1) {
	// Only one actual sub reader -- optimize this case
	return liveDocs.get(0);
	} else {
	return new MultiBits(liveDocs, starts, true);
	}
	} else {
	return null;
	}
	}

	/** This method may return null if the field does not exist.*/
	public static Terms getTerms(IndexReader r, String field) throws IOException {
	final Fields fields = getFields(r);
	if (fields == null) {
	return null;
	} else {
	return fields.terms(field);
	}
	}

	/** Returns {@link DocsEnum} for the specified field &
	* term. This may return null if the term does not
	* exist. */
	public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
	assert field != null;
	assert term != null;
	final Terms terms = getTerms(r, field);
	if (terms != null) {
	final TermsEnum termsEnum = terms.iterator(null);
	if (termsEnum.seekExact(term, true)) {
	return termsEnum.docs(liveDocs, null, needsFreqs);
	}
	}
	return null;
	}

	/** Returns {@link DocsAndPositionsEnum} for the specified
	* field & term. This may return null if the term does
	* not exist or positions were not indexed. */
	public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
	assert field != null;
	assert term != null;
	final Terms terms = getTerms(r, field);
	if (terms != null) {
	final TermsEnum termsEnum = terms.iterator(null);
	if (termsEnum.seekExact(term, true)) {
	return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
	}
	}
	return null;
	}

	public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
	this.subs = subs;
	this.subSlices = subSlices;
	}

	@Override
	public FieldsEnum iterator() throws IOException {

	final List<FieldsEnum> fieldsEnums = new ArrayList<FieldsEnum>();
	final List<ReaderUtil.Slice> fieldsSlices = new ArrayList<ReaderUtil.Slice>();
	for(int i=0;i<subs.length;i++) {
	fieldsEnums.add(subs[i].iterator());
	fieldsSlices.add(subSlices[i]);
	}
	if (fieldsEnums.size() == 0) {
	return FieldsEnum.EMPTY;
	} else {
	return new MultiFieldsEnum(this,
	fieldsEnums.toArray(FieldsEnum.EMPTY_ARRAY),
	fieldsSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
	}
	}

	@Override
	public Terms terms(String field) throws IOException {

	Terms result = terms.get(field);
	if (result != null)
	return result;


	// Lazy init: first time this field is requested, we
	// create & add to terms:
	final List<Terms> subs2 = new ArrayList<Terms>();
	final List<ReaderUtil.Slice> slices2 = new ArrayList<ReaderUtil.Slice>();

	// Gather all sub-readers that share this field
	for(int i=0;i<subs.length;i++) {
	final Terms terms = subs[i].terms(field);
	if (terms != null) {
	subs2.add(terms);
	slices2.add(subSlices[i]);
	}
	}
	if (subs2.size() == 0) {
	result = null;
	// don't cache this case with an unbounded cache, since the number of fields that don't exist
	// is unbounded.
	} else {
	result = new MultiTerms(subs2.toArray(Terms.EMPTY_ARRAY),
	slices2.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
	terms.put(field, result);
	}

	return result;
	}

	@Override
	public int getUniqueFieldCount() {
	return -1;
	}

	public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
	final Terms terms = getTerms(r, field);
	if (terms != null) {
	final TermsEnum termsEnum = terms.iterator(null);
	if (termsEnum.seekExact(text, true)) {
	return termsEnum.totalTermFreq();
	}
	}
	return 0;
	}

	/** Call this to get the (merged) FieldInfos for a
	* composite reader */
	public static FieldInfos getMergedFieldInfos(IndexReader reader) {
	final List<AtomicReader> subReaders = new ArrayList<AtomicReader>();
	ReaderUtil.gatherSubReaders(subReaders, reader);
	final FieldInfos fieldInfos = new FieldInfos();
	for(AtomicReader subReader : subReaders) {
	fieldInfos.add(subReader.getFieldInfos());
	}
	return fieldInfos;
	}

	public static Collection<String> getIndexedFields(IndexReader reader) {
	final Collection<String> fields = new HashSet<String>();
	for(FieldInfo fieldInfo : getMergedFieldInfos(reader)) {
	if (fieldInfo.isIndexed) {
	fields.add(fieldInfo.name);
	}
	}
	return fields;
	}
	}