lucene/src/java/org/apache/lucene/index/MultiFields.java - lucene-solr - Git at Google

 package org.apache.lucene.index;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.Map;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.concurrent.ConcurrentHashMap;

 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.ReaderUtil.Gather;  // for javadocs
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;

 /**
  * Exposes flex API, merged from flex API of sub-segments.
  * This is useful when you're interacting with an {@link
  * IndexReader} implementation that consists of sequential
  * sub-readers (eg DirectoryReader or {@link
  * MultiReader}).
  *
  * <p><b>NOTE</b>: for multi readers, you'll get better
  * performance by gathering the sub readers using {@link
  * ReaderUtil#gatherSubReaders} and then operate per-reader,
  * instead of using this class.
  *
  * @lucene.experimental
  */

 public final class MultiFields extends Fields {
   private final Fields[] subs;
   private final ReaderUtil.Slice[] subSlices;
   private final Map<String,Terms> terms = new ConcurrentHashMap<String,Terms>();

   /** Returns a single {@link Fields} instance for this
    *  reader, merging fields/terms/docs/positions on the
    *  fly.  This method will not return null.
    *
    *  <p><b>NOTE</b>: this is a slow way to access postings.
    *  It's better to get the sub-readers (using {@link
    *  Gather}) and iterate through them
    *  yourself. */
   public static Fields getFields(IndexReader r) throws IOException {
     final IndexReader[] subs = r.getSequentialSubReaders();
     if (subs == null) {
       // already an atomic reader
       return r.fields();
     } else if (subs.length == 0) {
       // no fields
       return null;
     } else if (subs.length == 1) {
       return getFields(subs[0]);
     } else {

       Fields currentFields = r.retrieveFields();
       if (currentFields == null) {

         final List<Fields> fields = new ArrayList<Fields>();
         final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();

         new ReaderUtil.Gather(r) {
           @Override
           protected void add(int base, IndexReader r) throws IOException {
             final Fields f = r.fields();
             if (f != null) {
               fields.add(f);
               slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
             }
           }
         }.run();

         if (fields.size() == 0) {
           return null;
         } else if (fields.size() == 1) {
           currentFields = fields.get(0);
         } else {
           currentFields = new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                          slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
         }
         r.storeFields(currentFields);
       }
       return currentFields;
     }
   }

   private static class MultiReaderBits implements Bits {
     private final int[] starts;
     private final IndexReader[] readers;
     private final Bits[] delDocs;

     public MultiReaderBits(int[] starts, IndexReader[] readers) {
       assert readers.length == starts.length-1;
       this.starts = starts;
       this.readers = readers;
       delDocs = new Bits[readers.length];
       for(int i=0;i<readers.length;i++) {
         delDocs[i] = readers[i].getDeletedDocs();
       }
     }

     public boolean get(int doc) {
       final int sub = ReaderUtil.subIndex(doc, starts);
       Bits dels = delDocs[sub];
       if (dels == null) {
         // NOTE: this is not sync'd but multiple threads can
         // come through here; I think this is OK -- worst
         // case is more than 1 thread ends up filling in the
         // sub Bits
         dels = readers[sub].getDeletedDocs();
         if (dels == null) {
           return false;
         } else {
           delDocs[sub] = dels;
         }
       }
       return dels.get(doc-starts[sub]);
     }

     public int length() {
       return starts[starts.length-1];
     }
   }

   public static Bits getDeletedDocs(IndexReader r) {
     Bits result;
     if (r.hasDeletions()) {

       final List<IndexReader> readers = new ArrayList<IndexReader>();
       final List<Integer> starts = new ArrayList<Integer>();

       try {
         final int maxDoc = new ReaderUtil.Gather(r) {
             @Override
             protected void add(int base, IndexReader r) throws IOException {
               // record all delDocs, even if they are null
               readers.add(r);
               starts.add(base);
             }
           }.run();
         starts.add(maxDoc);
       } catch (IOException ioe) {
         // should not happen
         throw new RuntimeException(ioe);
       }

       assert readers.size() > 0;
       if (readers.size() == 1) {
         // Only one actual sub reader -- optimize this case
         result = readers.get(0).getDeletedDocs();
       } else {
         int[] startsArray = new int[starts.size()];
         for(int i=0;i<startsArray.length;i++) {
           startsArray[i] = starts.get(i);
         }
         result = new MultiReaderBits(startsArray, readers.toArray(new IndexReader[readers.size()]));
       }

     } else {
       result = null;
     }

     return result;
   }

   /**  This method may return null if the field does not exist.*/
   public static Terms getTerms(IndexReader r, String field) throws IOException {
     final Fields fields = getFields(r);
     if (fields == null) {
       return null;
     } else {
       return fields.terms(field);
     }
   }

   /** Returns {@link DocsEnum} for the specified field &
    *  term.  This may return null if the term does not
    *  exist. */
   public static DocsEnum getTermDocsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
     assert field != null;
     assert term != null;
     final Terms terms = getTerms(r, field);
     if (terms != null) {
       return terms.docs(skipDocs, term, null);
     } else {
       return null;
     }
   }

   /** Returns {@link DocsAndPositionsEnum} for the specified
    *  field & term.  This may return null if the term does
    *  not exist or positions were not indexed. */
   public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
     assert field != null;
     assert term != null;
     final Terms terms = getTerms(r, field);
     if (terms != null) {
       return terms.docsAndPositions(skipDocs, term, null);
     } else {
       return null;
     }
   }

   public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
     this.subs = subs;
     this.subSlices = subSlices;
   }

   @Override
   public FieldsEnum iterator() throws IOException {

     final List<FieldsEnum> fieldsEnums = new ArrayList<FieldsEnum>();
     final List<ReaderUtil.Slice> fieldsSlices = new ArrayList<ReaderUtil.Slice>();
     for(int i=0;i<subs.length;i++) {
       fieldsEnums.add(subs[i].iterator());
       fieldsSlices.add(subSlices[i]);
     }
     if (fieldsEnums.size() == 0) {
       return FieldsEnum.EMPTY;
     } else {
       return new MultiFieldsEnum(fieldsEnums.toArray(FieldsEnum.EMPTY_ARRAY),
                                  fieldsSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
     }
   }

   @Override
   public Terms terms(String field) throws IOException {

     Terms result = terms.get(field);
     if (result != null)
       return result;


     // Lazy init: first time this field is requested, we
     // create & add to terms:
     final List<Terms> subs2 = new ArrayList<Terms>();
     final List<ReaderUtil.Slice> slices2 = new ArrayList<ReaderUtil.Slice>();

     // Gather all sub-readers that share this field
     for(int i=0;i<subs.length;i++) {
       final Terms terms = subs[i].terms(field);
       if (terms != null) {
         subs2.add(terms);
         slices2.add(subSlices[i]);
       }
     }
     if (subs2.size() == 0) {
       result = null;
       // don't cache this case with an unbounded cache, since the number of fields that don't exist
       // is unbounded.
     } else {
       result = new MultiTerms(subs2.toArray(Terms.EMPTY_ARRAY),
           slices2.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
       terms.put(field, result);
     }

     return result;
   }
 }
	package org.apache.lucene.index;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.Map;
	import java.util.List;
	import java.util.ArrayList;
	import java.util.concurrent.ConcurrentHashMap;

	import org.apache.lucene.util.ReaderUtil;
	import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;

	/**
	* Exposes flex API, merged from flex API of sub-segments.
	* This is useful when you're interacting with an {@link
	* IndexReader} implementation that consists of sequential
	* sub-readers (eg DirectoryReader or {@link
	* MultiReader}).
	*
	* <p><b>NOTE</b>: for multi readers, you'll get better
	* performance by gathering the sub readers using {@link
	* ReaderUtil#gatherSubReaders} and then operate per-reader,
	* instead of using this class.
	*
	* @lucene.experimental
	*/

	public final class MultiFields extends Fields {
	private final Fields[] subs;
	private final ReaderUtil.Slice[] subSlices;
	private final Map<String,Terms> terms = new ConcurrentHashMap<String,Terms>();

	/** Returns a single {@link Fields} instance for this
	* reader, merging fields/terms/docs/positions on the
	* fly. This method will not return null.
	*
	* <p><b>NOTE</b>: this is a slow way to access postings.
	* It's better to get the sub-readers (using {@link
	* Gather}) and iterate through them
	* yourself. */
	public static Fields getFields(IndexReader r) throws IOException {
	final IndexReader[] subs = r.getSequentialSubReaders();
	if (subs == null) {
	// already an atomic reader
	return r.fields();
	} else if (subs.length == 0) {
	// no fields
	return null;
	} else if (subs.length == 1) {
	return getFields(subs[0]);
	} else {

	Fields currentFields = r.retrieveFields();
	if (currentFields == null) {

	final List<Fields> fields = new ArrayList<Fields>();
	final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();

	new ReaderUtil.Gather(r) {
	@Override
	protected void add(int base, IndexReader r) throws IOException {
	final Fields f = r.fields();
	if (f != null) {
	fields.add(f);
	slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
	}
	}
	}.run();

	if (fields.size() == 0) {
	return null;
	} else if (fields.size() == 1) {
	currentFields = fields.get(0);
	} else {
	currentFields = new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
	slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
	}
	r.storeFields(currentFields);
	}
	return currentFields;
	}
	}

	private static class MultiReaderBits implements Bits {
	private final int[] starts;
	private final IndexReader[] readers;
	private final Bits[] delDocs;

	public MultiReaderBits(int[] starts, IndexReader[] readers) {
	assert readers.length == starts.length-1;
	this.starts = starts;
	this.readers = readers;
	delDocs = new Bits[readers.length];
	for(int i=0;i<readers.length;i++) {
	delDocs[i] = readers[i].getDeletedDocs();
	}
	}

	public boolean get(int doc) {
	final int sub = ReaderUtil.subIndex(doc, starts);
	Bits dels = delDocs[sub];
	if (dels == null) {
	// NOTE: this is not sync'd but multiple threads can
	// come through here; I think this is OK -- worst
	// case is more than 1 thread ends up filling in the
	// sub Bits
	dels = readers[sub].getDeletedDocs();
	if (dels == null) {
	return false;
	} else {
	delDocs[sub] = dels;
	}
	}
	return dels.get(doc-starts[sub]);
	}

	public int length() {
	return starts[starts.length-1];
	}
	}

	public static Bits getDeletedDocs(IndexReader r) {
	Bits result;
	if (r.hasDeletions()) {

	final List<IndexReader> readers = new ArrayList<IndexReader>();
	final List<Integer> starts = new ArrayList<Integer>();

	try {
	final int maxDoc = new ReaderUtil.Gather(r) {
	@Override
	protected void add(int base, IndexReader r) throws IOException {
	// record all delDocs, even if they are null
	readers.add(r);
	starts.add(base);
	}
	}.run();
	starts.add(maxDoc);
	} catch (IOException ioe) {
	// should not happen
	throw new RuntimeException(ioe);
	}

	assert readers.size() > 0;
	if (readers.size() == 1) {
	// Only one actual sub reader -- optimize this case
	result = readers.get(0).getDeletedDocs();
	} else {
	int[] startsArray = new int[starts.size()];
	for(int i=0;i<startsArray.length;i++) {
	startsArray[i] = starts.get(i);
	}
	result = new MultiReaderBits(startsArray, readers.toArray(new IndexReader[readers.size()]));
	}

	} else {
	result = null;
	}

	return result;
	}

	/** This method may return null if the field does not exist.*/
	public static Terms getTerms(IndexReader r, String field) throws IOException {
	final Fields fields = getFields(r);
	if (fields == null) {
	return null;
	} else {
	return fields.terms(field);
	}
	}

	/** Returns {@link DocsEnum} for the specified field &
	* term. This may return null if the term does not
	* exist. */
	public static DocsEnum getTermDocsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
	assert field != null;
	assert term != null;
	final Terms terms = getTerms(r, field);
	if (terms != null) {
	return terms.docs(skipDocs, term, null);
	} else {
	return null;
	}
	}

	/** Returns {@link DocsAndPositionsEnum} for the specified
	* field & term. This may return null if the term does
	* not exist or positions were not indexed. */
	public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
	assert field != null;
	assert term != null;
	final Terms terms = getTerms(r, field);
	if (terms != null) {
	return terms.docsAndPositions(skipDocs, term, null);
	} else {
	return null;
	}
	}

	public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
	this.subs = subs;
	this.subSlices = subSlices;
	}

	@Override
	public FieldsEnum iterator() throws IOException {

	final List<FieldsEnum> fieldsEnums = new ArrayList<FieldsEnum>();
	final List<ReaderUtil.Slice> fieldsSlices = new ArrayList<ReaderUtil.Slice>();
	for(int i=0;i<subs.length;i++) {
	fieldsEnums.add(subs[i].iterator());
	fieldsSlices.add(subSlices[i]);
	}
	if (fieldsEnums.size() == 0) {
	return FieldsEnum.EMPTY;
	} else {
	return new MultiFieldsEnum(fieldsEnums.toArray(FieldsEnum.EMPTY_ARRAY),
	fieldsSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
	}
	}

	@Override
	public Terms terms(String field) throws IOException {

	Terms result = terms.get(field);
	if (result != null)
	return result;


	// Lazy init: first time this field is requested, we
	// create & add to terms:
	final List<Terms> subs2 = new ArrayList<Terms>();
	final List<ReaderUtil.Slice> slices2 = new ArrayList<ReaderUtil.Slice>();

	// Gather all sub-readers that share this field
	for(int i=0;i<subs.length;i++) {
	final Terms terms = subs[i].terms(field);
	if (terms != null) {
	subs2.add(terms);
	slices2.add(subSlices[i]);
	}
	}
	if (subs2.size() == 0) {
	result = null;
	// don't cache this case with an unbounded cache, since the number of fields that don't exist
	// is unbounded.
	} else {
	result = new MultiTerms(subs2.toArray(Terms.EMPTY_ARRAY),
	slices2.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
	terms.put(field, result);
	}

	return result;
	}
	}