| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java |
| --- trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java 2016-03-08 17:22:26.824938630 -0500 |
| +++ indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -108,7 +108,7 @@ |
| } |
| |
| @Override |
| - public final SegmentInfoFormat segmentInfoFormat() { |
| + public SegmentInfoFormat segmentInfoFormat() { |
| return segmentInfosFormat; |
| } |
| |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java |
| --- trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -0,0 +1,105 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.codecs.lucene50; |
| + |
| + |
| +import java.io.IOException; |
| +import java.util.Collections; |
| +import java.util.Map; |
| +import java.util.Set; |
| + |
| +import org.apache.lucene.codecs.CodecUtil; |
| +import org.apache.lucene.codecs.SegmentInfoFormat; |
| +import org.apache.lucene.index.CorruptIndexException; |
| +import org.apache.lucene.index.IndexFileNames; |
| +import org.apache.lucene.index.IndexWriter; // javadocs |
| +import org.apache.lucene.index.SegmentInfo; // javadocs |
| +import org.apache.lucene.index.SegmentInfos; // javadocs |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| +import org.apache.lucene.store.DataOutput; // javadocs |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.IOContext; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.Version; |
| + |
| +/** |
| + * Lucene 5.0 Segment info format. |
| + * @deprecated Only for reading old 5.0-6.0 segments |
| + */ |
| +@Deprecated |
| +public class Lucene50SegmentInfoFormat extends SegmentInfoFormat { |
| + |
| + /** Sole constructor. */ |
| + public Lucene50SegmentInfoFormat() { |
| + } |
| + |
| + @Override |
| + public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException { |
| + final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION); |
| + try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) { |
| + Throwable priorE = null; |
| + SegmentInfo si = null; |
| + try { |
| + int format = CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME, |
| + Lucene50SegmentInfoFormat.VERSION_START, |
| + Lucene50SegmentInfoFormat.VERSION_CURRENT, |
| + segmentID, ""); |
| + final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt()); |
| + |
| + final int docCount = input.readInt(); |
| + if (docCount < 0) { |
| + throw new CorruptIndexException("invalid docCount: " + docCount, input); |
| + } |
| + final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; |
| + |
| + final Map<String,String> diagnostics; |
| + final Set<String> files; |
| + final Map<String,String> attributes; |
| + |
| + if (format >= VERSION_SAFE_MAPS) { |
| + diagnostics = input.readMapOfStrings(); |
| + files = input.readSetOfStrings(); |
| + attributes = input.readMapOfStrings(); |
| + } else { |
| + diagnostics = Collections.unmodifiableMap(input.readStringStringMap()); |
| + files = Collections.unmodifiableSet(input.readStringSet()); |
| + attributes = Collections.unmodifiableMap(input.readStringStringMap()); |
| + } |
| + |
| + si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null); |
| + si.setFiles(files); |
| + } catch (Throwable exception) { |
| + priorE = exception; |
| + } finally { |
| + CodecUtil.checkFooter(input, priorE); |
| + } |
| + return si; |
| + } |
| + } |
| + |
| + @Override |
| + public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { |
| + throw new UnsupportedOperationException("this codec can only be used for reading"); |
| + } |
| + |
| + /** File extension used to store {@link SegmentInfo}. */ |
| + public final static String SI_EXTENSION = "si"; |
| + static final String CODEC_NAME = "Lucene50SegmentInfo"; |
| + static final int VERSION_START = 0; |
| + static final int VERSION_SAFE_MAPS = 1; |
| + static final int VERSION_CURRENT = VERSION_SAFE_MAPS; |
| +} |
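| |
| A quick way to observe this read path: a hypothetical inspection utility (not part of this patch) |
| that reads the latest commit and prints each segment's sort. getIndexSort() is the accessor this |
| patch adds to SegmentInfo; segments decoded by the read-only format above always report null. |
| |
| import java.nio.file.Paths; |
| import org.apache.lucene.index.SegmentCommitInfo; |
| import org.apache.lucene.index.SegmentInfos; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| |
| public class SegmentInfoDump { |
|   public static void main(String[] args) throws Exception { |
|     try (Directory dir = FSDirectory.open(Paths.get(args[0]))) { |
|       // SegmentInfos dispatches to the codec recorded per segment, so pre-6.2 |
|       // segments are decoded by the back-compat format above: |
|       SegmentInfos infos = SegmentInfos.readLatestCommit(dir); |
|       for (SegmentCommitInfo sci : infos) { |
|         System.out.println(sci.info.name + " maxDoc=" + sci.info.maxDoc() |
|             + " sort=" + sci.info.getIndexSort()); // always null for old segments |
|       } |
|     } |
|   } |
| } |
| |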
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java |
| --- trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -0,0 +1,175 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.codecs.lucene60; |
| + |
| +import java.util.Objects; |
| + |
| +import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.codecs.CompoundFormat; |
| +import org.apache.lucene.codecs.DocValuesFormat; |
| +import org.apache.lucene.codecs.FieldInfosFormat; |
| +import org.apache.lucene.codecs.FilterCodec; |
| +import org.apache.lucene.codecs.LiveDocsFormat; |
| +import org.apache.lucene.codecs.NormsFormat; |
| +import org.apache.lucene.codecs.PointsFormat; |
| +import org.apache.lucene.codecs.PostingsFormat; |
| +import org.apache.lucene.codecs.SegmentInfoFormat; |
| +import org.apache.lucene.codecs.StoredFieldsFormat; |
| +import org.apache.lucene.codecs.TermVectorsFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; |
| +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; |
| +import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat; |
| +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; |
| +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; |
| + |
| +/** |
| + * Implements the Lucene 6.0 index format, with configurable per-field postings |
| + * and docvalues formats. |
| + * <p> |
| + * If you want to reuse functionality of this codec in another codec, extend |
| + * {@link FilterCodec}. |
| + * |
| + * @see org.apache.lucene.codecs.lucene60 package documentation for file format details. |
| + * |
| + * @lucene.experimental |
| + */ |
| +public class Lucene60Codec extends Codec { |
| + private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat(); |
| + private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat(); |
| + private final SegmentInfoFormat segmentInfosFormat = new Lucene50SegmentInfoFormat(); |
| + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); |
| + private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); |
| + |
| + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { |
| + @Override |
| + public PostingsFormat getPostingsFormatForField(String field) { |
| + return Lucene60Codec.this.getPostingsFormatForField(field); |
| + } |
| + }; |
| + |
| + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { |
| + @Override |
| + public DocValuesFormat getDocValuesFormatForField(String field) { |
| + return Lucene60Codec.this.getDocValuesFormatForField(field); |
| + } |
| + }; |
| + |
| + private final StoredFieldsFormat storedFieldsFormat; |
| + |
| + /** |
| + * Instantiates a new codec. |
| + */ |
| + public Lucene60Codec() { |
| + this(Mode.BEST_SPEED); |
| + } |
| + |
| + /** |
| + * Instantiates a new codec, specifying the stored fields compression |
| + * mode to use. |
| + * @param mode stored fields compression mode to use for newly |
| + * flushed/merged segments. |
| + */ |
| + public Lucene60Codec(Mode mode) { |
| + super("Lucene60"); |
| + this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode)); |
| + } |
| + |
| + @Override |
| + public final StoredFieldsFormat storedFieldsFormat() { |
| + return storedFieldsFormat; |
| + } |
| + |
| + @Override |
| + public final TermVectorsFormat termVectorsFormat() { |
| + return vectorsFormat; |
| + } |
| + |
| + @Override |
| + public final PostingsFormat postingsFormat() { |
| + return postingsFormat; |
| + } |
| + |
| + @Override |
| + public final FieldInfosFormat fieldInfosFormat() { |
| + return fieldInfosFormat; |
| + } |
| + |
| + @Override |
| + public final SegmentInfoFormat segmentInfoFormat() { |
| + return segmentInfosFormat; |
| + } |
| + |
| + @Override |
| + public final LiveDocsFormat liveDocsFormat() { |
| + return liveDocsFormat; |
| + } |
| + |
| + @Override |
| + public final CompoundFormat compoundFormat() { |
| + return compoundFormat; |
| + } |
| + |
| + @Override |
| + public final PointsFormat pointsFormat() { |
| + return new Lucene60PointsFormat(); |
| + } |
| + |
| + /** Returns the postings format that should be used for writing |
| + * new segments of <code>field</code>. |
| + * |
| + * The default implementation always returns "Lucene50". |
| + * <p> |
| + * <b>WARNING:</b> if you subclass, you are responsible for index |
| + * backwards compatibility: future versions of Lucene are only |
| + * guaranteed to be able to read the default implementation. |
| + */ |
| + public PostingsFormat getPostingsFormatForField(String field) { |
| + return defaultFormat; |
| + } |
| + |
| + /** Returns the docvalues format that should be used for writing |
| + * new segments of <code>field</code>. |
| + * |
| + * The default implementation always returns "Lucene54". |
| + * <p> |
| + * <b>WARNING:</b> if you subclass, you are responsible for index |
| + * backwards compatibility: future versions of Lucene are only |
| + * guaranteed to be able to read the default implementation. |
| + */ |
| + public DocValuesFormat getDocValuesFormatForField(String field) { |
| + return defaultDVFormat; |
| + } |
| + |
| + @Override |
| + public final DocValuesFormat docValuesFormat() { |
| + return docValuesFormat; |
| + } |
| + |
| + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50"); |
| + private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene54"); |
| + |
| + private final NormsFormat normsFormat = new Lucene53NormsFormat(); |
| + |
| + @Override |
| + public final NormsFormat normsFormat() { |
| + return normsFormat; |
| + } |
| +} |
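| |
| The two getXXXFormatForField hooks are this codec's only intended extension points; everything |
| else is final. A hedged sketch of overriding the postings format for a single field (the "id" |
| field and the "Memory" format are illustrative; "Memory" ships in the lucene-codecs module). The |
| same pattern applies to the new Lucene62Codec, as the CreateIndexTask change further below shows. |
| |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.codecs.lucene60.Lucene60Codec; |
| import org.apache.lucene.index.IndexWriterConfig; |
| |
| class PerFieldCodecExample { |
|   static IndexWriterConfig perFieldConfig() { |
|     IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer()); |
|     iwc.setCodec(new Lucene60Codec() { |
|       @Override |
|       public PostingsFormat getPostingsFormatForField(String field) { |
|         if ("id".equals(field)) {                      // hypothetical field |
|           return PostingsFormat.forName("Memory");     // SPI lookup, like the codec itself |
|         } |
|         return super.getPostingsFormatForField(field); // the "Lucene50" default |
|       } |
|     }); |
|     return iwc; |
|   } |
| } |
| |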
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec indexsort/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec |
| --- trunk/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec 2016-01-24 13:09:49.836989951 -0500 |
| +++ indexsort/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec 2016-05-10 05:44:23.740471118 -0400 |
| @@ -16,3 +16,4 @@ |
| org.apache.lucene.codecs.lucene50.Lucene50Codec |
| org.apache.lucene.codecs.lucene53.Lucene53Codec |
| org.apache.lucene.codecs.lucene54.Lucene54Codec |
| +org.apache.lucene.codecs.lucene60.Lucene60Codec |
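| |
| The services entry is what makes the SPI lookup succeed at read time; a minimal sketch: |
| |
| import org.apache.lucene.codecs.Codec; |
| |
| class CodecLookup { |
|   public static void main(String[] args) { |
|     // SPI scans META-INF/services/org.apache.lucene.codecs.Codec on the |
|     // classpath; without the line added above this lookup would throw. |
|     System.out.println(Codec.forName("Lucene60")); |
|   } |
| } |
| |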
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java indexsort/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java |
| --- trunk/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java 2016-02-16 11:18:34.633021814 -0500 |
| +++ indexsort/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -18,6 +18,7 @@ |
| |
| |
| import org.apache.lucene.codecs.NormsFormat; |
| +import org.apache.lucene.codecs.SegmentInfoFormat; |
| |
| /** |
| * Codec for testing 5.0 index format |
| @@ -26,9 +27,15 @@ |
| @Deprecated |
| final class Lucene50RWCodec extends Lucene50Codec { |
| private final NormsFormat normsFormat = new Lucene50RWNormsFormat(); |
| + private final SegmentInfoFormat segmentInfoFormat = new Lucene50RWSegmentInfoFormat(); |
| |
| @Override |
| public NormsFormat normsFormat() { |
| return normsFormat; |
| } |
| + |
| + @Override |
| + public SegmentInfoFormat segmentInfoFormat() { |
| + return segmentInfoFormat; |
| + } |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java indexsort/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java |
| --- trunk/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -0,0 +1,140 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.codecs.lucene50; |
| + |
| + |
| +import java.io.IOException; |
| +import java.util.Collections; |
| +import java.util.Map; |
| +import java.util.Set; |
| + |
| +import org.apache.lucene.codecs.CodecUtil; |
| +import org.apache.lucene.codecs.SegmentInfoFormat; |
| +import org.apache.lucene.index.CorruptIndexException; |
| +import org.apache.lucene.index.IndexFileNames; |
| +import org.apache.lucene.index.IndexWriter; // javadocs |
| +import org.apache.lucene.index.SegmentInfo; // javadocs |
| +import org.apache.lucene.index.SegmentInfos; // javadocs |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| +import org.apache.lucene.store.DataOutput; // javadocs |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.IOContext; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.Version; |
| + |
| +/** |
| + * Read-write version of 5.0 SegmentInfoFormat for testing |
| + * @deprecated for test purposes only |
| + */ |
| +@Deprecated |
| +public class Lucene50RWSegmentInfoFormat extends Lucene50SegmentInfoFormat { |
| + |
| + /** Sole constructor. */ |
| + public Lucene50RWSegmentInfoFormat() { |
| + } |
| + |
| + @Override |
| + public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException { |
| + final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION); |
| + try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) { |
| + Throwable priorE = null; |
| + SegmentInfo si = null; |
| + try { |
| + int format = CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME, |
| + Lucene50SegmentInfoFormat.VERSION_START, |
| + Lucene50SegmentInfoFormat.VERSION_CURRENT, |
| + segmentID, ""); |
| + final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt()); |
| + |
| + final int docCount = input.readInt(); |
| + if (docCount < 0) { |
| + throw new CorruptIndexException("invalid docCount: " + docCount, input); |
| + } |
| + final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; |
| + |
| + final Map<String,String> diagnostics; |
| + final Set<String> files; |
| + final Map<String,String> attributes; |
| + |
| + if (format >= VERSION_SAFE_MAPS) { |
| + diagnostics = input.readMapOfStrings(); |
| + files = input.readSetOfStrings(); |
| + attributes = input.readMapOfStrings(); |
| + } else { |
| + diagnostics = Collections.unmodifiableMap(input.readStringStringMap()); |
| + files = Collections.unmodifiableSet(input.readStringSet()); |
| + attributes = Collections.unmodifiableMap(input.readStringStringMap()); |
| + } |
| + |
| + si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null); |
| + si.setFiles(files); |
| + } catch (Throwable exception) { |
| + priorE = exception; |
| + } finally { |
| + CodecUtil.checkFooter(input, priorE); |
| + } |
| + return si; |
| + } |
| + } |
| + |
| + @Override |
| + public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { |
| + final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION); |
| + |
| + assert si.getIndexSort() == null; |
| + |
| + try (IndexOutput output = dir.createOutput(fileName, ioContext)) { |
| + // Only add the file once we've successfully created it, else IFD assert can trip: |
| + si.addFile(fileName); |
| + CodecUtil.writeIndexHeader(output, |
| + Lucene50SegmentInfoFormat.CODEC_NAME, |
| + Lucene50SegmentInfoFormat.VERSION_CURRENT, |
| + si.getId(), |
| + ""); |
| + Version version = si.getVersion(); |
| + if (version.major < 5) { |
| + throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si); |
| + } |
| + // Write the Lucene version that created this segment, since 3.1 |
| + output.writeInt(version.major); |
| + output.writeInt(version.minor); |
| + output.writeInt(version.bugfix); |
| + assert version.prerelease == 0; |
| + output.writeInt(si.maxDoc()); |
| + |
| + output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); |
| + output.writeMapOfStrings(si.getDiagnostics()); |
| + Set<String> files = si.files(); |
| + for (String file : files) { |
| + if (!IndexFileNames.parseSegmentName(file).equals(si.name)) { |
| + throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files); |
| + } |
| + } |
| + output.writeSetOfStrings(files); |
| + output.writeMapOfStrings(si.getAttributes()); |
| + |
| + CodecUtil.writeFooter(output); |
| + } |
| + } |
| + |
| + /** File extension used to store {@link SegmentInfo}. */ |
| + public final static String SI_EXTENSION = "si"; |
| + static final String CODEC_NAME = "Lucene50SegmentInfo"; |
| + static final int VERSION_START = 0; |
| + static final int VERSION_SAFE_MAPS = 1; |
| + static final int VERSION_CURRENT = VERSION_SAFE_MAPS; |
| +} |
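| |
| Note the assert in write(): the 5.0 .si layout has no slot for an index sort, so back-compat |
| tests can only produce unsorted segments with this format. A hedged fragment of typical test |
| usage (both RW classes are package-private, so this lives in the same test package; dir and |
| analyzer are assumed, and setIndexSort is the IndexWriterConfig method added elsewhere in this |
| patch): |
| |
| IndexWriterConfig iwc = new IndexWriterConfig(analyzer); |
| iwc.setCodec(new Lucene50RWCodec()); // newly flushed segments get 5.0-format .si files |
| // Do not call iwc.setIndexSort(...) here: the assert above would trip at flush. |
| try (IndexWriter writer = new IndexWriter(dir, iwc)) { |
|   writer.addDocument(new Document()); |
| } |
| |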
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java indexsort/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java |
| --- trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java 2016-02-16 11:18:34.637021814 -0500 |
| +++ indexsort/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -29,7 +29,7 @@ |
| import org.apache.lucene.benchmark.byTask.utils.Config; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.PostingsFormat; |
| -import org.apache.lucene.codecs.lucene60.Lucene60Codec; |
| +import org.apache.lucene.codecs.lucene62.Lucene62Codec; |
| import org.apache.lucene.index.ConcurrentMergeScheduler; |
| import org.apache.lucene.index.IndexCommit; |
| import org.apache.lucene.index.IndexDeletionPolicy; |
| @@ -139,7 +139,7 @@ |
| if (defaultCodec == null && postingsFormat != null) { |
| try { |
| final PostingsFormat postingsFormatChosen = PostingsFormat.forName(postingsFormat); |
| - iwConf.setCodec(new Lucene60Codec() { |
| + iwConf.setCodec(new Lucene62Codec() { |
| @Override |
| public PostingsFormat getPostingsFormatForField(String field) { |
| return postingsFormatChosen; |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/classification/build.xml indexsort/lucene/classification/build.xml |
| --- trunk/lucene/classification/build.xml 2016-04-24 06:00:46.361895938 -0400 |
| +++ indexsort/lucene/classification/build.xml 2016-05-10 05:44:23.740471118 -0400 |
| @@ -28,7 +28,6 @@ |
| <path refid="base.classpath"/> |
| <pathelement path="${queries.jar}"/> |
| <pathelement path="${grouping.jar}"/> |
| - <pathelement path="${misc.jar}"/> |
| </path> |
| |
| <path id="test.classpath"> |
| @@ -37,17 +36,16 @@ |
| <path refid="test.base.classpath"/> |
| </path> |
| |
| - <target name="compile-core" depends="jar-misc,jar-grouping,jar-queries,jar-analyzers-common,common.compile-core" /> |
| + <target name="compile-core" depends="jar-grouping,jar-queries,jar-analyzers-common,common.compile-core" /> |
| |
| <target name="jar-core" depends="common.jar-core" /> |
| |
| - <target name="javadocs" depends="javadocs-misc,javadocs-grouping,javadocs-misc,compile-core,check-javadocs-uptodate" |
| + <target name="javadocs" depends="javadocs-grouping,compile-core,check-javadocs-uptodate" |
| unless="javadocs-uptodate-${name}"> |
| <invoke-module-javadoc> |
| <links> |
| <link href="../queries"/> |
| <link href="../grouping"/> |
| - <link href="../misc"/> |
| </links> |
| </invoke-module-javadoc> |
| </target> |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java indexsort/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java |
| --- trunk/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java 2016-04-24 06:00:46.361895938 -0400 |
| +++ indexsort/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -29,6 +29,7 @@ |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.IndexableField; |
| import org.apache.lucene.index.LeafReader; |
| +import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.MatchAllDocsQuery; |
| @@ -38,7 +39,6 @@ |
| import org.apache.lucene.search.grouping.GroupingSearch; |
| import org.apache.lucene.search.grouping.TopGroups; |
| import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.uninverting.UninvertingReader; |
| |
| /** |
| * Utility class for creating training / test / cross validation indexes from the original index. |
| @@ -68,7 +68,7 @@ |
| * @param crossValidationIndex a {@link Directory} used to write the cross validation index |
| * @param analyzer {@link Analyzer} used to create the new docs |
| * @param termVectors {@code true} if term vectors should be kept |
| - * @param classFieldName names of the field used as the label for classification |
| + * @param classFieldName name of the field used as the label for classification; this must be indexed with sorted doc values |
| * @param fieldNames names of fields that need to be put in the new indexes or <code>null</code> if all should be used |
| * @throws IOException if any writing operation fails on any of the indexes |
| */ |
| @@ -80,30 +80,23 @@ |
| IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer)); |
| IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer)); |
| |
| - // try to get the exact no. of existing classes |
| - Terms terms = originalIndex.terms(classFieldName); |
| - long noOfClasses = -1; |
| - if (terms != null) { |
| - noOfClasses = terms.size(); |
| - |
| - } |
| - if (noOfClasses == -1) { |
| - noOfClasses = 10000; // fallback |
| + // get the exact no. of existing classes |
| + SortedDocValues classValues = originalIndex.getSortedDocValues(classFieldName); |
| + if (classValues == null) { |
| + throw new IllegalStateException("the classFieldName \"" + classFieldName + "\" must index sorted doc values"); |
| } |
| |
| - HashMap<String, UninvertingReader.Type> mapping = new HashMap<>(); |
| - mapping.put(classFieldName, UninvertingReader.Type.SORTED); |
| - UninvertingReader uninvertingReader = new UninvertingReader(originalIndex, mapping); |
| + int noOfClasses = classValues.getValueCount(); |
| |
| try { |
| |
| - IndexSearcher indexSearcher = new IndexSearcher(uninvertingReader); |
| + IndexSearcher indexSearcher = new IndexSearcher(originalIndex); |
| GroupingSearch gs = new GroupingSearch(classFieldName); |
| gs.setGroupSort(Sort.INDEXORDER); |
| gs.setSortWithinGroup(Sort.INDEXORDER); |
| gs.setAllGroups(true); |
| gs.setGroupDocsLimit(originalIndex.maxDoc()); |
| - TopGroups<Object> topGroups = gs.search(indexSearcher, new MatchAllDocsQuery(), 0, (int) noOfClasses); |
| + TopGroups<Object> topGroups = gs.search(indexSearcher, new MatchAllDocsQuery(), 0, noOfClasses); |
| |
| // set the type to be indexed, stored, with term vectors |
| FieldType ft = new FieldType(TextField.TYPE_STORED); |
| @@ -156,7 +149,7 @@ |
| testWriter.close(); |
| cvWriter.close(); |
| trainingWriter.close(); |
| - uninvertingReader.close(); |
| + originalIndex.close(); |
| } |
| } |
| |
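| |
| With UninvertingReader gone, callers of DatasetSplitter must index the class label with doc |
| values up front. The test change just below does exactly that; sketched here in isolation |
| (field name and value are illustrative): |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.SortedDocValuesField; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.util.BytesRef; |
| |
| Document doc = new Document(); |
| // Indexed/stored copy, used when writing the split indexes: |
| doc.add(new Field("cat", "sports", TextField.TYPE_STORED)); |
| // Doc-values copy, resolved by getSortedDocValues() and used for grouping: |
| doc.add(new SortedDocValuesField("cat", new BytesRef("sports"))); |
| |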
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java indexsort/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java |
| --- trunk/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java 2016-04-24 06:00:46.361895938 -0400 |
| +++ indexsort/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -70,7 +70,9 @@ |
| doc = new Document(); |
| doc.add(new Field(idFieldName, "id" + Integer.toString(i), ft)); |
| doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft)); |
| - doc.add(new Field(classFieldName, Integer.toString(rnd.nextInt(10)), ft)); |
| + String className = Integer.toString(rnd.nextInt(10)); |
| + doc.add(new Field(classFieldName, className, ft)); |
| + doc.add(new SortedDocValuesField(classFieldName, new BytesRef(className))); |
| indexWriter.addDocument(doc); |
| } |
| |
| @@ -89,13 +91,11 @@ |
| super.tearDown(); |
| } |
| |
| - |
| @Test |
| public void testSplitOnAllFields() throws Exception { |
| assertSplit(originalIndex, 0.1, 0.1); |
| } |
| |
| - |
| @Test |
| public void testSplitOnSomeFields() throws Exception { |
| assertSplit(originalIndex, 0.2, 0.35, idFieldName, textFieldName); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java |
| --- trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java 2016-02-16 11:18:34.649021815 -0500 |
| +++ indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -36,6 +36,7 @@ |
| private IndexOutput out; |
| private final BytesRefBuilder scratch = new BytesRefBuilder(); |
| private final SegmentWriteState writeState; |
| + final String segment; |
| |
| final static BytesRef END = new BytesRef("END"); |
| final static BytesRef FIELD = new BytesRef("field "); |
| @@ -49,6 +50,7 @@ |
| |
| public SimpleTextFieldsWriter(SegmentWriteState writeState) throws IOException { |
| final String fileName = SimpleTextPostingsFormat.getPostingsFileName(writeState.segmentInfo.name, writeState.segmentSuffix); |
| + segment = writeState.segmentInfo.name; |
| out = writeState.directory.createOutput(fileName, writeState.context); |
| this.writeState = writeState; |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java |
| --- trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java 2016-02-16 11:18:34.649021815 -0500 |
| +++ indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -31,6 +31,8 @@ |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| @@ -59,6 +61,11 @@ |
| final static BytesRef SI_NUM_FILES = new BytesRef(" files "); |
| final static BytesRef SI_FILE = new BytesRef(" file "); |
| final static BytesRef SI_ID = new BytesRef(" id "); |
| + final static BytesRef SI_SORT = new BytesRef(" sort "); |
| + final static BytesRef SI_SORT_FIELD = new BytesRef(" field "); |
| + final static BytesRef SI_SORT_TYPE = new BytesRef(" type "); |
| + final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse "); |
| + final static BytesRef SI_SORT_MISSING = new BytesRef(" missing "); |
| |
| public static final String SI_EXTENSION = "si"; |
| |
| @@ -137,10 +144,119 @@ |
| + ", got: " + StringHelper.idToString(id), input); |
| } |
| |
| + SimpleTextUtil.readLine(input, scratch); |
| + assert StringHelper.startsWith(scratch.get(), SI_SORT); |
| + final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch)); |
| + SortField[] sortField = new SortField[numSortFields]; |
| + for (int i = 0; i < numSortFields; ++i) { |
| + SimpleTextUtil.readLine(input, scratch); |
| + assert StringHelper.startsWith(scratch.get(), SI_SORT_FIELD); |
| + final String field = readString(SI_SORT_FIELD.length, scratch); |
| + |
| + SimpleTextUtil.readLine(input, scratch); |
| + assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE); |
| + final String typeAsString = readString(SI_SORT_TYPE.length, scratch); |
| + |
| + final SortField.Type type; |
| + switch (typeAsString) { |
| + case "string": |
| + type = SortField.Type.STRING; |
| + break; |
| + case "long": |
| + type = SortField.Type.LONG; |
| + break; |
| + case "int": |
| + type = SortField.Type.INT; |
| + break; |
| + case "double": |
| + type = SortField.Type.DOUBLE; |
| + break; |
| + case "float": |
| + type = SortField.Type.FLOAT; |
| + break; |
| + default: |
| + throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input); |
| + } |
| + |
| + SimpleTextUtil.readLine(input, scratch); |
| + assert StringHelper.startsWith(scratch.get(), SI_SORT_REVERSE); |
| + final boolean reverse = Boolean.parseBoolean(readString(SI_SORT_REVERSE.length, scratch)); |
| + |
| + SimpleTextUtil.readLine(input, scratch); |
| + assert StringHelper.startsWith(scratch.get(), SI_SORT_MISSING); |
| + final String missingLastAsString = readString(SI_SORT_MISSING.length, scratch); |
| + final Object missingValue; |
| + switch (type) { |
| + case STRING: |
| + switch (missingLastAsString) { |
| + case "null": |
| + missingValue = null; |
| + break; |
| + case "first": |
| + missingValue = SortField.STRING_FIRST; |
| + break; |
| + case "last": |
| + missingValue = SortField.STRING_LAST; |
| + break; |
| + default: |
| + throw new CorruptIndexException("unable to parse missing string: " + typeAsString, input); |
| + } |
| + break; |
| + case LONG: |
| + switch (missingLastAsString) { |
| + case "null": |
| + missingValue = null; |
| + break; |
| + default: |
| + missingValue = Long.parseLong(missingLastAsString); |
| + break; |
| + } |
| + break; |
| + case INT: |
| + switch (missingLastAsString) { |
| + case "null": |
| + missingValue = null; |
| + break; |
| + default: |
| + missingValue = Integer.parseInt(missingLastAsString); |
| + break; |
| + } |
| + break; |
| + case DOUBLE: |
| + switch (missingLastAsString) { |
| + case "null": |
| + missingValue = null; |
| + break; |
| + default: |
| + missingValue = Double.parseDouble(missingLastAsString); |
| + break; |
| + } |
| + break; |
| + case FLOAT: |
| + switch (missingLastAsString) { |
| + case "null": |
| + missingValue = null; |
| + break; |
| + default: |
| + missingValue = Float.parseFloat(missingLastAsString); |
| + break; |
| + } |
| + break; |
| + default: |
| + throw new AssertionError(); |
| + } |
| + sortField[i] = new SortField(field, type, reverse); |
| + if (missingValue != null) { |
| + sortField[i].setMissingValue(missingValue); |
| + } |
| + } |
| + Sort indexSort = sortField.length == 0 ? null : new Sort(sortField); |
| + |
| SimpleTextUtil.checkFooter(input); |
| |
| SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, |
| - isCompoundFile, null, Collections.unmodifiableMap(diagnostics), id, Collections.unmodifiableMap(attributes)); |
| + isCompoundFile, null, Collections.unmodifiableMap(diagnostics), |
| + id, Collections.unmodifiableMap(attributes), indexSort); |
| info.setFiles(files); |
| return info; |
| } |
| @@ -223,6 +339,62 @@ |
| SimpleTextUtil.write(output, new BytesRef(si.getId())); |
| SimpleTextUtil.writeNewline(output); |
| |
| + Sort indexSort = si.getIndexSort(); |
| + SimpleTextUtil.write(output, SI_SORT); |
| + final int numSortFields = indexSort == null ? 0 : indexSort.getSort().length; |
| + SimpleTextUtil.write(output, Integer.toString(numSortFields), scratch); |
| + SimpleTextUtil.writeNewline(output); |
| + for (int i = 0; i < numSortFields; ++i) { |
| + final SortField sortField = indexSort.getSort()[i]; |
| + |
| + SimpleTextUtil.write(output, SI_SORT_FIELD); |
| + SimpleTextUtil.write(output, sortField.getField(), scratch); |
| + SimpleTextUtil.writeNewline(output); |
| + |
| + SimpleTextUtil.write(output, SI_SORT_TYPE); |
| + final String sortType; |
| + switch (sortField.getType()) { |
| + case STRING: |
| + sortType = "string"; |
| + break; |
| + case LONG: |
| + sortType = "long"; |
| + break; |
| + case INT: |
| + sortType = "int"; |
| + break; |
| + case DOUBLE: |
| + sortType = "double"; |
| + break; |
| + case FLOAT: |
| + sortType = "float"; |
| + break; |
| + default: |
| + throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); |
| + } |
| + SimpleTextUtil.write(output, sortType, scratch); |
| + SimpleTextUtil.writeNewline(output); |
| + |
| + SimpleTextUtil.write(output, SI_SORT_REVERSE); |
| + SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch); |
| + SimpleTextUtil.writeNewline(output); |
| + |
| + SimpleTextUtil.write(output, SI_SORT_MISSING); |
| + final Object missingValue = sortField.getMissingValue(); |
| + final String missing; |
| + if (missingValue == null) { |
| + missing = "null"; |
| + } else if (missingValue == SortField.STRING_FIRST) { |
| + missing = "first"; |
| + } else if (missingValue == SortField.STRING_LAST) { |
| + missing = "last"; |
| + } else { |
| + missing = missingValue.toString(); |
| + } |
| + SimpleTextUtil.write(output, missing, scratch); |
| + SimpleTextUtil.writeNewline(output); |
| + } |
| + |
| SimpleTextUtil.writeChecksum(output, scratch); |
| } |
| } |
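| |
| For reference, the write path above serializes the sort one attribute per line in the |
| plain-text .si file. A hypothetical single-field sort (descending long "timestamp", missing |
| values treated as null) would come out roughly like this, modulo the leading whitespace baked |
| into the SI_SORT* prefixes: |
| |
|     sort 1 |
|       field timestamp |
|       type long |
|       reverse true |
|       missing null |
| |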
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java |
| --- trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java 2016-02-16 11:18:34.649021815 -0500 |
| +++ indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -143,7 +143,6 @@ |
| } else { |
| write(TYPE_STRING); |
| newLine(); |
| - |
| write(VALUE); |
| write(field.stringValue()); |
| newLine(); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/Codec.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/Codec.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/Codec.java 2016-03-08 17:22:26.828938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/Codec.java 2016-05-10 05:44:23.740471118 -0400 |
| @@ -57,7 +57,7 @@ |
| } |
| |
| // TODO: should we use this, or maybe a system property is better? |
| - static Codec defaultCodec = LOADER.lookup("Lucene60"); |
| + static Codec defaultCodec = LOADER.lookup("Lucene62"); |
| } |
| |
| private final String name; |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java 2016-02-16 11:18:34.657021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -486,6 +486,12 @@ |
| |
| @Override |
| public int merge(MergeState mergeState) throws IOException { |
| + if (mergeState.segmentInfo.getIndexSort() != null) { |
| + // TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub |
| + // being copied over...? |
| + return super.merge(mergeState); |
| + } |
| + |
| int docCount = 0; |
| int numReaders = mergeState.maxDocs.length; |
| |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java 2016-02-16 11:18:34.657021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -730,6 +730,11 @@ |
| |
| @Override |
| public int merge(MergeState mergeState) throws IOException { |
| + if (mergeState.segmentInfo.getIndexSort() != null) { |
| + // TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub |
| + // being copied over...? |
| + return super.merge(mergeState); |
| + } |
| int docCount = 0; |
| int numReaders = mergeState.maxDocs.length; |
| |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java 2016-02-16 11:18:34.653021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -16,7 +16,6 @@ |
| */ |
| package org.apache.lucene.codecs; |
| |
| - |
| import java.io.Closeable; |
| import java.io.IOException; |
| import java.util.ArrayList; |
| @@ -25,12 +24,13 @@ |
| import java.util.NoSuchElementException; |
| |
| import org.apache.lucene.index.BinaryDocValues; |
| +import org.apache.lucene.index.DocIDMerger; |
| +import org.apache.lucene.index.DocValues; |
| +import org.apache.lucene.index.DocValuesType; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FilteredTermsEnum; |
| import org.apache.lucene.index.MergeState; |
| -import org.apache.lucene.index.DocValuesType; |
| import org.apache.lucene.index.MultiDocValues.OrdinalMap; |
| -import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.SegmentWriteState; // javadocs |
| import org.apache.lucene.index.SortedDocValues; |
| @@ -44,6 +44,8 @@ |
| import org.apache.lucene.util.LongValues; |
| import org.apache.lucene.util.packed.PackedInts; |
| |
| +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| + |
| /** |
| * Abstract API that consumes numeric, binary and |
| * sorted docvalues. Concrete implementations of this |
| @@ -240,6 +242,32 @@ |
| } |
| } |
| } |
| + |
| + /** Tracks state of one numeric sub-reader that we are merging */ |
| + private static class NumericDocValuesSub extends DocIDMerger.Sub { |
| + |
| + private final NumericDocValues values; |
| + private final Bits docsWithField; |
| + private int docID = -1; |
| + private final int maxDoc; |
| + |
| + public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values, Bits docsWithField, int maxDoc) { |
| + super(docMap); |
| + this.values = values; |
| + this.docsWithField = docsWithField; |
| + this.maxDoc = maxDoc; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + } |
| |
| /** |
| * Merges the numeric docvalues from <code>toMerge</code>. |
| @@ -248,20 +276,23 @@ |
| * an Iterable that merges and filters deleted documents on the fly. |
| */ |
| public void mergeNumericField(final FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge, final List<Bits> docsWithField) throws IOException { |
| - |
| addNumericField(fieldInfo, |
| new Iterable<Number>() { |
| @Override |
| public Iterator<Number> iterator() { |
| + |
| + // We must make a new DocIDMerger for each iterator: |
| + List<NumericDocValuesSub> subs = new ArrayList<>(); |
| + assert mergeState.docMaps.length == toMerge.size(); |
| + for(int i=0;i<toMerge.size();i++) { |
| + subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i])); |
| + } |
| + |
| + final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| return new Iterator<Number>() { |
| - int readerUpto = -1; |
| - int docIDUpto; |
| long nextValue; |
| boolean nextHasValue; |
| - int currentMaxDoc; |
| - NumericDocValues currentValues; |
| - Bits currentLiveDocs; |
| - Bits currentDocsWithField; |
| boolean nextIsSet; |
| |
| @Override |
| @@ -276,7 +307,7 @@ |
| |
| @Override |
| public Number next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| assert nextIsSet; |
| @@ -285,43 +316,46 @@ |
| } |
| |
| private boolean setNext() { |
| - while (true) { |
| - if (readerUpto == toMerge.size()) { |
| - return false; |
| - } |
| - |
| - if (docIDUpto == currentMaxDoc) { |
| - readerUpto++; |
| - if (readerUpto < toMerge.size()) { |
| - currentValues = toMerge.get(readerUpto); |
| - currentDocsWithField = docsWithField.get(readerUpto); |
| - currentLiveDocs = mergeState.liveDocs[readerUpto]; |
| - currentMaxDoc = mergeState.maxDocs[readerUpto]; |
| - } |
| - docIDUpto = 0; |
| - continue; |
| - } |
| - |
| - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { |
| - nextIsSet = true; |
| - nextValue = currentValues.get(docIDUpto); |
| - if (nextValue == 0 && currentDocsWithField.get(docIDUpto) == false) { |
| - nextHasValue = false; |
| - } else { |
| - nextHasValue = true; |
| - } |
| - docIDUpto++; |
| - return true; |
| - } |
| - |
| - docIDUpto++; |
| + NumericDocValuesSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| + return false; |
| } |
| + nextIsSet = true; |
| + nextValue = sub.values.get(sub.docID); |
| + nextHasValue = nextValue != 0 || sub.docsWithField.get(sub.docID); |
| + return true; |
| } |
| }; |
| } |
| }); |
| } |
| |
| + /** Tracks state of one binary sub-reader that we are merging */ |
| + private static class BinaryDocValuesSub extends DocIDMerger.Sub { |
| + |
| + private final BinaryDocValues values; |
| + private final Bits docsWithField; |
| + private int docID = -1; |
| + private final int maxDoc; |
| + |
| + public BinaryDocValuesSub(MergeState.DocMap docMap, BinaryDocValues values, Bits docsWithField, int maxDoc) { |
| + super(docMap); |
| + this.values = values; |
| + this.docsWithField = docsWithField; |
| + this.maxDoc = maxDoc; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + } |
| + |
| /** |
| * Merges the binary docvalues from <code>toMerge</code>. |
| * <p> |
| @@ -329,20 +363,23 @@ |
| * an Iterable that merges and filters deleted documents on the fly. |
| */ |
| public void mergeBinaryField(FieldInfo fieldInfo, final MergeState mergeState, final List<BinaryDocValues> toMerge, final List<Bits> docsWithField) throws IOException { |
| - |
| addBinaryField(fieldInfo, |
| new Iterable<BytesRef>() { |
| @Override |
| public Iterator<BytesRef> iterator() { |
| + |
| + // We must make a new DocIDMerger for each iterator: |
| + List<BinaryDocValuesSub> subs = new ArrayList<>(); |
| + assert mergeState.docMaps.length == toMerge.size(); |
| + for(int i=0;i<toMerge.size();i++) { |
| + subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i])); |
| + } |
| + |
| + final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| return new Iterator<BytesRef>() { |
| - int readerUpto = -1; |
| - int docIDUpto; |
| BytesRef nextValue; |
| BytesRef nextPointer; // points to null if missing, or nextValue |
| - int currentMaxDoc; |
| - BinaryDocValues currentValues; |
| - Bits currentLiveDocs; |
| - Bits currentDocsWithField; |
| boolean nextIsSet; |
| |
| @Override |
| @@ -357,7 +394,7 @@ |
| |
| @Override |
| public BytesRef next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| assert nextIsSet; |
| @@ -367,42 +404,49 @@ |
| |
| private boolean setNext() { |
| while (true) { |
| - if (readerUpto == toMerge.size()) { |
| - return false; |
| - } |
| - |
| - if (docIDUpto == currentMaxDoc) { |
| - readerUpto++; |
| - if (readerUpto < toMerge.size()) { |
| - currentValues = toMerge.get(readerUpto); |
| - currentDocsWithField = docsWithField.get(readerUpto); |
| - currentLiveDocs = mergeState.liveDocs[readerUpto]; |
| - currentMaxDoc = mergeState.maxDocs[readerUpto]; |
| - } |
| - docIDUpto = 0; |
| - continue; |
| - } |
| - |
| - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { |
| - nextIsSet = true; |
| - if (currentDocsWithField.get(docIDUpto)) { |
| - nextValue = currentValues.get(docIDUpto); |
| - nextPointer = nextValue; |
| - } else { |
| - nextPointer = null; |
| - } |
| - docIDUpto++; |
| - return true; |
| + BinaryDocValuesSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| + return false; |
| + } |
| + nextIsSet = true; |
| + if (sub.docsWithField.get(sub.docID)) { |
| + nextPointer = nextValue = sub.values.get(sub.docID); |
| + } else { |
| + nextPointer = null; |
| + } |
| + return true; |
| } |
| - |
| - docIDUpto++; |
| } |
| - } |
| }; |
| } |
| }); |
| } |
| |
| + /** Tracks state of one sorted numeric sub-reader that we are merging */ |
| + private static class SortedNumericDocValuesSub extends DocIDMerger.Sub { |
| + |
| + private final SortedNumericDocValues values; |
| + private int docID = -1; |
| + private final int maxDoc; |
| + |
| + public SortedNumericDocValuesSub(MergeState.DocMap docMap, SortedNumericDocValues values, int maxDoc) { |
| + super(docMap); |
| + this.values = values; |
| + this.maxDoc = maxDoc; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + values.setDocument(docID); |
| + return docID; |
| + } |
| + } |
| + } |
| + |
| /** |
| * Merges the sorted docvalues from <code>toMerge</code>. |
| * <p> |
| @@ -410,21 +454,24 @@ |
| * iterables that filter deleted documents. |
| */ |
| public void mergeSortedNumericField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedNumericDocValues> toMerge) throws IOException { |
| - final int numReaders = toMerge.size(); |
| - final SortedNumericDocValues dvs[] = toMerge.toArray(new SortedNumericDocValues[numReaders]); |
| |
| - // step 3: add field |
| addSortedNumericField(fieldInfo, |
| // doc -> value count |
| new Iterable<Number>() { |
| @Override |
| public Iterator<Number> iterator() { |
| + |
| + // We must make a new DocIDMerger for each iterator: |
| + List<SortedNumericDocValuesSub> subs = new ArrayList<>(); |
| + assert mergeState.docMaps.length == toMerge.size(); |
| + for(int i=0;i<toMerge.size();i++) { |
| + subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i])); |
| + } |
| + |
| + final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| return new Iterator<Number>() { |
| - int readerUpto = -1; |
| - int docIDUpto; |
| int nextValue; |
| - int currentMaxDoc; |
| - Bits currentLiveDocs; |
| boolean nextIsSet; |
| |
| @Override |
| @@ -439,7 +486,7 @@ |
| |
| @Override |
| public Number next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| assert nextIsSet; |
| @@ -449,30 +496,13 @@ |
| |
| private boolean setNext() { |
| while (true) { |
| - if (readerUpto == numReaders) { |
| + SortedNumericDocValuesSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| return false; |
| } |
| - |
| - if (docIDUpto == currentMaxDoc) { |
| - readerUpto++; |
| - if (readerUpto < numReaders) { |
| - currentLiveDocs = mergeState.liveDocs[readerUpto]; |
| - currentMaxDoc = mergeState.maxDocs[readerUpto]; |
| - } |
| - docIDUpto = 0; |
| - continue; |
| - } |
| - |
| - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { |
| - nextIsSet = true; |
| - SortedNumericDocValues dv = dvs[readerUpto]; |
| - dv.setDocument(docIDUpto); |
| - nextValue = dv.count(); |
| - docIDUpto++; |
| - return true; |
| - } |
| - |
| - docIDUpto++; |
| + nextIsSet = true; |
| + nextValue = sub.values.count(); |
| + return true; |
| } |
| } |
| }; |
| @@ -482,15 +512,21 @@ |
| new Iterable<Number>() { |
| @Override |
| public Iterator<Number> iterator() { |
| + // We must make a new DocIDMerger for each iterator: |
| + List<SortedNumericDocValuesSub> subs = new ArrayList<>(); |
| + assert mergeState.docMaps.length == toMerge.size(); |
| + for(int i=0;i<toMerge.size();i++) { |
| + subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i])); |
| + } |
| + |
| + final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| return new Iterator<Number>() { |
| - int readerUpto = -1; |
| - int docIDUpto; |
| long nextValue; |
| - int currentMaxDoc; |
| - Bits currentLiveDocs; |
| boolean nextIsSet; |
| int valueUpto; |
| int valueLength; |
| + SortedNumericDocValuesSub current; |
| |
| @Override |
| public boolean hasNext() { |
| @@ -504,7 +540,7 @@ |
| |
| @Override |
| public Number next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| assert nextIsSet; |
| @@ -514,38 +550,21 @@ |
| |
| private boolean setNext() { |
| while (true) { |
| - if (readerUpto == numReaders) { |
| - return false; |
| - } |
| |
| if (valueUpto < valueLength) { |
| - nextValue = dvs[readerUpto].valueAt(valueUpto); |
| + nextValue = current.values.valueAt(valueUpto); |
| valueUpto++; |
| nextIsSet = true; |
| return true; |
| } |
| |
| - if (docIDUpto == currentMaxDoc) { |
| - readerUpto++; |
| - if (readerUpto < numReaders) { |
| - currentLiveDocs = mergeState.liveDocs[readerUpto]; |
| - currentMaxDoc = mergeState.maxDocs[readerUpto]; |
| - } |
| - docIDUpto = 0; |
| - continue; |
| - } |
| - |
| - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { |
| - assert docIDUpto < currentMaxDoc; |
| - SortedNumericDocValues dv = dvs[readerUpto]; |
| - dv.setDocument(docIDUpto); |
| - valueUpto = 0; |
| - valueLength = dv.count(); |
| - docIDUpto++; |
| - continue; |
| + current = docIDMerger.next(); |
| + if (current == null) { |
| + return false; |
| } |
| - |
| - docIDUpto++; |
| + valueUpto = 0; |
| + valueLength = current.values.count(); |
| + continue; |
| } |
| } |
| }; |
| @@ -554,6 +573,32 @@ |
| ); |
| } |
| |
| + /** Tracks state of one sorted sub-reader that we are merging */ |
| + private static class SortedDocValuesSub extends DocIDMerger.Sub { |
| + |
| + private final SortedDocValues values; |
| + private int docID = -1; |
| + private final int maxDoc; |
| + private final LongValues map; |
| + |
| + public SortedDocValuesSub(MergeState.DocMap docMap, SortedDocValues values, int maxDoc, LongValues map) { |
| + super(docMap); |
| + this.values = values; |
| + this.maxDoc = maxDoc; |
| + this.map = map; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + } |
| + |
| /** |
| * Merges the sorted docvalues from <code>toMerge</code>. |
| * <p> |
| @@ -608,7 +653,7 @@ |
| |
| @Override |
| public BytesRef next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| int segmentNumber = map.getFirstSegmentNumber(currentOrd); |
| @@ -629,13 +674,17 @@ |
| new Iterable<Number>() { |
| @Override |
| public Iterator<Number> iterator() { |
| + // We must make a new DocIDMerger for each iterator: |
| + List<SortedDocValuesSub> subs = new ArrayList<>(); |
| + assert mergeState.docMaps.length == toMerge.size(); |
| + for(int i=0;i<toMerge.size();i++) { |
| + subs.add(new SortedDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i))); |
| + } |
| + |
| + final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| return new Iterator<Number>() { |
| - int readerUpto = -1; |
| - int docIDUpto; |
| int nextValue; |
| - int currentMaxDoc; |
| - Bits currentLiveDocs; |
| - LongValues currentMap; |
| boolean nextIsSet; |
| |
| @Override |
| @@ -650,7 +699,7 @@ |
| |
| @Override |
| public Number next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| assert nextIsSet; |
| @@ -661,30 +710,15 @@ |
| |
| private boolean setNext() { |
| while (true) { |
| - if (readerUpto == numReaders) { |
| + SortedDocValuesSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| return false; |
| } |
| |
| - if (docIDUpto == currentMaxDoc) { |
| - readerUpto++; |
| - if (readerUpto < numReaders) { |
| - currentMap = map.getGlobalOrds(readerUpto); |
| - currentLiveDocs = mergeState.liveDocs[readerUpto]; |
| - currentMaxDoc = mergeState.maxDocs[readerUpto]; |
| - } |
| - docIDUpto = 0; |
| - continue; |
| - } |
| - |
| - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { |
| - nextIsSet = true; |
| - int segOrd = dvs[readerUpto].getOrd(docIDUpto); |
| - nextValue = segOrd == -1 ? -1 : (int) currentMap.get(segOrd); |
| - docIDUpto++; |
| - return true; |
| - } |
| - |
| - docIDUpto++; |
| + nextIsSet = true; |
| + int segOrd = sub.values.getOrd(sub.docID); |
| + nextValue = segOrd == -1 ? -1 : (int) sub.map.get(segOrd); |
| + return true; |
| } |
| } |
| }; |
| @@ -693,6 +727,37 @@ |
| ); |
| } |
| |
| + /** Tracks state of one sorted set sub-reader that we are merging */ |
| + private static class SortedSetDocValuesSub extends DocIDMerger.Sub { |
| + |
| + private final SortedSetDocValues values; |
| + int docID = -1; |
| + private final int maxDoc; |
| + private final LongValues map; |
| + |
| + public SortedSetDocValuesSub(MergeState.DocMap docMap, SortedSetDocValues values, int maxDoc, LongValues map) { |
| + super(docMap); |
| + this.values = values; |
| + this.maxDoc = maxDoc; |
| + this.map = map; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + return "SortedSetDocValuesSub(docID=" + docID + " mappedDocID=" + mappedDocID + " values=" + values + ")"; |
| + } |
| + } |
| + |
| /** |
| * Merges the sortedset docvalues from <code>toMerge</code>. |
| * <p> |
| @@ -700,14 +765,12 @@ |
| * an Iterable that merges ordinals and values and filters deleted documents. |
| */ |
| public void mergeSortedSetField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge) throws IOException { |
| - final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]); |
| - final int numReaders = mergeState.maxDocs.length; |
| |
| // step 1: iterate thru each sub and mark terms still in use |
| - TermsEnum liveTerms[] = new TermsEnum[dvs.length]; |
| + TermsEnum liveTerms[] = new TermsEnum[toMerge.size()]; |
| long[] weights = new long[liveTerms.length]; |
| for (int sub = 0; sub < liveTerms.length; sub++) { |
| - SortedSetDocValues dv = dvs[sub]; |
| + SortedSetDocValues dv = toMerge.get(sub); |
| Bits liveDocs = mergeState.liveDocs[sub]; |
| int maxDoc = mergeState.maxDocs[sub]; |
| if (liveDocs == null) { |
| @@ -748,12 +811,12 @@ |
| |
| @Override |
| public BytesRef next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| int segmentNumber = map.getFirstSegmentNumber(currentOrd); |
| long segmentOrd = map.getFirstSegmentOrd(currentOrd); |
| - final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd); |
| + final BytesRef term = toMerge.get(segmentNumber).lookupOrd(segmentOrd); |
| currentOrd++; |
| return term; |
| } |
| @@ -769,12 +832,18 @@ |
| new Iterable<Number>() { |
| @Override |
| public Iterator<Number> iterator() { |
| + |
| + // We must make a new DocIDMerger for each iterator: |
| + List<SortedSetDocValuesSub> subs = new ArrayList<>(); |
| + assert mergeState.docMaps.length == toMerge.size(); |
| + for(int i=0;i<toMerge.size();i++) { |
| + subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i))); |
| + } |
| + |
| + final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| return new Iterator<Number>() { |
| - int readerUpto = -1; |
| - int docIDUpto; |
| int nextValue; |
| - int currentMaxDoc; |
| - Bits currentLiveDocs; |
| boolean nextIsSet; |
| |
| @Override |
| @@ -789,7 +858,7 @@ |
| |
| @Override |
| public Number next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| assert nextIsSet; |
| @@ -800,33 +869,18 @@ |
| |
| private boolean setNext() { |
| while (true) { |
| - if (readerUpto == numReaders) { |
| + SortedSetDocValuesSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| return false; |
| } |
| - |
| - if (docIDUpto == currentMaxDoc) { |
| - readerUpto++; |
| - if (readerUpto < numReaders) { |
| - currentLiveDocs = mergeState.liveDocs[readerUpto]; |
| - currentMaxDoc = mergeState.maxDocs[readerUpto]; |
| - } |
| - docIDUpto = 0; |
| - continue; |
| - } |
| - |
| - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { |
| - nextIsSet = true; |
| - SortedSetDocValues dv = dvs[readerUpto]; |
| - dv.setDocument(docIDUpto); |
| - nextValue = 0; |
| - while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { |
| - nextValue++; |
| - } |
| - docIDUpto++; |
| - return true; |
| - } |
| - |
| - docIDUpto++; |
| + sub.values.setDocument(sub.docID); |
| + nextValue = 0; |
| + while (sub.values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { |
| + nextValue++; |
| + } |
| + //System.out.println(" doc " + sub + " -> ord count = " + nextValue); |
| + nextIsSet = true; |
| + return true; |
| } |
| } |
| }; |
| @@ -836,13 +890,18 @@ |
| new Iterable<Number>() { |
| @Override |
| public Iterator<Number> iterator() { |
| + |
| + // We must make a new DocIDMerger for each iterator: |
| + List<SortedSetDocValuesSub> subs = new ArrayList<>(); |
| + assert mergeState.docMaps.length == toMerge.size(); |
| + for(int i=0;i<toMerge.size();i++) { |
| + subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i))); |
| + } |
| + |
| + final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| return new Iterator<Number>() { |
| - int readerUpto = -1; |
| - int docIDUpto; |
| long nextValue; |
| - int currentMaxDoc; |
| - Bits currentLiveDocs; |
| - LongValues currentMap; |
| boolean nextIsSet; |
| long ords[] = new long[8]; |
| int ordUpto; |
| @@ -860,7 +919,7 @@ |
| |
| @Override |
| public Number next() { |
| - if (!hasNext()) { |
| + if (hasNext() == false) { |
| throw new NoSuchElementException(); |
| } |
| assert nextIsSet; |
| @@ -871,10 +930,6 @@ |
| |
| private boolean setNext() { |
| while (true) { |
| - if (readerUpto == numReaders) { |
| - return false; |
| - } |
| - |
| if (ordUpto < ordLength) { |
| nextValue = ords[ordUpto]; |
| ordUpto++; |
| @@ -882,35 +937,22 @@ |
| return true; |
| } |
| |
| - if (docIDUpto == currentMaxDoc) { |
| - readerUpto++; |
| - if (readerUpto < numReaders) { |
| - currentMap = map.getGlobalOrds(readerUpto); |
| - currentLiveDocs = mergeState.liveDocs[readerUpto]; |
| - currentMaxDoc = mergeState.maxDocs[readerUpto]; |
| - } |
| - docIDUpto = 0; |
| - continue; |
| + SortedSetDocValuesSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| + return false; |
| } |
| - |
| - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { |
| - assert docIDUpto < currentMaxDoc; |
| - SortedSetDocValues dv = dvs[readerUpto]; |
| - dv.setDocument(docIDUpto); |
| - ordUpto = ordLength = 0; |
| - long ord; |
| - while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { |
| - if (ordLength == ords.length) { |
| - ords = ArrayUtil.grow(ords, ordLength+1); |
| - } |
| - ords[ordLength] = currentMap.get(ord); |
| - ordLength++; |
| + sub.values.setDocument(sub.docID); |
| + |
| + ordUpto = ordLength = 0; |
| + long ord; |
| + while ((ord = sub.values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { |
| + if (ordLength == ords.length) { |
| + ords = ArrayUtil.grow(ords, ordLength+1); |
| } |
| - docIDUpto++; |
| - continue; |
| + ords[ordLength] = sub.map.get(ord); |
| + ordLength++; |
| } |
| - |
| - docIDUpto++; |
| + continue; |
| } |
| } |
| }; |
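| |
| The hunks above all make the same change: the hand-rolled readerUpto/docIDUpto/liveDocs bookkeeping is replaced by a DocIDMerger driving one small DocIDMerger.Sub subclass per doc-values type (SortedNumericDocValuesSub, SortedDocValuesSub, SortedSetDocValuesSub). The merger takes over deletion filtering and, when mergeState.segmentInfo.getIndexSort() is non-null, the reordering of documents across subs. A condensed sketch of the consumption pattern the rewritten iterators share; the helper method and its name are illustrative, not part of the patch: |
| |
|   static void visitMergedOrdCounts(List<SortedSetDocValuesSub> subs, boolean indexIsSorted) throws IOException { |
|     DocIDMerger<SortedSetDocValuesSub> merger = new DocIDMerger<>(subs, indexIsSorted); |
|     SortedSetDocValuesSub sub; |
|     while ((sub = merger.next()) != null) {    // null means every sub is exhausted |
|       // sub.docID is the doc's position in its original segment (advanced by nextDoc); |
|       // sub.mappedDocID is its position in the merged, possibly index-sorted, segment. |
|       sub.values.setDocument(sub.docID); |
|       int ordCount = 0;                        // deleted docs never come back from next() |
|       while (sub.values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { |
|         ordCount++; |
|       } |
|     } |
|   } |
| |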
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java 2016-02-16 11:18:34.657021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,166 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.codecs.lucene50; |
| - |
| - |
| -import java.io.IOException; |
| -import java.util.Collections; |
| -import java.util.Map; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.codecs.CodecUtil; |
| -import org.apache.lucene.codecs.SegmentInfoFormat; |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexWriter; // javadocs |
| -import org.apache.lucene.index.SegmentInfo; // javadocs |
| -import org.apache.lucene.index.SegmentInfos; // javadocs |
| -import org.apache.lucene.store.ChecksumIndexInput; |
| -import org.apache.lucene.store.DataOutput; // javadocs |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.util.Version; |
| - |
| -/** |
| - * Lucene 5.0 Segment info format. |
| - * <p> |
| - * Files: |
| - * <ul> |
| - * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, Footer |
| - * </ul> |
| - * Data types: |
| - * <ul> |
| - * <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}</li> |
| - * <li>SegSize --> {@link DataOutput#writeInt Int32}</li> |
| - * <li>SegVersion --> {@link DataOutput#writeString String}</li> |
| - * <li>Files --> {@link DataOutput#writeSetOfStrings Set<String>}</li> |
| - * <li>Diagnostics,Attributes --> {@link DataOutput#writeMapOfStrings Map<String,String>}</li> |
| - * <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li> |
| - * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| - * </ul> |
| - * Field Descriptions: |
| - * <ul> |
| - * <li>SegVersion is the code version that created the segment.</li> |
| - * <li>SegSize is the number of documents contained in the segment index.</li> |
| - * <li>IsCompoundFile records whether the segment is written as a compound file or |
| - * not. If this is -1, the segment is not a compound file. If it is 1, the segment |
| - * is a compound file.</li> |
| - * <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid, |
| - * for each segment it creates. It includes metadata like the current Lucene |
| - * version, OS, Java version, why the segment was created (merge, flush, |
| - * addIndexes), etc.</li> |
| - * <li>Files is a list of files referred to by this segment.</li> |
| - * </ul> |
| - * |
| - * @see SegmentInfos |
| - * @lucene.experimental |
| - */ |
| -public class Lucene50SegmentInfoFormat extends SegmentInfoFormat { |
| - |
| - /** Sole constructor. */ |
| - public Lucene50SegmentInfoFormat() { |
| - } |
| - |
| - @Override |
| - public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException { |
| - final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION); |
| - try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) { |
| - Throwable priorE = null; |
| - SegmentInfo si = null; |
| - try { |
| - int format = CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME, |
| - Lucene50SegmentInfoFormat.VERSION_START, |
| - Lucene50SegmentInfoFormat.VERSION_CURRENT, |
| - segmentID, ""); |
| - final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt()); |
| - |
| - final int docCount = input.readInt(); |
| - if (docCount < 0) { |
| - throw new CorruptIndexException("invalid docCount: " + docCount, input); |
| - } |
| - final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; |
| - |
| - final Map<String,String> diagnostics; |
| - final Set<String> files; |
| - final Map<String,String> attributes; |
| - |
| - if (format >= VERSION_SAFE_MAPS) { |
| - diagnostics = input.readMapOfStrings(); |
| - files = input.readSetOfStrings(); |
| - attributes = input.readMapOfStrings(); |
| - } else { |
| - diagnostics = Collections.unmodifiableMap(input.readStringStringMap()); |
| - files = Collections.unmodifiableSet(input.readStringSet()); |
| - attributes = Collections.unmodifiableMap(input.readStringStringMap()); |
| - } |
| - |
| - si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes); |
| - si.setFiles(files); |
| - } catch (Throwable exception) { |
| - priorE = exception; |
| - } finally { |
| - CodecUtil.checkFooter(input, priorE); |
| - } |
| - return si; |
| - } |
| - } |
| - |
| - @Override |
| - public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { |
| - final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION); |
| - |
| - try (IndexOutput output = dir.createOutput(fileName, ioContext)) { |
| - // Only add the file once we've successfully created it, else IFD assert can trip: |
| - si.addFile(fileName); |
| - CodecUtil.writeIndexHeader(output, |
| - Lucene50SegmentInfoFormat.CODEC_NAME, |
| - Lucene50SegmentInfoFormat.VERSION_CURRENT, |
| - si.getId(), |
| - ""); |
| - Version version = si.getVersion(); |
| - if (version.major < 5) { |
| - throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si); |
| - } |
| - // Write the Lucene version that created this segment, since 3.1 |
| - output.writeInt(version.major); |
| - output.writeInt(version.minor); |
| - output.writeInt(version.bugfix); |
| - assert version.prerelease == 0; |
| - output.writeInt(si.maxDoc()); |
| - |
| - output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); |
| - output.writeMapOfStrings(si.getDiagnostics()); |
| - Set<String> files = si.files(); |
| - for (String file : files) { |
| - if (!IndexFileNames.parseSegmentName(file).equals(si.name)) { |
| - throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files); |
| - } |
| - } |
| - output.writeSetOfStrings(files); |
| - output.writeMapOfStrings(si.getAttributes()); |
| - CodecUtil.writeFooter(output); |
| - } |
| - } |
| - |
| - /** File extension used to store {@link SegmentInfo}. */ |
| - public final static String SI_EXTENSION = "si"; |
| - static final String CODEC_NAME = "Lucene50SegmentInfo"; |
| - static final int VERSION_START = 0; |
| - static final int VERSION_SAFE_MAPS = 1; |
| - static final int VERSION_CURRENT = VERSION_SAFE_MAPS; |
| -} |
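| |
| With the core copy deleted, the 5.0 .si format is no longer writable: per this patch it survives only as a deprecated, read-only format for old segments, and all new segments go through the 6.2 format added below. A hedged sketch of the read-only shape such a format takes — read() stays as in the deleted class above, while write() rejects writes (the exception and its message are assumptions, not quoted from the patch): |
| |
|   @Override |
|   public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { |
|     // Assumption: old-format writing is unsupported; new segments use Lucene62SegmentInfoFormat. |
|     throw new UnsupportedOperationException("this codec can only be used for reading"); |
|   } |
| |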
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene54/package-info.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene54/package-info.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene54/package-info.java 2016-01-24 13:09:49.884989952 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene54/package-info.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -135,7 +135,7 @@ |
| * <p>Each segment index maintains the following:</p> |
| * <ul> |
| * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment info}. |
| + * {@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment info}. |
| * This contains metadata about a segment, such as the number of documents, |
| * what files it uses, |
| * </li> |
| @@ -235,7 +235,7 @@ |
| * file.</td> |
| * </tr> |
| * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment Info}</td> |
| + * <td>{@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment Info}</td> |
| * <td>.si</td> |
| * <td>Stores metadata about a segment</td> |
| * </tr> |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java 2016-03-08 17:22:26.828938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,176 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.codecs.lucene60; |
| - |
| - |
| -import java.util.Objects; |
| - |
| -import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.codecs.CompoundFormat; |
| -import org.apache.lucene.codecs.DocValuesFormat; |
| -import org.apache.lucene.codecs.FieldInfosFormat; |
| -import org.apache.lucene.codecs.FilterCodec; |
| -import org.apache.lucene.codecs.LiveDocsFormat; |
| -import org.apache.lucene.codecs.NormsFormat; |
| -import org.apache.lucene.codecs.PointsFormat; |
| -import org.apache.lucene.codecs.PostingsFormat; |
| -import org.apache.lucene.codecs.SegmentInfoFormat; |
| -import org.apache.lucene.codecs.StoredFieldsFormat; |
| -import org.apache.lucene.codecs.TermVectorsFormat; |
| -import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; |
| -import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; |
| -import org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat; |
| -import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; |
| -import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; |
| -import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; |
| -import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat; |
| -import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; |
| -import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; |
| - |
| -/** |
| - * Implements the Lucene 6.0 index format, with configurable per-field postings |
| - * and docvalues formats. |
| - * <p> |
| - * If you want to reuse functionality of this codec in another codec, extend |
| - * {@link FilterCodec}. |
| - * |
| - * @see org.apache.lucene.codecs.lucene60 package documentation for file format details. |
| - * |
| - * @lucene.experimental |
| - */ |
| -public class Lucene60Codec extends Codec { |
| - private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat(); |
| - private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat(); |
| - private final SegmentInfoFormat segmentInfosFormat = new Lucene50SegmentInfoFormat(); |
| - private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); |
| - private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); |
| - |
| - private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { |
| - @Override |
| - public PostingsFormat getPostingsFormatForField(String field) { |
| - return Lucene60Codec.this.getPostingsFormatForField(field); |
| - } |
| - }; |
| - |
| - private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { |
| - @Override |
| - public DocValuesFormat getDocValuesFormatForField(String field) { |
| - return Lucene60Codec.this.getDocValuesFormatForField(field); |
| - } |
| - }; |
| - |
| - private final StoredFieldsFormat storedFieldsFormat; |
| - |
| - /** |
| - * Instantiates a new codec. |
| - */ |
| - public Lucene60Codec() { |
| - this(Mode.BEST_SPEED); |
| - } |
| - |
| - /** |
| - * Instantiates a new codec, specifying the stored fields compression |
| - * mode to use. |
| - * @param mode stored fields compression mode to use for newly |
| - * flushed/merged segments. |
| - */ |
| - public Lucene60Codec(Mode mode) { |
| - super("Lucene60"); |
| - this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode)); |
| - } |
| - |
| - @Override |
| - public final StoredFieldsFormat storedFieldsFormat() { |
| - return storedFieldsFormat; |
| - } |
| - |
| - @Override |
| - public final TermVectorsFormat termVectorsFormat() { |
| - return vectorsFormat; |
| - } |
| - |
| - @Override |
| - public final PostingsFormat postingsFormat() { |
| - return postingsFormat; |
| - } |
| - |
| - @Override |
| - public final FieldInfosFormat fieldInfosFormat() { |
| - return fieldInfosFormat; |
| - } |
| - |
| - @Override |
| - public final SegmentInfoFormat segmentInfoFormat() { |
| - return segmentInfosFormat; |
| - } |
| - |
| - @Override |
| - public final LiveDocsFormat liveDocsFormat() { |
| - return liveDocsFormat; |
| - } |
| - |
| - @Override |
| - public final CompoundFormat compoundFormat() { |
| - return compoundFormat; |
| - } |
| - |
| - @Override |
| - public final PointsFormat pointsFormat() { |
| - return new Lucene60PointsFormat(); |
| - } |
| - |
| - /** Returns the postings format that should be used for writing |
| - * new segments of <code>field</code>. |
| - * |
| - * The default implementation always returns "Lucene50". |
| - * <p> |
| - * <b>WARNING:</b> if you subclass, you are responsible for index |
| - * backwards compatibility: future version of Lucene are only |
| - * guaranteed to be able to read the default implementation. |
| - */ |
| - public PostingsFormat getPostingsFormatForField(String field) { |
| - return defaultFormat; |
| - } |
| - |
| - /** Returns the docvalues format that should be used for writing |
| - * new segments of <code>field</code>. |
| - * |
| - * The default implementation always returns "Lucene50". |
| - * <p> |
| - * <b>WARNING:</b> if you subclass, you are responsible for index |
| - * backwards compatibility: future version of Lucene are only |
| - * guaranteed to be able to read the default implementation. |
| - */ |
| - public DocValuesFormat getDocValuesFormatForField(String field) { |
| - return defaultDVFormat; |
| - } |
| - |
| - @Override |
| - public final DocValuesFormat docValuesFormat() { |
| - return docValuesFormat; |
| - } |
| - |
| - private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50"); |
| - private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene54"); |
| - |
| - private final NormsFormat normsFormat = new Lucene53NormsFormat(); |
| - |
| - @Override |
| - public final NormsFormat normsFormat() { |
| - return normsFormat; |
| - } |
| -} |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java 2016-04-24 06:00:46.365895938 -0400 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -123,6 +123,13 @@ |
| |
| @Override |
| public void merge(MergeState mergeState) throws IOException { |
| + if (mergeState.segmentInfo.getIndexSort() != null) { |
| + // TODO: can we gain back some optimizations even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub |
| + // being copied over...? |
| + super.merge(mergeState); |
| + return; |
| + } |
| + |
| for(PointsReader reader : mergeState.pointsReaders) { |
| if (reader instanceof Lucene60PointsReader == false) { |
| // We can only bulk merge when all to-be-merged segments use our format: |
| @@ -171,7 +178,6 @@ |
| singleValuePerDoc)) { |
| List<BKDReader> bkdReaders = new ArrayList<>(); |
| List<MergeState.DocMap> docMaps = new ArrayList<>(); |
| - List<Integer> docIDBases = new ArrayList<>(); |
| for(int i=0;i<mergeState.pointsReaders.length;i++) { |
| PointsReader reader = mergeState.pointsReaders[i]; |
| |
| @@ -191,7 +197,6 @@ |
| if (readerFieldInfo != null) { |
| BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number); |
| if (bkdReader != null) { |
| - docIDBases.add(mergeState.docBase[i]); |
| bkdReaders.add(bkdReader); |
| docMaps.add(mergeState.docMaps[i]); |
| } |
| @@ -199,7 +204,7 @@ |
| } |
| } |
| |
| - long fp = writer.merge(dataOut, docMaps, bkdReaders, docIDBases); |
| + long fp = writer.merge(dataOut, docMaps, bkdReaders); |
| if (fp != -1) { |
| indexFPs.put(fieldInfo.name, fp); |
| } |
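| |
| The early return added above is needed because the bulk BKD merge path assumes the subs are simply concatenated, so a document's merged ID is its old ID plus a fixed per-sub base; an index sort interleaves documents from different subs, which is also why docIDBases is dropped from the BKDWriter.merge call. Conceptually (a sketch; readerIndex and oldDocID are illustrative names, not identifiers from the patch): |
| |
|   // Pre-patch assumption: concatenated subs, so remapping is a constant offset. |
|   int mergedDocID = mergeState.docBase[readerIndex] + oldDocID; |
| |
|   // With an index sort, every document is remapped individually through its |
|   // sub's DocMap (deleted documents map to -1): |
|   int sortedDocID = mergeState.docMaps[readerIndex].get(oldDocID); |
| |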
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java 2016-03-08 17:22:26.828938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -16,400 +16,7 @@ |
| */ |
| |
| /** |
| - * Lucene 6.0 file format. |
| - * |
| - * <h1>Apache Lucene - Index File Formats</h1> |
| - * <div> |
| - * <ul> |
| - * <li><a href="#Introduction">Introduction</a></li> |
| - * <li><a href="#Definitions">Definitions</a> |
| - * <ul> |
| - * <li><a href="#Inverted_Indexing">Inverted Indexing</a></li> |
| - * <li><a href="#Types_of_Fields">Types of Fields</a></li> |
| - * <li><a href="#Segments">Segments</a></li> |
| - * <li><a href="#Document_Numbers">Document Numbers</a></li> |
| - * </ul> |
| - * </li> |
| - * <li><a href="#Overview">Index Structure Overview</a></li> |
| - * <li><a href="#File_Naming">File Naming</a></li> |
| - * <li><a href="#file-names">Summary of File Extensions</a> |
| - * <ul> |
| - * <li><a href="#Lock_File">Lock File</a></li> |
| - * <li><a href="#History">History</a></li> |
| - * <li><a href="#Limitations">Limitations</a></li> |
| - * </ul> |
| - * </li> |
| - * </ul> |
| - * </div> |
| - * <a name="Introduction"></a> |
| - * <h2>Introduction</h2> |
| - * <div> |
| - * <p>This document defines the index file formats used in this version of Lucene. |
| - * If you are using a different version of Lucene, please consult the copy of |
| - * <code>docs/</code> that was distributed with |
| - * the version you are using.</p> |
| - * <p>Apache Lucene is written in Java, but several efforts are underway to write |
| - * <a href="http://wiki.apache.org/lucene-java/LuceneImplementations">versions of |
| - * Lucene in other programming languages</a>. If these versions are to remain |
| - * compatible with Apache Lucene, then a language-independent definition of the |
| - * Lucene index format is required. This document thus attempts to provide a |
| - * complete and independent definition of the Apache Lucene file formats.</p> |
| - * <p>As Lucene evolves, this document should evolve. Versions of Lucene in |
| - * different programming languages should endeavor to agree on file formats, and |
| - * generate new versions of this document.</p> |
| - * </div> |
| - * <a name="Definitions"></a> |
| - * <h2>Definitions</h2> |
| - * <div> |
| - * <p>The fundamental concepts in Lucene are index, document, field and term.</p> |
| - * <p>An index contains a sequence of documents.</p> |
| - * <ul> |
| - * <li>A document is a sequence of fields.</li> |
| - * <li>A field is a named sequence of terms.</li> |
| - * <li>A term is a sequence of bytes.</li> |
| - * </ul> |
| - * <p>The same sequence of bytes in two different fields is considered a different |
| - * term. Thus terms are represented as a pair: the string naming the field, and the |
| - * bytes within the field.</p> |
| - * <a name="Inverted_Indexing"></a> |
| - * <h3>Inverted Indexing</h3> |
| - * <p>The index stores statistics about terms in order to make term-based search |
| - * more efficient. Lucene's index falls into the family of indexes known as an |
| - * <i>inverted index.</i> This is because it can list, for a term, the documents |
| - * that contain it. This is the inverse of the natural relationship, in which |
| - * documents list terms.</p> |
| - * <a name="Types_of_Fields"></a> |
| - * <h3>Types of Fields</h3> |
| - * <p>In Lucene, fields may be <i>stored</i>, in which case their text is stored |
| - * in the index literally, in a non-inverted manner. Fields that are inverted are |
| - * called <i>indexed</i>. A field may be both stored and indexed.</p> |
| - * <p>The text of a field may be <i>tokenized</i> into terms to be indexed, or the |
| - * text of a field may be used literally as a term to be indexed. Most fields are |
| - * tokenized, but sometimes it is useful for certain identifier fields to be |
| - * indexed literally.</p> |
| - * <p>See the {@link org.apache.lucene.document.Field Field} |
| - * java docs for more information on Fields.</p> |
| - * <a name="Segments"></a> |
| - * <h3>Segments</h3> |
| - * <p>Lucene indexes may be composed of multiple sub-indexes, or <i>segments</i>. |
| - * Each segment is a fully independent index, which could be searched separately. |
| - * Indexes evolve by:</p> |
| - * <ol> |
| - * <li>Creating new segments for newly added documents.</li> |
| - * <li>Merging existing segments.</li> |
| - * </ol> |
| - * <p>Searches may involve multiple segments and/or multiple indexes, each index |
| - * potentially composed of a set of segments.</p> |
| - * <a name="Document_Numbers"></a> |
| - * <h3>Document Numbers</h3> |
| - * <p>Internally, Lucene refers to documents by an integer <i>document number</i>. |
| - * The first document added to an index is numbered zero, and each subsequent |
| - * document added gets a number one greater than the previous.</p> |
| - * <p>Note that a document's number may change, so caution should be taken when |
| - * storing these numbers outside of Lucene. In particular, numbers may change in |
| - * the following situations:</p> |
| - * <ul> |
| - * <li> |
| - * <p>The numbers stored in each segment are unique only within the segment, and |
| - * must be converted before they can be used in a larger context. The standard |
| - * technique is to allocate each segment a range of values, based on the range of |
| - * numbers used in that segment. To convert a document number from a segment to an |
| - * external value, the segment's <i>base</i> document number is added. To convert |
| - * an external value back to a segment-specific value, the segment is identified |
| - * by the range that the external value is in, and the segment's base value is |
| - * subtracted. For example two five document segments might be combined, so that |
| - * the first segment has a base value of zero, and the second of five. Document |
| - * three from the second segment would have an external value of eight.</p> |
| - * </li> |
| - * <li> |
| - * <p>When documents are deleted, gaps are created in the numbering. These are |
| - * eventually removed as the index evolves through merging. Deleted documents are |
| - * dropped when segments are merged. A freshly-merged segment thus has no gaps in |
| - * its numbering.</p> |
| - * </li> |
| - * </ul> |
| - * </div> |
| - * <a name="Overview"></a> |
| - * <h2>Index Structure Overview</h2> |
| - * <div> |
| - * <p>Each segment index maintains the following:</p> |
| - * <ul> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment info}. |
| - * This contains metadata about a segment, such as the number of documents, |
| - * what files it uses, |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50FieldInfosFormat Field names}. |
| - * This contains the set of field names used in the index. |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Stored Field values}. |
| - * This contains, for each document, a list of attribute-value pairs, where the attributes |
| - * are field names. These are used to store auxiliary information about the document, such as |
| - * its title, url, or an identifier to access a database. The set of stored fields are what is |
| - * returned for each hit when searching. This is keyed by document number. |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term dictionary}. |
| - * A dictionary containing all of the terms used in all of the |
| - * indexed fields of all of the documents. The dictionary also contains the number |
| - * of documents which contain the term, and pointers to the term's frequency and |
| - * proximity data. |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Frequency data}. |
| - * For each term in the dictionary, the numbers of all the |
| - * documents that contain that term, and the frequency of the term in that |
| - * document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Proximity data}. |
| - * For each term in the dictionary, the positions that the |
| - * term occurs in each document. Note that this will not exist if all fields in |
| - * all documents omit position data. |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Normalization factors}. |
| - * For each field in each document, a value is stored |
| - * that is multiplied into the score for hits on that field. |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vectors}. |
| - * For each field in each document, the term vector (sometimes |
| - * called document vector) may be stored. A term vector consists of term text and |
| - * term frequency. To add Term Vectors to your index see the |
| - * {@link org.apache.lucene.document.Field Field} constructors |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-document values}. |
| - * Like stored values, these are also keyed by document |
| - * number, but are generally intended to be loaded into main memory for fast |
| - * access. Whereas stored values are generally intended for summary results from |
| - * searches, per-document values are useful for things like scoring factors. |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}. |
| - * An optional file indicating which documents are live. |
| - * </li> |
| - * <li> |
| - * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}. |
| - * Optional pair of files, recording dimensionally indexed fields, to enable fast |
| - * numeric range filtering and large numeric values like BigInteger and BigDecimal (1D) |
| - * and geographic shape intersection (2D, 3D). |
| - * </li> |
| - * </ul> |
| - * <p>Details on each of these are provided in their linked pages.</p> |
| - * </div> |
| - * <a name="File_Naming"></a> |
| - * <h2>File Naming</h2> |
| - * <div> |
| - * <p>All files belonging to a segment have the same name with varying extensions. |
| - * The extensions correspond to the different file formats described below. When |
| - * using the Compound File format (default in 1.4 and greater) these files (except |
| - * for the Segment info file, the Lock file, and Deleted documents file) are collapsed |
| - * into a single .cfs file (see below for details)</p> |
| - * <p>Typically, all segments in an index are stored in a single directory, |
| - * although this is not required.</p> |
| - * <p>As of version 2.1 (lock-less commits), file names are never re-used. |
| - * That is, when any file is saved |
| - * to the Directory it is given a never before used filename. This is achieved |
| - * using a simple generations approach. For example, the first segments file is |
| - * segments_1, then segments_2, etc. The generation is a sequential long integer |
| - * represented in alpha-numeric (base 36) form.</p> |
| - * </div> |
| - * <a name="file-names"></a> |
| - * <h2>Summary of File Extensions</h2> |
| - * <div> |
| - * <p>The following table summarizes the names and extensions of the files in |
| - * Lucene:</p> |
| - * <table cellspacing="1" cellpadding="4" summary="lucene filenames by extension"> |
| - * <tr> |
| - * <th>Name</th> |
| - * <th>Extension</th> |
| - * <th>Brief Description</th> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.index.SegmentInfos Segments File}</td> |
| - * <td>segments_N</td> |
| - * <td>Stores information about a commit point</td> |
| - * </tr> |
| - * <tr> |
| - * <td><a href="#Lock_File">Lock File</a></td> |
| - * <td>write.lock</td> |
| - * <td>The Write lock prevents multiple IndexWriters from writing to the same |
| - * file.</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment Info}</td> |
| - * <td>.si</td> |
| - * <td>Stores metadata about a segment</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat Compound File}</td> |
| - * <td>.cfs, .cfe</td> |
| - * <td>An optional "virtual" file consisting of all the other index files for |
| - * systems that frequently run out of file handles.</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50FieldInfosFormat Fields}</td> |
| - * <td>.fnm</td> |
| - * <td>Stores information about the fields</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Index}</td> |
| - * <td>.fdx</td> |
| - * <td>Contains pointers to field data</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Data}</td> |
| - * <td>.fdt</td> |
| - * <td>The stored fields for documents</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Dictionary}</td> |
| - * <td>.tim</td> |
| - * <td>The term dictionary, stores term info</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Index}</td> |
| - * <td>.tip</td> |
| - * <td>The index into the Term Dictionary</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Frequencies}</td> |
| - * <td>.doc</td> |
| - * <td>Contains the list of docs which contain each term along with frequency</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Positions}</td> |
| - * <td>.pos</td> |
| - * <td>Stores position information about where a term occurs in the index</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Payloads}</td> |
| - * <td>.pay</td> |
| - * <td>Stores additional per-position metadata information such as character offsets and user payloads</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Norms}</td> |
| - * <td>.nvd, .nvm</td> |
| - * <td>Encodes length and boost factors for docs and fields</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-Document Values}</td> |
| - * <td>.dvd, .dvm</td> |
| - * <td>Encodes additional scoring factors or other per-document information.</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Index}</td> |
| - * <td>.tvx</td> |
| - * <td>Stores offset into the document data file</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Documents}</td> |
| - * <td>.tvd</td> |
| - * <td>Contains information about each document that has term vectors</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Fields}</td> |
| - * <td>.tvf</td> |
| - * <td>The field level info about term vectors</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents}</td> |
| - * <td>.liv</td> |
| - * <td>Info about what files are live</td> |
| - * </tr> |
| - * <tr> |
| - * <td>{@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}</td> |
| - * <td>.dii, .dim</td> |
| - * <td>Holds indexed points, if any</td> |
| - * </tr> |
| - * </table> |
| - * </div> |
| - * <a name="Lock_File"></a> |
| - * <h2>Lock File</h2> |
| - * The write lock, which is stored in the index directory by default, is named |
| - * "write.lock". If the lock directory is different from the index directory then |
| - * the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix |
| - * derived from the full path to the index directory. When this file is present, a |
| - * writer is currently modifying the index (adding or removing documents). This |
| - * lock file ensures that only one writer is modifying the index at a time. |
| - * <a name="History"></a> |
| - * <h2>History</h2> |
| - * <p>Compatibility notes are provided in this document, describing how file |
| - * formats have changed from prior versions:</p> |
| - * <ul> |
| - * <li>In version 2.1, the file format was changed to allow lock-less commits (ie, |
| - * no more commit lock). The change is fully backwards compatible: you can open a |
| - * pre-2.1 index for searching or adding/deleting of docs. When the new segments |
| - * file is saved (committed), it will be written in the new file format (meaning |
| - * no specific "upgrade" process is needed). But note that once a commit has |
| - * occurred, pre-2.1 Lucene will not be able to read the index.</li> |
| - * <li>In version 2.3, the file format was changed to allow segments to share a |
| - * single set of doc store (vectors & stored fields) files. This allows for |
| - * faster indexing in certain cases. The change is fully backwards compatible (in |
| - * the same way as the lock-less commits change in 2.1).</li> |
| - * <li>In version 2.4, Strings are now written as true UTF-8 byte sequence, not |
| - * Java's modified UTF-8. See <a href="http://issues.apache.org/jira/browse/LUCENE-510"> |
| - * LUCENE-510</a> for details.</li> |
| - * <li>In version 2.9, an optional opaque Map<String,String> CommitUserData |
| - * may be passed to IndexWriter's commit methods (and later retrieved), which is |
| - * recorded in the segments_N file. See <a href="http://issues.apache.org/jira/browse/LUCENE-1382"> |
| - * LUCENE-1382</a> for details. Also, |
| - * diagnostics were added to each segment written recording details about why it |
| - * was written (due to flush, merge; which OS/JRE was used; etc.). See issue |
| - * <a href="http://issues.apache.org/jira/browse/LUCENE-1654">LUCENE-1654</a> for details.</li> |
| - * <li>In version 3.0, compressed fields are no longer written to the index (they |
| - * can still be read, but on merge the new segment will write them, uncompressed). |
| - * See issue <a href="http://issues.apache.org/jira/browse/LUCENE-1960">LUCENE-1960</a> |
| - * for details.</li> |
| - * <li>In version 3.1, segments records the code version that created them. See |
| - * <a href="http://issues.apache.org/jira/browse/LUCENE-2720">LUCENE-2720</a> for details. |
| - * Additionally segments track explicitly whether or not they have term vectors. |
| - * See <a href="http://issues.apache.org/jira/browse/LUCENE-2811">LUCENE-2811</a> |
| - * for details.</li> |
| - * <li>In version 3.2, numeric fields are written as natively to stored fields |
| - * file, previously they were stored in text format only.</li> |
| - * <li>In version 3.4, fields can omit position data while still indexing term |
| - * frequencies.</li> |
| - * <li>In version 4.0, the format of the inverted index became extensible via |
| - * the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage |
| - * ({@code DocValues}) was introduced. Normalization factors need no longer be a |
| - * single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. |
| - * Terms need not be unicode strings, they can be any byte sequence. Term offsets |
| - * can optionally be indexed into the postings lists. Payloads can be stored in the |
| - * term vectors.</li> |
| - * <li>In version 4.1, the format of the postings list changed to use either |
| - * of FOR compression or variable-byte encoding, depending upon the frequency |
| - * of the term. Terms appearing only once were changed to inline directly into |
| - * the term dictionary. Stored fields are compressed by default. </li> |
| - * <li>In version 4.2, term vectors are compressed by default. DocValues has |
| - * a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining |
| - * on multi-valued fields.</li> |
| - * <li>In version 4.5, DocValues were extended to explicitly represent missing values.</li> |
| - * <li>In version 4.6, FieldInfos were extended to support per-field DocValues generation, to |
| - * allow updating NumericDocValues fields.</li> |
| - * <li>In version 4.8, checksum footers were added to the end of each index file |
| - * for improved data integrity. Specifically, the last 8 bytes of every index file |
| - * contain the zlib-crc32 checksum of the file.</li> |
| - * <li>In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric) |
| - * that is suitable for faceting/sorting/analytics. |
| - * <li>In version 5.4, DocValues have been improved to store more information on disk: |
| - * addresses for binary fields and ord indexes for multi-valued fields. |
| - * <li>In version 6.0, Points were added, for multi-dimensional range/distance search. |
| - * </li> |
| - * </ul> |
| - * <a name="Limitations"></a> |
| - * <h2>Limitations</h2> |
| - * <div> |
| - * <p>Lucene uses a Java <code>int</code> to refer to |
| - * document numbers, and the index file format uses an <code>Int32</code> |
| - * on-disk to store document numbers. This is a limitation |
| - * of both the index file format and the current implementation. Eventually these |
| - * should be replaced with either <code>UInt64</code> values, or |
| - * better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.</p> |
| - * </div> |
| + * Components from the Lucene 6.0 index format. See {@link org.apache.lucene.codecs.lucene62} |
| + * for an overview of the index format. |
| */ |
| package org.apache.lucene.codecs.lucene60; |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -0,0 +1,176 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.codecs.lucene62; |
| + |
| +import java.util.Objects; |
| + |
| +import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.codecs.CompoundFormat; |
| +import org.apache.lucene.codecs.DocValuesFormat; |
| +import org.apache.lucene.codecs.FieldInfosFormat; |
| +import org.apache.lucene.codecs.FilterCodec; |
| +import org.apache.lucene.codecs.LiveDocsFormat; |
| +import org.apache.lucene.codecs.NormsFormat; |
| +import org.apache.lucene.codecs.PointsFormat; |
| +import org.apache.lucene.codecs.PostingsFormat; |
| +import org.apache.lucene.codecs.SegmentInfoFormat; |
| +import org.apache.lucene.codecs.StoredFieldsFormat; |
| +import org.apache.lucene.codecs.TermVectorsFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; |
| +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; |
| +import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; |
| +import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat; |
| +import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat; |
| +import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat; |
| +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; |
| +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; |
| + |
| +/** |
| + * Implements the Lucene 6.2 index format, with configurable per-field postings |
| + * and docvalues formats. |
| + * <p> |
| + * If you want to reuse functionality of this codec in another codec, extend |
| + * {@link FilterCodec}. |
| + * |
| + * @see org.apache.lucene.codecs.lucene62 package documentation for file format details. |
| + * |
| + * @lucene.experimental |
| + */ |
| +public class Lucene62Codec extends Codec { |
| + private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat(); |
| + private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat(); |
| + private final SegmentInfoFormat segmentInfosFormat = new Lucene62SegmentInfoFormat(); |
| + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); |
| + private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); |
| + |
| + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { |
| + @Override |
| + public PostingsFormat getPostingsFormatForField(String field) { |
| + return Lucene62Codec.this.getPostingsFormatForField(field); |
| + } |
| + }; |
| + |
| + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { |
| + @Override |
| + public DocValuesFormat getDocValuesFormatForField(String field) { |
| + return Lucene62Codec.this.getDocValuesFormatForField(field); |
| + } |
| + }; |
| + |
| + private final StoredFieldsFormat storedFieldsFormat; |
| + |
| + /** |
| + * Instantiates a new codec. |
| + */ |
| + public Lucene62Codec() { |
| + this(Mode.BEST_SPEED); |
| + } |
| + |
| + /** |
| + * Instantiates a new codec, specifying the stored fields compression |
| + * mode to use. |
| + * @param mode stored fields compression mode to use for newly |
| + * flushed/merged segments. |
| + */ |
| + public Lucene62Codec(Mode mode) { |
| + super("Lucene62"); |
| + this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode)); |
| + } |
| + |
| + @Override |
| + public final StoredFieldsFormat storedFieldsFormat() { |
| + return storedFieldsFormat; |
| + } |
| + |
| + @Override |
| + public final TermVectorsFormat termVectorsFormat() { |
| + return vectorsFormat; |
| + } |
| + |
| + @Override |
| + public final PostingsFormat postingsFormat() { |
| + return postingsFormat; |
| + } |
| + |
| + @Override |
| + public final FieldInfosFormat fieldInfosFormat() { |
| + return fieldInfosFormat; |
| + } |
| + |
| + @Override |
| + public final SegmentInfoFormat segmentInfoFormat() { |
| + return segmentInfosFormat; |
| + } |
| + |
| + @Override |
| + public final LiveDocsFormat liveDocsFormat() { |
| + return liveDocsFormat; |
| + } |
| + |
| + @Override |
| + public final CompoundFormat compoundFormat() { |
| + return compoundFormat; |
| + } |
| + |
| + @Override |
| + public final PointsFormat pointsFormat() { |
| + return new Lucene60PointsFormat(); |
| + } |
| + |
| + /** Returns the postings format that should be used for writing |
| + * new segments of <code>field</code>. |
| + * |
| + * The default implementation always returns "Lucene50". |
| + * <p> |
| + * <b>WARNING:</b> if you subclass, you are responsible for index |
| + * backwards compatibility: future versions of Lucene are only |
| + * guaranteed to be able to read the default implementation. |
| + */ |
| + public PostingsFormat getPostingsFormatForField(String field) { |
| + return defaultFormat; |
| + } |
| + |
| + /** Returns the docvalues format that should be used for writing |
| + * new segments of <code>field</code>. |
| + * |
| + * The default implementation always returns "Lucene54". |
| + * <p> |
| + * <b>WARNING:</b> if you subclass, you are responsible for index |
| + * backwards compatibility: future versions of Lucene are only |
| + * guaranteed to be able to read the default implementation. |
| + */ |
| + public DocValuesFormat getDocValuesFormatForField(String field) { |
| + return defaultDVFormat; |
| + } |
| + |
| + @Override |
| + public final DocValuesFormat docValuesFormat() { |
| + return docValuesFormat; |
| + } |
| + |
| + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50"); |
| + private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene54"); |
| + |
| + private final NormsFormat normsFormat = new Lucene53NormsFormat(); |
| + |
| + @Override |
| + public final NormsFormat normsFormat() { |
| + return normsFormat; |
| + } |
| +} |
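| |
| A minimal sketch (hypothetical subclass and field name, not part of this patch) of using the override points documented above: route one field to a different postings format, accepting the backward-compatibility burden the javadoc warns about. |
| |
| import org.apache.lucene.codecs.PostingsFormat; |
| |
| // Hypothetical codec: the "id" field uses the "Memory" postings format |
| // (from the lucene-codecs module); all other fields use the default. |
| public class IdFieldCodec extends Lucene62Codec { |
|   @Override |
|   public PostingsFormat getPostingsFormatForField(String field) { |
|     if ("id".equals(field)) { |
|       return PostingsFormat.forName("Memory"); |
|     } |
|     return super.getPostingsFormatForField(field); |
|   } |
| } |
| |
| Such a codec would be installed via IndexWriterConfig.setCodec(new IdFieldCodec()). |
| |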
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -0,0 +1,319 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.codecs.lucene62; |
| + |
| +import java.io.IOException; |
| +import java.util.Collections; |
| +import java.util.Map; |
| +import java.util.Set; |
| + |
| +import org.apache.lucene.codecs.CodecUtil; |
| +import org.apache.lucene.codecs.SegmentInfoFormat; |
| +import org.apache.lucene.index.CorruptIndexException; |
| +import org.apache.lucene.index.IndexFileNames; |
| +import org.apache.lucene.index.IndexWriter; // javadocs |
| +import org.apache.lucene.index.SegmentInfo; // javadocs |
| +import org.apache.lucene.index.SegmentInfos; // javadocs |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| +import org.apache.lucene.store.DataOutput; // javadocs |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.IOContext; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.Version; |
| + |
| +/** |
| + * Lucene 6.2 Segment info format. |
| + * <p> |
| + * Files: |
| + * <ul> |
| + * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, IndexSort, Footer |
| + * </ul> |
| + * Data types: |
| + * <ul> |
| + * <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}</li> |
| + * <li>SegSize --> {@link DataOutput#writeInt Int32}</li> |
| + * <li>SegVersion --> {@link DataOutput#writeString String}</li> |
| + * <li>Files --> {@link DataOutput#writeSetOfStrings Set<String>}</li> |
| + * <li>Diagnostics,Attributes --> {@link DataOutput#writeMapOfStrings Map<String,String>}</li> |
| + * <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li> |
| + * <li>IndexSort --> {@link DataOutput#writeVInt Int32} count, followed by {@code count} SortField</li> |
| + * <li>SortField --> {@link DataOutput#writeString String} field name, followed by {@link DataOutput#writeVInt Int32} sort type ID, |
| + * followed by {@link DataOutput#writeByte Int8} indicating reversed sort, followed by a type-specific encoding of the optional missing value |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| + * </ul> |
| + * Field Descriptions: |
| + * <ul> |
| + * <li>SegVersion is the code version that created the segment.</li> |
| + * <li>SegSize is the number of documents contained in the segment index.</li> |
| + * <li>IsCompoundFile records whether the segment is written as a compound file or |
| + * not. If this is -1, the segment is not a compound file. If it is 1, the segment |
| + * is a compound file.</li> |
| + * <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid, |
| + * for each segment it creates. It includes metadata like the current Lucene |
| + * version, OS, Java version, why the segment was created (merge, flush, |
| + * addIndexes), etc.</li> |
| + * <li>Files is a list of files referred to by this segment.</li> |
| + * </ul> |
| + * |
| + * @see SegmentInfos |
| + * @lucene.experimental |
| + */ |
| +public class Lucene62SegmentInfoFormat extends SegmentInfoFormat { |
| + |
| + /** Sole constructor. */ |
| + public Lucene62SegmentInfoFormat() { |
| + } |
| + |
| + @Override |
| + public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException { |
| + final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION); |
| + try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) { |
| + Throwable priorE = null; |
| + SegmentInfo si = null; |
| + try { |
| + int format = CodecUtil.checkIndexHeader(input, Lucene62SegmentInfoFormat.CODEC_NAME, |
| + Lucene62SegmentInfoFormat.VERSION_START, |
| + Lucene62SegmentInfoFormat.VERSION_CURRENT, |
| + segmentID, ""); |
| + final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt()); |
| + |
| + final int docCount = input.readInt(); |
| + if (docCount < 0) { |
| + throw new CorruptIndexException("invalid docCount: " + docCount, input); |
| + } |
| + final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; |
| + |
| + final Map<String,String> diagnostics = input.readMapOfStrings(); |
| + final Set<String> files = input.readSetOfStrings(); |
| + final Map<String,String> attributes = input.readMapOfStrings(); |
| + |
| + int numSortFields = input.readVInt(); |
| + Sort indexSort; |
| + if (numSortFields > 0) { |
| + SortField[] sortFields = new SortField[numSortFields]; |
| + for(int i=0;i<numSortFields;i++) { |
| + String fieldName = input.readString(); |
| + int sortTypeID = input.readVInt(); |
| + SortField.Type sortType; |
| + switch(sortTypeID) { |
| + case 0: |
| + sortType = SortField.Type.STRING; |
| + break; |
| + case 1: |
| + sortType = SortField.Type.LONG; |
| + break; |
| + case 2: |
| + sortType = SortField.Type.INT; |
| + break; |
| + case 3: |
| + sortType = SortField.Type.DOUBLE; |
| + break; |
| + case 4: |
| + sortType = SortField.Type.FLOAT; |
| + break; |
| + default: |
| + throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input); |
| + } |
| + byte b = input.readByte(); |
| + boolean reverse; |
| + if (b == 0) { |
| + reverse = true; |
| + } else if (b == 1) { |
| + reverse = false; |
| + } else { |
| + throw new CorruptIndexException("invalid index sort reverse: " + b, input); |
| + } |
| + |
| + sortFields[i] = new SortField(fieldName, sortType, reverse); |
| + |
| + Object missingValue; |
| + b = input.readByte(); |
| + if (b == 0) { |
| + missingValue = null; |
| + } else { |
| + switch(sortType) { |
| + case STRING: |
| + if (b == 1) { |
| + missingValue = SortField.STRING_LAST; |
| + } else if (b == 2) { |
| + missingValue = SortField.STRING_FIRST; |
| + } else { |
| + throw new CorruptIndexException("invalid missing value flag: " + b, input); |
| + } |
| + break; |
| + case LONG: |
| + if (b != 1) { |
| + throw new CorruptIndexException("invalid missing value flag: " + b, input); |
| + } |
| + missingValue = input.readLong(); |
| + break; |
| + case INT: |
| + if (b != 1) { |
| + throw new CorruptIndexException("invalid missing value flag: " + b, input); |
| + } |
| + missingValue = input.readInt(); |
| + break; |
| + case DOUBLE: |
| + if (b != 1) { |
| + throw new CorruptIndexException("invalid missing value flag: " + b, input); |
| + } |
| + missingValue = Double.longBitsToDouble(input.readLong()); |
| + break; |
| + case FLOAT: |
| + if (b != 1) { |
| + throw new CorruptIndexException("invalid missing value flag: " + b, input); |
| + } |
| + missingValue = Float.intBitsToFloat(input.readInt()); |
| + break; |
| + default: |
| + throw new AssertionError("unhandled sortType=" + sortType); |
| + } |
| + } |
| + if (missingValue != null) { |
| + sortFields[i].setMissingValue(missingValue); |
| + } |
| + } |
| + indexSort = new Sort(sortFields); |
| + } else if (numSortFields < 0) { |
| + throw new CorruptIndexException("invalid index sort field count: " + numSortFields, input); |
| + } else { |
| + indexSort = null; |
| + } |
| + |
| + si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort); |
| + si.setFiles(files); |
| + } catch (Throwable exception) { |
| + priorE = exception; |
| + } finally { |
| + CodecUtil.checkFooter(input, priorE); |
| + } |
| + return si; |
| + } |
| + } |
| + |
| + @Override |
| + public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { |
| + final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene62SegmentInfoFormat.SI_EXTENSION); |
| + |
| + try (IndexOutput output = dir.createOutput(fileName, ioContext)) { |
| + // Only add the file once we've successfully created it, else IFD assert can trip: |
| + si.addFile(fileName); |
| + CodecUtil.writeIndexHeader(output, |
| + Lucene62SegmentInfoFormat.CODEC_NAME, |
| + Lucene62SegmentInfoFormat.VERSION_CURRENT, |
| + si.getId(), |
| + ""); |
| + Version version = si.getVersion(); |
| + if (version.major < 5) { |
| + throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si); |
| + } |
| + // Write the Lucene version that created this segment, since 3.1 |
| + output.writeInt(version.major); |
| + output.writeInt(version.minor); |
| + output.writeInt(version.bugfix); |
| + assert version.prerelease == 0; |
| + output.writeInt(si.maxDoc()); |
| + |
| + output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); |
| + output.writeMapOfStrings(si.getDiagnostics()); |
| + Set<String> files = si.files(); |
| + for (String file : files) { |
| + if (!IndexFileNames.parseSegmentName(file).equals(si.name)) { |
| + throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files); |
| + } |
| + } |
| + output.writeSetOfStrings(files); |
| + output.writeMapOfStrings(si.getAttributes()); |
| + |
| + Sort indexSort = si.getIndexSort(); |
| + int numSortFields = indexSort == null ? 0 : indexSort.getSort().length; |
| + output.writeVInt(numSortFields); |
| + for (int i = 0; i < numSortFields; ++i) { |
| + SortField sortField = indexSort.getSort()[i]; |
| + output.writeString(sortField.getField()); |
| + int sortTypeID; |
| + switch (sortField.getType()) { |
| + case STRING: |
| + sortTypeID = 0; |
| + break; |
| + case LONG: |
| + sortTypeID = 1; |
| + break; |
| + case INT: |
| + sortTypeID = 2; |
| + break; |
| + case DOUBLE: |
| + sortTypeID = 3; |
| + break; |
| + case FLOAT: |
| + sortTypeID = 4; |
| + break; |
| + default: |
| + throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); |
| + } |
| + output.writeVInt(sortTypeID); |
| + output.writeByte((byte) (sortField.getReverse() ? 0 : 1)); |
| + |
| + // write missing value |
| + Object missingValue = sortField.getMissingValue(); |
| + if (missingValue == null) { |
| + output.writeByte((byte) 0); |
| + } else { |
| + switch(sortField.getType()) { |
| + case STRING: |
| + if (missingValue == SortField.STRING_LAST) { |
| + output.writeByte((byte) 1); |
| + } else if (missingValue == SortField.STRING_FIRST) { |
| + output.writeByte((byte) 2); |
| + } else { |
| + throw new AssertionError("unrecognized missing value for STRING field \"" + sortField.getField() + "\": " + missingValue); |
| + } |
| + break; |
| + case LONG: |
| + output.writeByte((byte) 1); |
| + output.writeLong(((Long) missingValue).longValue()); |
| + break; |
| + case INT: |
| + output.writeByte((byte) 1); |
| + output.writeInt(((Integer) missingValue).intValue()); |
| + break; |
| + case DOUBLE: |
| + output.writeByte((byte) 1); |
| + output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue())); |
| + break; |
| + case FLOAT: |
| + output.writeByte((byte) 1); |
| + output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue())); |
| + break; |
| + default: |
| + throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); |
| + } |
| + } |
| + } |
| + |
| + CodecUtil.writeFooter(output); |
| + } |
| + } |
| + |
| + /** File extension used to store {@link SegmentInfo}. */ |
| + public final static String SI_EXTENSION = "si"; |
| + static final String CODEC_NAME = "Lucene62SegmentInfo"; |
| + static final int VERSION_START = 0; |
| + static final int VERSION_CURRENT = VERSION_START; |
| +} |
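| |
| To make the IndexSort encoding documented above concrete, a hand-worked sketch (field name and values are illustrative) of what write() emits for a single descending-long sort with a missing value: |
| |
| Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true)); |
| sort.getSort()[0].setMissingValue(0L); |
| // After Files and Attributes, write() appends: |
| //   VInt   1            numSortFields |
| //   String "timestamp"  field name |
| //   VInt   1            sort type ID for LONG |
| //   byte   0            reverse flag (0 = reversed, 1 = forward) |
| //   byte   1            missing value present (numeric flag) |
| //   Int64  0            the missing value, written with writeLong |
| |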
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -0,0 +1,24 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Components from the Lucene 6.2 index format. |
| + * See {@link org.apache.lucene.codecs.lucene62} for an overview |
| + * of the index format. |
| + */ |
| + |
| +package org.apache.lucene.codecs.lucene62; |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java 2016-02-16 11:18:34.653021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -16,7 +16,6 @@ |
| */ |
| package org.apache.lucene.codecs; |
| |
| - |
| import java.io.Closeable; |
| import java.io.IOException; |
| import java.util.ArrayList; |
| @@ -24,6 +23,7 @@ |
| import java.util.List; |
| import java.util.NoSuchElementException; |
| |
| +import org.apache.lucene.index.DocIDMerger; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.MergeState; |
| @@ -31,6 +31,8 @@ |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.util.Bits; |
| |
| +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| + |
| /** |
| * Abstract API that consumes normalization values. |
| * Concrete implementations of this |
| @@ -98,6 +100,30 @@ |
| } |
| } |
| |
| + /** Tracks state of one numeric sub-reader that we are merging */ |
| + private static class NumericDocValuesSub extends DocIDMerger.Sub { |
| + |
| + private final NumericDocValues values; |
| + private int docID = -1; |
| + private final int maxDoc; |
| + |
| + public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values, int maxDoc) { |
| + super(docMap); |
| + this.values = values; |
| + this.maxDoc = maxDoc; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + } |
| + |
| /** |
| * Merges the norms from <code>toMerge</code>. |
| * <p> |
| @@ -111,13 +137,18 @@ |
| new Iterable<Number>() { |
| @Override |
| public Iterator<Number> iterator() { |
| + |
| + // We must make a new DocIDMerger for each iterator: |
| + List<NumericDocValuesSub> subs = new ArrayList<>(); |
| + assert mergeState.docMaps.length == toMerge.size(); |
| + for(int i=0;i<toMerge.size();i++) { |
| + subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i])); |
| + } |
| + |
| + final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| return new Iterator<Number>() { |
| - int readerUpto = -1; |
| - int docIDUpto; |
| long nextValue; |
| - int maxDoc; |
| - NumericDocValues currentValues; |
| - Bits currentLiveDocs; |
| boolean nextIsSet; |
| |
| @Override |
| @@ -141,31 +172,13 @@ |
| } |
| |
| private boolean setNext() { |
| - while (true) { |
| - if (readerUpto == toMerge.size()) { |
| - return false; |
| - } |
| - |
| - if (currentValues == null || docIDUpto == maxDoc) { |
| - readerUpto++; |
| - if (readerUpto < toMerge.size()) { |
| - currentValues = toMerge.get(readerUpto); |
| - currentLiveDocs = mergeState.liveDocs[readerUpto]; |
| - maxDoc = mergeState.maxDocs[readerUpto]; |
| - } |
| - docIDUpto = 0; |
| - continue; |
| - } |
| - |
| - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { |
| - nextIsSet = true; |
| - nextValue = currentValues.get(docIDUpto); |
| - docIDUpto++; |
| - return true; |
| - } |
| - |
| - docIDUpto++; |
| + NumericDocValuesSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| + return false; |
| } |
| + nextIsSet = true; |
| + nextValue = sub.values.get(sub.docID); |
| + return true; |
| } |
| }; |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java 2016-04-24 06:00:46.365895938 -0400 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -76,7 +76,6 @@ |
| } |
| |
| MergeState.DocMap docMap = mergeState.docMaps[i]; |
| - int docBase = mergeState.docBase[i]; |
| pointsReader.intersect(fieldInfo.name, |
| new IntersectVisitor() { |
| @Override |
| @@ -90,7 +89,7 @@ |
| int newDocID = docMap.get(docID); |
| if (newDocID != -1) { |
| // Not deleted: |
| - mergedVisitor.visit(docBase + newDocID, packedValue); |
| + mergedVisitor.visit(newDocID, packedValue); |
| } |
| } |
| |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java 2016-02-16 11:18:34.653021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -20,10 +20,13 @@ |
| import java.io.IOException; |
| import java.io.Reader; |
| import java.nio.charset.StandardCharsets; |
| +import java.util.ArrayList; |
| +import java.util.List; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.StoredField; |
| +import org.apache.lucene.index.DocIDMerger; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.IndexableField; |
| @@ -33,6 +36,8 @@ |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| |
| +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| + |
| /** |
| * Codec API for writing stored fields: |
| * <ol> |
| @@ -73,6 +78,30 @@ |
| * check that this is the case to detect the JRE bug described |
| * in LUCENE-1282. */ |
| public abstract void finish(FieldInfos fis, int numDocs) throws IOException; |
| + |
| + private static class StoredFieldsMergeSub extends DocIDMerger.Sub { |
| + private final StoredFieldsReader reader; |
| + private final int maxDoc; |
| + private final MergeVisitor visitor; |
| + int docID = -1; |
| + |
| + public StoredFieldsMergeSub(MergeVisitor visitor, MergeState.DocMap docMap, StoredFieldsReader reader, int maxDoc) { |
| + super(docMap); |
| + this.maxDoc = maxDoc; |
| + this.reader = reader; |
| + this.visitor = visitor; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + } |
| |
| /** Merges in the stored fields from the readers in |
| * <code>mergeState</code>. The default implementation skips |
| @@ -82,23 +111,26 @@ |
| * Implementations can override this method for more sophisticated |
| * merging (bulk-byte copying, etc). */ |
| public int merge(MergeState mergeState) throws IOException { |
| - int docCount = 0; |
| - for (int i=0;i<mergeState.storedFieldsReaders.length;i++) { |
| + List<StoredFieldsMergeSub> subs = new ArrayList<>(); |
| + for(int i=0;i<mergeState.storedFieldsReaders.length;i++) { |
| StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i]; |
| storedFieldsReader.checkIntegrity(); |
| - MergeVisitor visitor = new MergeVisitor(mergeState, i); |
| - int maxDoc = mergeState.maxDocs[i]; |
| - Bits liveDocs = mergeState.liveDocs[i]; |
| - for (int docID=0;docID<maxDoc;docID++) { |
| - if (liveDocs != null && !liveDocs.get(docID)) { |
| - // skip deleted docs |
| - continue; |
| - } |
| - startDocument(); |
| - storedFieldsReader.visitDocument(docID, visitor); |
| - finishDocument(); |
| - docCount++; |
| + subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i])); |
| + } |
| + |
| + final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| + int docCount = 0; |
| + while (true) { |
| + StoredFieldsMergeSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| + break; |
| } |
| + assert sub.mappedDocID == docCount; |
| + startDocument(); |
| + sub.reader.visitDocument(sub.docID, sub.visitor); |
| + finishDocument(); |
| + docCount++; |
| } |
| finish(mergeState.mergeFieldInfos, docCount); |
| return docCount; |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java 2016-02-16 11:18:34.653021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -16,16 +16,18 @@ |
| */ |
| package org.apache.lucene.codecs; |
| |
| - |
| import java.io.Closeable; |
| import java.io.IOException; |
| +import java.util.ArrayList; |
| import java.util.Iterator; |
| +import java.util.List; |
| |
| -import org.apache.lucene.index.PostingsEnum; |
| +import org.apache.lucene.index.DocIDMerger; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.Fields; |
| import org.apache.lucene.index.MergeState; |
| +import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.search.DocIdSetIterator; |
| @@ -34,6 +36,8 @@ |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| |
| +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| + |
| /** |
| * Codec API for writing term vectors: |
| * <ol> |
| @@ -160,6 +164,28 @@ |
| } |
| } |
| |
| + private static class TermVectorsMergeSub extends DocIDMerger.Sub { |
| + private final TermVectorsReader reader; |
| + private final int maxDoc; |
| + int docID = -1; |
| + |
| + public TermVectorsMergeSub(MergeState.DocMap docMap, TermVectorsReader reader, int maxDoc) { |
| + super(docMap); |
| + this.maxDoc = maxDoc; |
| + this.reader = reader; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + } |
| + |
| /** Merges in the term vectors from the readers in |
| * <code>mergeState</code>. The default implementation skips |
| * over deleted documents, and uses {@link #startDocument(int)}, |
| @@ -170,32 +196,35 @@ |
| * Implementations can override this method for more sophisticated |
| * merging (bulk-byte copying, etc). */ |
| public int merge(MergeState mergeState) throws IOException { |
| + |
| + List<TermVectorsMergeSub> subs = new ArrayList<>(); |
| + for(int i=0;i<mergeState.termVectorsReaders.length;i++) { |
| + TermVectorsReader reader = mergeState.termVectorsReaders[i]; |
| + if (reader != null) { |
| + reader.checkIntegrity(); |
| + } |
| + subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i])); |
| + } |
| + |
| + final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); |
| + |
| int docCount = 0; |
| - int numReaders = mergeState.maxDocs.length; |
| - for (int i = 0; i < numReaders; i++) { |
| - int maxDoc = mergeState.maxDocs[i]; |
| - Bits liveDocs = mergeState.liveDocs[i]; |
| - TermVectorsReader termVectorsReader = mergeState.termVectorsReaders[i]; |
| - if (termVectorsReader != null) { |
| - termVectorsReader.checkIntegrity(); |
| + while (true) { |
| + TermVectorsMergeSub sub = docIDMerger.next(); |
| + if (sub == null) { |
| + break; |
| } |
| |
| - for (int docID=0;docID<maxDoc;docID++) { |
| - if (liveDocs != null && !liveDocs.get(docID)) { |
| - // skip deleted docs |
| - continue; |
| - } |
| - // NOTE: it's very important to first assign to vectors then pass it to |
| - // termVectorsWriter.addAllDocVectors; see LUCENE-1282 |
| - Fields vectors; |
| - if (termVectorsReader == null) { |
| - vectors = null; |
| - } else { |
| - vectors = termVectorsReader.get(docID); |
| - } |
| - addAllDocVectors(vectors, mergeState); |
| - docCount++; |
| + // NOTE: it's very important to first assign to vectors then pass it to |
| + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 |
| + Fields vectors; |
| + if (sub.reader == null) { |
| + vectors = null; |
| + } else { |
| + vectors = sub.reader.get(sub.docID); |
| } |
| + addAllDocVectors(vectors, mergeState); |
| + docCount++; |
| } |
| finish(mergeState.mergeFieldInfos, docCount); |
| return docCount; |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java indexsort/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java 2016-03-10 16:23:24.691676109 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -43,6 +43,9 @@ |
| import org.apache.lucene.document.DocumentStoredFieldVisitor; |
| import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus; |
| import org.apache.lucene.search.DocIdSetIterator; |
| +import org.apache.lucene.search.LeafFieldComparator; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| @@ -217,6 +220,9 @@ |
| |
| /** Status for testing of PointValues (null if PointValues could not be tested). */ |
| public PointsStatus pointsStatus; |
| + |
| + /** Status of index sort */ |
| + public IndexSortStatus indexSortStatus; |
| } |
| |
| /** |
| @@ -374,6 +380,16 @@ |
| /** Exception thrown during doc values test (null on success) */ |
| public Throwable error = null; |
| } |
| + |
| + /** |
| + * Status from testing index sort |
| + */ |
| + public static final class IndexSortStatus { |
| + |
| + /** Exception thrown during index sort test (null on success) */ |
| + public Throwable error = null; |
| + } |
| + |
| } |
| |
| /** Create a new CheckIndex on the directory. */ |
| @@ -632,6 +648,7 @@ |
| int toLoseDocCount = info.info.maxDoc(); |
| |
| SegmentReader reader = null; |
| + Sort previousIndexSort = null; |
| |
| try { |
| msg(infoStream, " version=" + (version == null ? "3.0" : version)); |
| @@ -642,6 +659,17 @@ |
| msg(infoStream, " compound=" + info.info.getUseCompoundFile()); |
| segInfoStat.compound = info.info.getUseCompoundFile(); |
| msg(infoStream, " numFiles=" + info.files().size()); |
| + Sort indexSort = info.info.getIndexSort(); |
| + if (indexSort != null) { |
| + msg(infoStream, " sort=" + indexSort); |
| + if (previousIndexSort != null) { |
| + if (previousIndexSort.equals(indexSort) == false) { |
| + throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort); |
| + } |
| + } else { |
| + previousIndexSort = indexSort; |
| + } |
| + } |
| segInfoStat.numFiles = info.files().size(); |
| segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.); |
| msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB)); |
| @@ -722,6 +750,9 @@ |
| // Test PointValues |
| segInfoStat.pointsStatus = testPoints(reader, infoStream, failFast); |
| |
| + // Test index sort |
| + segInfoStat.indexSortStatus = testSort(reader, indexSort, infoStream, failFast); |
| + |
| // Rethrow the first exception we encountered |
| // This will cause stats for failed segments to be incremented properly |
| if (segInfoStat.liveDocStatus.error != null) { |
| @@ -790,6 +821,68 @@ |
| |
| return result; |
| } |
| + |
| + public static Status.IndexSortStatus testSort(CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException { |
| + // This segment claims its documents are sorted according to the incoming sort ... let's make sure: |
| + |
| + long startNS = System.nanoTime(); |
| + |
| + Status.IndexSortStatus status = new Status.IndexSortStatus(); |
| + |
| + if (sort != null) { |
| + if (infoStream != null) { |
| + infoStream.print(" test: check index sort....."); |
| + } |
| + |
| + SortField fields[] = sort.getSort(); |
| + final int reverseMul[] = new int[fields.length]; |
| + final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length]; |
| + |
| + LeafReaderContext readerContext = new LeafReaderContext(reader); |
| + |
| + for (int i = 0; i < fields.length; i++) { |
| + reverseMul[i] = fields[i].getReverse() ? -1 : 1; |
| + comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext); |
| + } |
| + |
| + int maxDoc = reader.maxDoc(); |
| + |
| + try { |
| + |
| + for(int docID=1;docID < maxDoc;docID++) { |
| + |
| + int cmp = 0; |
| + |
| + for (int i = 0; i < comparators.length; i++) { |
| + // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co, |
| + // the segments are always the same here... |
| + comparators[i].copy(0, docID-1); |
| + comparators[i].setBottom(0); |
| + cmp = reverseMul[i] * comparators[i].compareBottom(docID); |
| + if (cmp != 0) { |
| + break; |
| + } |
| + } |
| + |
| + if (cmp > 0) { |
| + throw new RuntimeException("segment has indexSort=" + sort + " but docID=" + (docID-1) + " sorts after docID=" + docID); |
| + } |
| + } |
| + msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime()-startNS))); |
| + } catch (Throwable e) { |
| + if (failFast) { |
| + IOUtils.reThrow(e); |
| + } |
| + msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]"); |
| + status.error = e; |
| + if (infoStream != null) { |
| + e.printStackTrace(infoStream); |
| + } |
| + } |
| + } |
| + |
| + return status; |
| + } |
| |
| /** |
| * Test live docs. |
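| |
| A short sketch (assuming an open Directory dir; CheckIndex is Closeable) of surfacing the new index-sort status from application code: |
| |
| try (CheckIndex checker = new CheckIndex(dir)) { |
|   CheckIndex.Status status = checker.checkIndex(); |
|   for (CheckIndex.Status.SegmentInfoStatus segStatus : status.segmentInfos) { |
|     if (segStatus.indexSortStatus != null && segStatus.indexSortStatus.error != null) { |
|       // this segment's documents violate the sort recorded in its SegmentInfo |
|     } |
|   } |
| } |
| |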
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java indexsort/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -0,0 +1,175 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +package org.apache.lucene.index; |
| + |
| +import java.io.IOException; |
| +import java.util.List; |
| + |
| +import org.apache.lucene.search.DocIdSetIterator; // javadocs |
| +import org.apache.lucene.util.Bits; |
| +import org.apache.lucene.util.PriorityQueue; |
| + |
| +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| + |
| +/** Utility class to help merge documents from sub-readers according to either simple |
| + * concatenated (unsorted) order, or by a specified index-time sort, skipping |
| + * deleted documents and remapping non-deleted documents. */ |
| + |
| +public class DocIDMerger<T extends DocIDMerger.Sub> { |
| + |
| + private final List<T> subs; |
| + |
| + // Used when indexSort != null: |
| + private final PriorityQueue<T> queue; |
| + private boolean first; |
| + |
| + // Used when the index is not sorted (simple concatenation): |
| + private T current; |
| + private int nextIndex; |
| + |
| + public static abstract class Sub { |
| + public int mappedDocID; |
| + final MergeState.DocMap docMap; |
| + |
| + public Sub(MergeState.DocMap docMap) { |
| + this.docMap = docMap; |
| + } |
| + |
| + /** Returns the next document ID from this sub reader, and {@link DocIdSetIterator#NO_MORE_DOCS} when done */ |
| + public abstract int nextDoc(); |
| + } |
| + |
| + public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) { |
| + this.subs = subs; |
| + |
| + if (indexIsSorted) { |
| + queue = new PriorityQueue<T>(maxCount) { |
| + @Override |
| + protected boolean lessThan(Sub a, Sub b) { |
| + assert a.mappedDocID != b.mappedDocID; |
| + return a.mappedDocID < b.mappedDocID; |
| + } |
| + }; |
| + } else { |
| + // We simply concatenate |
| + queue = null; |
| + } |
| + |
| + reset(); |
| + } |
| + |
| + public DocIDMerger(List<T> subs, boolean indexIsSorted) { |
| + this(subs, subs.size(), indexIsSorted); |
| + } |
| + |
| + /** Reuse API, currently only used by postings during merge */ |
| + public void reset() { |
| + if (queue != null) { |
| + // caller may not have fully consumed the queue: |
| + queue.clear(); |
| + for(T sub : subs) { |
| + while (true) { |
| + int docID = sub.nextDoc(); |
| + if (docID == NO_MORE_DOCS) { |
| + // all docs in this sub were deleted; do not add it to the queue! |
| + break; |
| + } |
| + |
| + int mappedDocID = sub.docMap.get(docID); |
| + if (mappedDocID == -1) { |
| + // doc was deleted |
| + continue; |
| + } else { |
| + sub.mappedDocID = mappedDocID; |
| + queue.add(sub); |
| + break; |
| + } |
| + } |
| + } |
| + first = true; |
| + } else if (subs.size() > 0) { |
| + current = subs.get(0); |
| + nextIndex = 1; |
| + } else { |
| + current = null; |
| + nextIndex = 0; |
| + } |
| + } |
| + |
| + /** Returns null when done */ |
| + public T next() { |
| + // Loop until we find a non-deleted document |
| + if (queue != null) { |
| + T top = queue.top(); |
| + if (top == null) { |
| + // NOTE: it's annoying that caller is allowed to call us again even after we returned null before |
| + return null; |
| + } |
| + |
| + if (first == false) { |
| + while (true) { |
| + int docID = top.nextDoc(); |
| + if (docID == NO_MORE_DOCS) { |
| + queue.pop(); |
| + top = queue.top(); |
| + break; |
| + } |
| + int mappedDocID = top.docMap.get(docID); |
| + if (mappedDocID == -1) { |
| + // doc was deleted |
| + continue; |
| + } else { |
| + top.mappedDocID = mappedDocID; |
| + top = queue.updateTop(); |
| + break; |
| + } |
| + } |
| + } |
| + |
| + first = false; |
| + |
| + return top; |
| + |
| + } else { |
| + while (true) { |
| + if (current == null) { |
| + // NOTE: it's annoying that caller is allowed to call us again even after we returned null before |
| + return null; |
| + } |
| + int docID = current.nextDoc(); |
| + if (docID == NO_MORE_DOCS) { |
| + if (nextIndex == subs.size()) { |
| + current = null; |
| + return null; |
| + } |
| + current = subs.get(nextIndex); |
| + nextIndex++; |
| + continue; |
| + } |
| + int mappedDocID = current.docMap.get(docID); |
| + if (mappedDocID == -1) { |
| + // doc is deleted |
| + continue; |
| + } |
| + |
| + current.mappedDocID = mappedDocID; |
| + return current; |
| + } |
| + } |
| + } |
| +} |
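| |
| The merge loops added to NormsConsumer, StoredFieldsWriter and TermVectorsWriter above all follow the same contract; a condensed sketch (SimpleSub and drain are hypothetical names) of how a caller drives DocIDMerger: |
| |
| // Each sub iterates its own segment; DocIDMerger drops deleted docs via the |
| // DocMap and, when the index is sorted, interleaves subs so mappedDocID |
| // comes back in ascending order. |
| static class SimpleSub extends DocIDMerger.Sub { |
|   final int maxDoc; |
|   int docID = -1; |
|   SimpleSub(MergeState.DocMap docMap, int maxDoc) { |
|     super(docMap); |
|     this.maxDoc = maxDoc; |
|   } |
|   @Override |
|   public int nextDoc() { |
|     docID++; |
|     return docID == maxDoc ? NO_MORE_DOCS : docID; |
|   } |
| } |
| |
| static void drain(List<SimpleSub> subs, boolean indexIsSorted) { |
|   DocIDMerger<SimpleSub> merger = new DocIDMerger<>(subs, indexIsSorted); |
|   SimpleSub sub; |
|   while ((sub = merger.next()) != null) { |
|     // consume sub.docID (position in its segment) |
|     // at sub.mappedDocID (position in the merged segment) |
|   } |
| } |
| |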
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java indexsort/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java 2016-03-02 04:32:40.435807336 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -178,7 +178,7 @@ |
| pendingUpdates.clear(); |
| deleteSlice = deleteQueue.newSlice(); |
| |
| - segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| assert numDocsInRAM == 0; |
| if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { |
| infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/DocValues.java indexsort/lucene/core/src/java/org/apache/lucene/index/DocValues.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/DocValues.java 2016-02-16 11:18:34.661021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/DocValues.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -210,7 +210,7 @@ |
| (expected.length == 1 |
| ? "(expected=" + expected[0] |
| : "(expected one of " + Arrays.toString(expected)) + "). " + |
| - "Use UninvertingReader or index with docvalues."); |
| + "Re-index with correct docvalues type."); |
| } |
| } |
| |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java 2016-03-08 17:22:26.828938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -25,6 +25,7 @@ |
| import org.apache.lucene.codecs.PointsReader; |
| import org.apache.lucene.codecs.StoredFieldsReader; |
| import org.apache.lucene.codecs.TermVectorsReader; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.util.Bits; |
| |
| /** |
| @@ -102,6 +103,11 @@ |
| } |
| |
| @Override |
| + public Sort getIndexSort() { |
| + return in.getIndexSort(); |
| + } |
| + |
| + @Override |
| public void addCoreClosedListener(CoreClosedListener listener) { |
| in.addCoreClosedListener(listener); |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java 2016-04-24 06:00:46.365895938 -0400 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java 2016-05-10 05:44:23.744471118 -0400 |
| @@ -22,6 +22,7 @@ |
| import java.util.Objects; |
| |
| import org.apache.lucene.search.QueryCache; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -472,6 +473,12 @@ |
| } |
| |
| @Override |
| + public Sort getIndexSort() { |
| + ensureOpen(); |
| + return in.getIndexSort(); |
| + } |
| + |
| + @Override |
| public void checkIntegrity() throws IOException { |
| ensureOpen(); |
| in.checkIntegrity(); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java indexsort/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java 2016-04-24 06:00:46.365895938 -0400 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -18,16 +18,19 @@ |
| |
| |
| import java.io.PrintStream; |
| +import java.util.EnumSet; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; |
| import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| import org.apache.lucene.search.similarities.Similarity; |
| import org.apache.lucene.util.InfoStream; |
| import org.apache.lucene.util.PrintStreamInfoStream; |
| -import org.apache.lucene.util.SetOnce; |
| import org.apache.lucene.util.SetOnce.AlreadySetException; |
| +import org.apache.lucene.util.SetOnce; |
| |
| /** |
| * Holds all the configuration that is used to create an {@link IndexWriter}. |
| @@ -439,6 +442,26 @@ |
| return this; |
| } |
| |
| + /** We only allow sorting on these types */ |
| + private static final EnumSet<SortField.Type> ALLOWED_INDEX_SORT_TYPES = EnumSet.of(SortField.Type.STRING, |
| + SortField.Type.LONG, |
| + SortField.Type.INT, |
| + SortField.Type.DOUBLE, |
| + SortField.Type.FLOAT); |
| + |
| + /** |
| + * Set the {@link Sort} order to use when merging segments. Note that newly flushed segments will remain unsorted. |
| + */ |
| + public IndexWriterConfig setIndexSort(Sort sort) { |
| + for(SortField sortField : sort.getSort()) { |
| + if (ALLOWED_INDEX_SORT_TYPES.contains(sortField.getType()) == false) { |
| + throw new IllegalArgumentException("invalid SortField type: must be one of " + ALLOWED_INDEX_SORT_TYPES + " but got: " + sortField); |
| + } |
| + } |
| + this.indexSort = sort; |
| + return this; |
| + } |
| + |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(super.toString()); |
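| |
| A minimal sketch of wiring this up (analyzer, directory and field name are illustrative; the sort field must be indexed with matching doc values): |
| |
| IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer()); |
| iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG, true))); |
| try (IndexWriter writer = new IndexWriter(dir, iwc)) { |
|   // newly flushed segments remain unsorted; merges produce sorted segments |
| } |
| |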
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java indexsort/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java 2016-04-24 06:00:46.365895938 -0400 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -16,7 +16,6 @@ |
| */ |
| package org.apache.lucene.index; |
| |
| - |
| import java.io.Closeable; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| @@ -32,8 +31,8 @@ |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Locale; |
| -import java.util.Map; |
| import java.util.Map.Entry; |
| +import java.util.Map; |
| import java.util.Queue; |
| import java.util.Set; |
| import java.util.concurrent.atomic.AtomicInteger; |
| @@ -49,6 +48,7 @@ |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.search.MatchAllDocsQuery; |
| import org.apache.lucene.search.Query; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| @@ -937,6 +937,8 @@ |
| // NOTE: this is correct even for an NRT reader because we'll pull FieldInfos even for the un-committed segments: |
| globalFieldNumberMap = getFieldNumberMap(); |
| |
| + validateIndexSort(); |
| + |
| config.getFlushPolicy().init(config); |
| docWriter = new DocumentsWriter(this, config, directoryOrig, directory); |
| eventQueue = docWriter.eventQueue(); |
| @@ -1000,6 +1002,20 @@ |
| } |
| } |
| |
| + /** Confirms that the incoming index sort (if any) matches the existing index sort (if any). This is unfortunately just best effort, |
| + * because the old index may contain only flushed segments, which do not record a sort. |
| + private void validateIndexSort() { |
| + Sort indexSort = config.getIndexSort(); |
| + if (indexSort != null) { |
| + for(SegmentCommitInfo info : segmentInfos) { |
| + Sort segmentIndexSort = info.info.getIndexSort(); |
| + if (segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) { |
| + throw new IllegalArgumentException("cannot change previous indexSort=" + segmentIndexSort + " (from segment=" + info + ") to new indexSort=" + indexSort); |
| + } |
| + } |
| + } |
| + } |
| + |
| // reads latest field infos for the commit |
| // this is used on IW init and addIndexes(Dir) to create/update the global field map. |
| // TODO: fix tests abusing this method! |
| @@ -2472,7 +2488,8 @@ |
| * @throws CorruptIndexException if the index is corrupt |
| * @throws IOException if there is a low-level IO error |
| * @throws IllegalArgumentException if addIndexes would cause |
| - * the index to exceed {@link #MAX_DOCS} |
| + * the index to exceed {@link #MAX_DOCS}, or if the incoming |
| + * index sort does not match this index's index sort |
| */ |
| public void addIndexes(Directory... dirs) throws IOException { |
| ensureOpen(); |
| @@ -2481,6 +2498,8 @@ |
| |
| List<Lock> locks = acquireWriteLocks(dirs); |
| |
| + Sort indexSort = config.getIndexSort(); |
| + |
| boolean successTop = false; |
| |
| try { |
| @@ -2513,6 +2532,13 @@ |
| for (SegmentCommitInfo info : sis) { |
| assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name; |
| |
| + Sort segmentIndexSort = info.info.getIndexSort(); |
| + |
| + if (indexSort != null && segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) { |
| + // TODO: we could make this smarter, e.g. if the incoming indexSort is congruent with our sort ("starts with") then it's OK |
| + throw new IllegalArgumentException("cannot change index sort from " + segmentIndexSort + " to " + indexSort); |
| + } |
| + |
| String newSegName = newSegmentName(); |
| |
| if (infoStream.isEnabled("IW")) { |
| @@ -2609,6 +2635,8 @@ |
| // long so we can detect int overflow: |
| long numDocs = 0; |
| |
| + Sort indexSort = config.getIndexSort(); |
| + |
| try { |
| if (infoStream.isEnabled("IW")) { |
| infoStream.message("IW", "flush at addIndexes(CodecReader...)"); |
| @@ -2618,6 +2646,10 @@ |
| String mergedName = newSegmentName(); |
| for (CodecReader leaf : readers) { |
| numDocs += leaf.numDocs(); |
| + Sort leafIndexSort = leaf.getIndexSort(); |
| + if (indexSort != null && leafIndexSort != null && indexSort.equals(leafIndexSort) == false) { |
| + throw new IllegalArgumentException("cannot change index sort from " + leafIndexSort + " to " + indexSort); |
| + } |
| } |
| |
| // Best-effort up front check: |
| @@ -2630,7 +2662,7 @@ |
| TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); |
| |
| SegmentInfo info = new SegmentInfo(directoryOrig, Version.LATEST, mergedName, -1, |
| - false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort()); |
| |
| SegmentMerger merger = new SegmentMerger(Arrays.asList(readers), info, infoStream, trackingDir, |
| globalFieldNumberMap, |
| @@ -2715,7 +2747,7 @@ |
| // Same SI as before but we change directory and name |
| SegmentInfo newInfo = new SegmentInfo(directoryOrig, info.info.getVersion(), segName, info.info.maxDoc(), |
| info.info.getUseCompoundFile(), info.info.getCodec(), |
| - info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes()); |
| + info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes(), info.info.getIndexSort()); |
| SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.getDelCount(), info.getDelGen(), |
| info.getFieldInfosGen(), info.getDocValuesGen()); |
| |
| @@ -3243,16 +3275,13 @@ |
| |
| private static class MergedDeletesAndUpdates { |
| ReadersAndUpdates mergedDeletesAndUpdates = null; |
| - MergePolicy.DocMap docMap = null; |
| boolean initializedWritableLiveDocs = false; |
| |
| MergedDeletesAndUpdates() {} |
| |
| - final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, boolean initWritableLiveDocs) throws IOException { |
| + final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, boolean initWritableLiveDocs) throws IOException { |
| if (mergedDeletesAndUpdates == null) { |
| mergedDeletesAndUpdates = readerPool.get(merge.info, true); |
| - docMap = merge.getDocMap(mergeState); |
| - assert docMap.isConsistent(merge.info.info.maxDoc()); |
| } |
| if (initWritableLiveDocs && !initializedWritableLiveDocs) { |
| mergedDeletesAndUpdates.initWritableLiveDocs(); |
| @@ -3262,18 +3291,18 @@ |
| |
| } |
| |
| - private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto, |
| + private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, |
| MergedDeletesAndUpdates holder, String[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates, |
| - DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc) throws IOException { |
| + DocValuesFieldUpdates.Iterator[] updatesIters, int segment, int curDoc) throws IOException { |
| int newDoc = -1; |
| for (int idx = 0; idx < mergingFields.length; idx++) { |
| DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx]; |
| if (updatesIter.doc() == curDoc) { // document has an update |
| if (holder.mergedDeletesAndUpdates == null) { |
| - holder.init(readerPool, merge, mergeState, false); |
| + holder.init(readerPool, merge, false); |
| } |
| if (newDoc == -1) { // map once per all field updates, but only if there are any updates |
| - newDoc = holder.docMap.map(docUpto); |
| + newDoc = mergeState.docMaps[segment].get(curDoc); |
| } |
| DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx]; |
| dvUpdates.add(newDoc, updatesIter.value()); |
| @@ -3306,13 +3335,13 @@ |
| |
| // Carefully merge deletes that occurred after we |
| // started merging: |
| - int docUpto = 0; |
| long minGen = Long.MAX_VALUE; |
| |
| // Lazy init (only when we find a delete to carry over): |
| final MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates(); |
| final DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container(); |
| - |
| + |
| + assert sourceSegments.size() == mergeState.docMaps.length; |
| for (int i = 0; i < sourceSegments.size(); i++) { |
| SegmentCommitInfo info = sourceSegments.get(i); |
| minGen = Math.min(info.getBufferedDeletesGen(), minGen); |
| @@ -3375,21 +3404,20 @@ |
| // since we started the merge, so we |
| // must merge them: |
| for (int j = 0; j < maxDoc; j++) { |
| - if (!prevLiveDocs.get(j)) { |
| - assert !currentLiveDocs.get(j); |
| - } else { |
| - if (!currentLiveDocs.get(j)) { |
| - if (holder.mergedDeletesAndUpdates == null || !holder.initializedWritableLiveDocs) { |
| - holder.init(readerPool, merge, mergeState, true); |
| - } |
| - holder.mergedDeletesAndUpdates.delete(holder.docMap.map(docUpto)); |
| - if (mergingFields != null) { // advance all iters beyond the deleted document |
| - skipDeletedDoc(updatesIters, j); |
| - } |
| - } else if (mergingFields != null) { |
| - maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); |
| + if (prevLiveDocs.get(j) == false) { |
| + // if the document was deleted before, it better still be deleted! |
| + assert currentLiveDocs.get(j) == false; |
| + } else if (currentLiveDocs.get(j) == false) { |
| + // the document was deleted while we were merging: |
| + if (holder.mergedDeletesAndUpdates == null || holder.initializedWritableLiveDocs == false) { |
| + holder.init(readerPool, merge, true); |
| + } |
| + holder.mergedDeletesAndUpdates.delete(mergeState.docMaps[i].get(mergeState.leafDocMaps[i].get(j))); |
| + if (mergingFields != null) { // advance all iters beyond the deleted document |
| + skipDeletedDoc(updatesIters, j); |
| } |
| - docUpto++; |
| + } else if (mergingFields != null) { |
| + maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j); |
| } |
| } |
| } else if (mergingFields != null) { |
| @@ -3397,50 +3425,38 @@ |
| for (int j = 0; j < maxDoc; j++) { |
| if (prevLiveDocs.get(j)) { |
| // document isn't deleted, check if any of the fields have an update to it |
| - maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); |
| - // advance docUpto for every non-deleted document |
| - docUpto++; |
| + maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j); |
| } else { |
| // advance all iters beyond the deleted document |
| skipDeletedDoc(updatesIters, j); |
| } |
| } |
| - } else { |
| - docUpto += info.info.maxDoc() - info.getDelCount() - rld.getPendingDeleteCount(); |
| } |
| } else if (currentLiveDocs != null) { |
| assert currentLiveDocs.length() == maxDoc; |
| // This segment had no deletes before but now it |
| // does: |
| for (int j = 0; j < maxDoc; j++) { |
| - if (!currentLiveDocs.get(j)) { |
| + if (currentLiveDocs.get(j) == false) { |
| if (holder.mergedDeletesAndUpdates == null || !holder.initializedWritableLiveDocs) { |
| - holder.init(readerPool, merge, mergeState, true); |
| + holder.init(readerPool, merge, true); |
| } |
| - holder.mergedDeletesAndUpdates.delete(holder.docMap.map(docUpto)); |
| + holder.mergedDeletesAndUpdates.delete(mergeState.docMaps[i].get(mergeState.leafDocMaps[i].get(j))); |
| if (mergingFields != null) { // advance all iters beyond the deleted document |
| skipDeletedDoc(updatesIters, j); |
| } |
| } else if (mergingFields != null) { |
| - maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); |
| + maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j); |
| } |
| - docUpto++; |
| } |
| } else if (mergingFields != null) { |
| // no deletions before or after, but there were updates |
| for (int j = 0; j < maxDoc; j++) { |
| - maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); |
| - // advance docUpto for every non-deleted document |
| - docUpto++; |
| + maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j); |
| } |
| - } else { |
| - // No deletes or updates before or after |
| - docUpto += info.info.maxDoc(); |
| } |
| } |
| |
| - assert docUpto == merge.info.info.maxDoc(); |
| - |
| if (mergedDVUpdates.any()) { |
| // System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates); |
| boolean success = false; |
| @@ -3881,7 +3897,7 @@ |
| // ConcurrentMergePolicy we keep deterministic segment |
| // names. |
| final String mergeSegmentName = newSegmentName(); |
| - SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, mergeSegmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, mergeSegmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort()); |
| Map<String,String> details = new HashMap<>(); |
| details.put("mergeMaxNumSegments", "" + merge.maxNumSegments); |
| details.put("mergeFactor", Integer.toString(merge.segments.size())); |
| @@ -4082,10 +4098,13 @@ |
| } |
| |
| // System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders()); |
| - |
| - // we pass merge.getMergeReaders() instead of merge.readers to allow the |
| - // OneMerge to return a view over the actual segments to merge |
| - final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(), |
| + |
| + // Let the merge wrap readers |
| + List<CodecReader> mergeReaders = new ArrayList<>(); |
| + for (SegmentReader reader : merge.readers) { |
| + mergeReaders.add(merge.wrapForMerge(reader)); |
| + } |
| + final SegmentMerger merger = new SegmentMerger(mergeReaders, |
| merge.info.info, infoStream, dirWrapper, |
| globalFieldNumberMap, |
| context); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/LeafReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/LeafReader.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/LeafReader.java 2016-03-08 17:22:26.828938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/LeafReader.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -20,6 +20,7 @@ |
| import java.io.IOException; |
| |
| import org.apache.lucene.index.IndexReader.ReaderClosedListener; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.util.Bits; |
| |
| /** {@code LeafReader} is an abstract class, providing an interface for accessing an |
| @@ -312,4 +313,7 @@ |
| * @lucene.internal |
| */ |
| public abstract void checkIntegrity() throws IOException; |
| + |
| + /** Returns the {@link Sort} this leaf was sorted by, or null if this leaf is unsorted */ |
| + public abstract Sort getIndexSort(); |
| } |
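| |
| The new accessor lets search-time code verify a leaf's on-disk document order before relying on it. A minimal sketch, not part of the patch (the early-termination wiring itself is omitted): |
| |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.search.Sort; |
| |
| /** Sketch: true if every leaf was written in the expected index sort, e.g. |
|  *  so a collector could stop collecting early on queries using that sort. */ |
| static boolean allLeavesSortedBy(IndexReader reader, Sort expected) { |
|   for (LeafReaderContext ctx : reader.leaves()) { |
|     Sort leafSort = ctx.reader().getIndexSort(); |
|     if (leafSort == null || leafSort.equals(expected) == false) { |
|       return false; |
|     } |
|   } |
|   return true; |
| } |
| |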
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java indexsort/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java 2016-02-16 11:18:34.665021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -23,6 +23,7 @@ |
| import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.similarities.Similarity; |
| import org.apache.lucene.util.InfoStream; |
| |
| @@ -94,6 +95,9 @@ |
| /** True if calls to {@link IndexWriter#close()} should first do a commit. */ |
| protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE; |
| |
| + /** The sort order to use to write merged segments. */ |
| + protected Sort indexSort = null; |
| + |
| // used by IndexWriterConfig |
| LiveIndexWriterConfig(Analyzer analyzer) { |
| this.analyzer = analyzer; |
| @@ -445,6 +449,14 @@ |
| return commitOnClose; |
| } |
| |
| + /** |
| + * Returns the index-time {@link Sort} order, or null if no sort is set. |
| + * Merged segments are written in this order. |
| + */ |
| + public Sort getIndexSort() { |
| + return indexSort; |
| + } |
| + |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(); |
| @@ -467,6 +479,7 @@ |
| sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n"); |
| sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n"); |
| sb.append("commitOnClose=").append(getCommitOnClose()).append("\n"); |
| + sb.append("indexSort=").append(getIndexSort()).append("\n"); |
| return sb.toString(); |
| } |
| } |
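| |
| For callers, this getter pairs with a setter that the patch presumably adds on IndexWriterConfig (assumed here as setIndexSort). A minimal sketch of opening a writer that sorts merged segments by a long "timestamp" field: |
| |
| import java.io.IOException; |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| import org.apache.lucene.store.Directory; |
| |
| static IndexWriter openSortedWriter(Directory dir) throws IOException { |
|   IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer()); |
|   // setIndexSort: assumed setter counterpart of the getIndexSort() above |
|   iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG))); |
|   return new IndexWriter(dir, iwc); // merges now write segments in timestamp order |
| } |
| |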
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java indexsort/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java 2016-04-28 20:11:21.846721717 -0400 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -18,8 +18,11 @@ |
| |
| |
| import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.List; |
| |
| import org.apache.lucene.index.MultiPostingsEnum.EnumWithSlice; |
| +import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| |
| /** |
| @@ -30,52 +33,66 @@ |
| */ |
| |
| final class MappingMultiPostingsEnum extends PostingsEnum { |
| - private MultiPostingsEnum.EnumWithSlice[] subs; |
| - int numSubs; |
| - int upto; |
| - MergeState.DocMap currentMap; |
| - PostingsEnum current; |
| - int currentBase; |
| - int doc = -1; |
| - private MergeState mergeState; |
| MultiPostingsEnum multiDocsAndPositionsEnum; |
| final String field; |
| + final DocIDMerger<MappingPostingsSub> docIDMerger; |
| + private MappingPostingsSub current; |
| + private final MappingPostingsSub[] allSubs; |
| + private final List<MappingPostingsSub> subs = new ArrayList<>(); |
| + |
| + private static class MappingPostingsSub extends DocIDMerger.Sub { |
| + public PostingsEnum postings; |
| + |
| + public MappingPostingsSub(MergeState.DocMap docMap) { |
| + super(docMap); |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + try { |
| + return postings.nextDoc(); |
| + } catch (IOException ioe) { |
| + throw new RuntimeException(ioe); |
| + } |
| + } |
| + } |
| |
| /** Sole constructor. */ |
| - public MappingMultiPostingsEnum(String field, MergeState mergeState) { |
| + public MappingMultiPostingsEnum(String field, MergeState mergeState) throws IOException { |
| this.field = field; |
| - this.mergeState = mergeState; |
| + allSubs = new MappingPostingsSub[mergeState.fieldsProducers.length]; |
| + for(int i=0;i<allSubs.length;i++) { |
| + allSubs[i] = new MappingPostingsSub(mergeState.docMaps[i]); |
| + } |
| + this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.segmentInfo.getIndexSort() != null); |
| } |
| |
| - MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) { |
| - this.numSubs = postingsEnum.getNumSubs(); |
| - this.subs = postingsEnum.getSubs(); |
| - upto = -1; |
| - doc = -1; |
| - current = null; |
| + MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException { |
| this.multiDocsAndPositionsEnum = postingsEnum; |
| + MultiPostingsEnum.EnumWithSlice[] subsArray = postingsEnum.getSubs(); |
| + int count = postingsEnum.getNumSubs(); |
| + subs.clear(); |
| + for(int i=0;i<count;i++) { |
| + MappingPostingsSub sub = allSubs[subsArray[i].slice.readerIndex]; |
| + sub.postings = subsArray[i].postingsEnum; |
| + subs.add(sub); |
| + } |
| + docIDMerger.reset(); |
| return this; |
| } |
| |
| - /** How many sub-readers we are merging. |
| - * @see #getSubs */ |
| - public int getNumSubs() { |
| - return numSubs; |
| - } |
| - |
| - /** Returns sub-readers we are merging. */ |
| - public EnumWithSlice[] getSubs() { |
| - return subs; |
| - } |
| - |
| @Override |
| public int freq() throws IOException { |
| - return current.freq(); |
| + return current.postings.freq(); |
| } |
| |
| @Override |
| public int docID() { |
| - return doc; |
| + if (current == null) { |
| + return -1; |
| + } else { |
| + return current.mappedDocID; |
| + } |
| } |
| |
| @Override |
| @@ -85,66 +102,47 @@ |
| |
| @Override |
| public int nextDoc() throws IOException { |
| - while(true) { |
| - if (current == null) { |
| - if (upto == numSubs-1) { |
| - return this.doc = NO_MORE_DOCS; |
| - } else { |
| - upto++; |
| - final int reader = subs[upto].slice.readerIndex; |
| - current = subs[upto].postingsEnum; |
| - currentBase = mergeState.docBase[reader]; |
| - currentMap = mergeState.docMaps[reader]; |
| - } |
| - } |
| - |
| - int doc = current.nextDoc(); |
| - if (doc != NO_MORE_DOCS) { |
| - // compact deletions |
| - doc = currentMap.get(doc); |
| - if (doc == -1) { |
| - continue; |
| - } |
| - return this.doc = currentBase + doc; |
| - } else { |
| - current = null; |
| - } |
| + current = docIDMerger.next(); |
| + if (current == null) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return current.mappedDocID; |
| } |
| } |
| |
| @Override |
| public int nextPosition() throws IOException { |
| - int pos = current.nextPosition(); |
| + int pos = current.postings.nextPosition(); |
| if (pos < 0) { |
| - throw new CorruptIndexException("position=" + pos + " is negative, field=\"" + field + " doc=" + doc, |
| - mergeState.fieldsProducers[upto].toString()); |
| + throw new CorruptIndexException("position=" + pos + " is negative, field=\"" + field + "\" doc=" + current.mappedDocID, |
| + current.postings.toString()); |
| } else if (pos > IndexWriter.MAX_POSITION) { |
| - throw new CorruptIndexException("position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + doc, |
| - mergeState.fieldsProducers[upto].toString()); |
| + throw new CorruptIndexException("position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + current.mappedDocID, |
| + current.postings.toString()); |
| } |
| return pos; |
| } |
| |
| @Override |
| public int startOffset() throws IOException { |
| - return current.startOffset(); |
| + return current.postings.startOffset(); |
| } |
| |
| @Override |
| public int endOffset() throws IOException { |
| - return current.endOffset(); |
| + return current.postings.endOffset(); |
| } |
| |
| @Override |
| public BytesRef getPayload() throws IOException { |
| - return current.getPayload(); |
| + return current.postings.getPayload(); |
| } |
| |
| @Override |
| public long cost() { |
| long cost = 0; |
| - for (EnumWithSlice enumWithSlice : subs) { |
| - cost += enumWithSlice.postingsEnum.cost(); |
| + for (MappingPostingsSub sub : subs) { |
| + cost += sub.postings.cost(); |
| } |
| return cost; |
| } |
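| |
| The rewrite above hands interleaving off to DocIDMerger: each sub produces already-remapped, increasing doc IDs, and when an index sort is present a heap repeatedly yields the sub with the smallest next mapped ID. A toy, JDK-only model of that interleaving (not the real DocIDMerger API): |
| |
| import java.util.PriorityQueue; |
| |
| /** Toy model: merge per-sub ascending doc-ID arrays into one ascending |
|  *  stream. Queue entries are {nextDocID, subIndex, positionInSub}. */ |
| static void interleave(int[][] mappedDocIDs) { |
|   PriorityQueue<int[]> pq = new PriorityQueue<>((a, b) -> Integer.compare(a[0], b[0])); |
|   for (int i = 0; i < mappedDocIDs.length; i++) { |
|     if (mappedDocIDs[i].length > 0) { |
|       pq.add(new int[] {mappedDocIDs[i][0], i, 0}); |
|     } |
|   } |
|   while (pq.isEmpty() == false) { |
|     int[] top = pq.poll(); |
|     System.out.println("doc " + top[0] + " from sub " + top[1]); |
|     int next = top[2] + 1; |
|     if (next < mappedDocIDs[top[1]].length) { |
|       pq.add(new int[] {mappedDocIDs[top[1]][next], top[1], next}); |
|     } |
|   } |
| } |
| |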
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java indexsort/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java 2016-02-16 11:18:34.665021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -58,31 +58,6 @@ |
| */ |
| public abstract class MergePolicy { |
| |
| - /** A map of doc IDs. */ |
| - public static abstract class DocMap { |
| - /** Sole constructor, typically invoked from sub-classes constructors. */ |
| - protected DocMap() {} |
| - |
| - /** Return the new doc ID according to its old value. */ |
| - public abstract int map(int old); |
| - |
| - /** Useful from an assert. */ |
| - boolean isConsistent(int maxDoc) { |
| - final FixedBitSet targets = new FixedBitSet(maxDoc); |
| - for (int i = 0; i < maxDoc; ++i) { |
| - final int target = map(i); |
| - if (target < 0 || target >= maxDoc) { |
| - assert false : "out of range: " + target + " not in [0-" + maxDoc + "["; |
| - return false; |
| - } else if (targets.get(target)) { |
| - assert false : target + " is already taken (" + i + ")"; |
| - return false; |
| - } |
| - } |
| - return true; |
| - } |
| - } |
| - |
| /** OneMerge provides the information necessary to perform |
| * an individual primitive merge operation, resulting in |
| * a single new segment. The merge spec includes the |
| @@ -140,25 +115,11 @@ |
| public void mergeFinished() throws IOException { |
| } |
| |
| - /** Expert: Get the list of readers to merge. Note that this list does not |
| - * necessarily match the list of segments to merge and should only be used |
| - * to feed SegmentMerger to initialize a merge. When a {@link OneMerge} |
| - * reorders doc IDs, it must override {@link #getDocMap} too so that |
| - * deletes that happened during the merge can be applied to the newly |
| - * merged segment. */ |
| - public List<CodecReader> getMergeReaders() throws IOException { |
| - if (readers == null) { |
| - throw new IllegalStateException("IndexWriter has not initialized readers from the segment infos yet"); |
| - } |
| - final List<CodecReader> readers = new ArrayList<>(this.readers.size()); |
| - for (SegmentReader reader : this.readers) { |
| - if (reader.numDocs() > 0) { |
| - readers.add(reader); |
| - } |
| - } |
| - return Collections.unmodifiableList(readers); |
| + /** Wrap the reader in order to add or remove information for the merged segment. */ |
| + public CodecReader wrapForMerge(CodecReader reader) throws IOException { |
| + return reader; |
| } |
| - |
| + |
| /** |
| * Expert: Sets the {@link SegmentCommitInfo} of the merged segment. |
| * Allows sub-classes to e.g. set diagnostics properties. |
| @@ -175,20 +136,6 @@ |
| return info; |
| } |
| |
| - /** Expert: If {@link #getMergeReaders()} reorders document IDs, this method |
| - * must be overridden to return a mapping from the <i>natural</i> doc ID |
| - * (the doc ID that would result from a natural merge) to the actual doc |
| - * ID. This mapping is used to apply deletions that happened during the |
| - * merge to the new segment. */ |
| - public DocMap getDocMap(MergeState mergeState) { |
| - return new DocMap() { |
| - @Override |
| - public int map(int docID) { |
| - return docID; |
| - } |
| - }; |
| - } |
| - |
| /** Record that an exception occurred while executing |
| * this merge */ |
| synchronized void setException(Throwable error) { |
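| |
| With getMergeReaders() and getDocMap() removed, wrapForMerge is the single remaining hook, and doc-ID remapping is handled centrally by MergeState. A sketch of overriding it; segments is an in-scope List<SegmentCommitInfo>, and FilteringCodecReader stands for a hypothetical FilterCodecReader subclass: |
| |
| MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segments) { |
|   @Override |
|   public CodecReader wrapForMerge(CodecReader reader) throws IOException { |
|     // e.g. strip or rewrite a field while the segment is being merged away: |
|     return new FilteringCodecReader(reader); // hypothetical wrapper |
|   } |
| }; |
| |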
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java indexsort/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -0,0 +1,266 @@ |
| +package org.apache.lucene.index; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.codecs.DocValuesProducer; |
| +import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| +import org.apache.lucene.codecs.StoredFieldsReader; |
| +import org.apache.lucene.codecs.TermVectorsReader; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.util.Bits; |
| + |
| +/** This is a hack to make index sorting fast, with a {@link LeafReader} that always returns merge instances when you ask for the codec readers. */ |
| +class MergeReaderWrapper extends LeafReader { |
| + final CodecReader in; |
| + final FieldsProducer fields; |
| + final NormsProducer norms; |
| + final DocValuesProducer docValues; |
| + final StoredFieldsReader store; |
| + final TermVectorsReader vectors; |
| + |
| + MergeReaderWrapper(CodecReader in) throws IOException { |
| + this.in = in; |
| + |
| + FieldsProducer fields = in.getPostingsReader(); |
| + if (fields != null) { |
| + fields = fields.getMergeInstance(); |
| + } |
| + this.fields = fields; |
| + |
| + NormsProducer norms = in.getNormsReader(); |
| + if (norms != null) { |
| + norms = norms.getMergeInstance(); |
| + } |
| + this.norms = norms; |
| + |
| + DocValuesProducer docValues = in.getDocValuesReader(); |
| + if (docValues != null) { |
| + docValues = docValues.getMergeInstance(); |
| + } |
| + this.docValues = docValues; |
| + |
| + StoredFieldsReader store = in.getFieldsReader(); |
| + if (store != null) { |
| + store = store.getMergeInstance(); |
| + } |
| + this.store = store; |
| + |
| + TermVectorsReader vectors = in.getTermVectorsReader(); |
| + if (vectors != null) { |
| + vectors = vectors.getMergeInstance(); |
| + } |
| + this.vectors = vectors; |
| + } |
| + |
| + @Override |
| + public void addCoreClosedListener(CoreClosedListener listener) { |
| + in.addCoreClosedListener(listener); |
| + } |
| + |
| + @Override |
| + public void removeCoreClosedListener(CoreClosedListener listener) { |
| + in.removeCoreClosedListener(listener); |
| + } |
| + |
| + @Override |
| + public Fields fields() throws IOException { |
| + return fields; |
| + } |
| + |
| + @Override |
| + public NumericDocValues getNumericDocValues(String field) throws IOException { |
| + ensureOpen(); |
| + FieldInfo fi = getFieldInfos().fieldInfo(field); |
| + if (fi == null) { |
| + // Field does not exist |
| + return null; |
| + } |
| + if (fi.getDocValuesType() != DocValuesType.NUMERIC) { |
| + // Field was not indexed with doc values |
| + return null; |
| + } |
| + return docValues.getNumeric(fi); |
| + } |
| + |
| + @Override |
| + public BinaryDocValues getBinaryDocValues(String field) throws IOException { |
| + ensureOpen(); |
| + FieldInfo fi = getFieldInfos().fieldInfo(field); |
| + if (fi == null) { |
| + // Field does not exist |
| + return null; |
| + } |
| + if (fi.getDocValuesType() != DocValuesType.BINARY) { |
| + // Field was not indexed with doc values |
| + return null; |
| + } |
| + return docValues.getBinary(fi); |
| + } |
| + |
| + @Override |
| + public SortedDocValues getSortedDocValues(String field) throws IOException { |
| + ensureOpen(); |
| + FieldInfo fi = getFieldInfos().fieldInfo(field); |
| + if (fi == null) { |
| + // Field does not exist |
| + return null; |
| + } |
| + if (fi.getDocValuesType() != DocValuesType.SORTED) { |
| + // Field was not indexed with doc values |
| + return null; |
| + } |
| + return docValues.getSorted(fi); |
| + } |
| + |
| + @Override |
| + public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException { |
| + ensureOpen(); |
| + FieldInfo fi = getFieldInfos().fieldInfo(field); |
| + if (fi == null) { |
| + // Field does not exist |
| + return null; |
| + } |
| + if (fi.getDocValuesType() != DocValuesType.SORTED_NUMERIC) { |
| + // Field was not indexed with doc values |
| + return null; |
| + } |
| + return docValues.getSortedNumeric(fi); |
| + } |
| + |
| + @Override |
| + public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { |
| + ensureOpen(); |
| + FieldInfo fi = getFieldInfos().fieldInfo(field); |
| + if (fi == null) { |
| + // Field does not exist |
| + return null; |
| + } |
| + if (fi.getDocValuesType() != DocValuesType.SORTED_SET) { |
| + // Field was not indexed with doc values |
| + return null; |
| + } |
| + return docValues.getSortedSet(fi); |
| + } |
| + |
| + @Override |
| + public Bits getDocsWithField(String field) throws IOException { |
| + ensureOpen(); |
| + FieldInfo fi = getFieldInfos().fieldInfo(field); |
| + if (fi == null) { |
| + // Field does not exist |
| + return null; |
| + } |
| + if (fi.getDocValuesType() == DocValuesType.NONE) { |
| + // Field was not indexed with doc values |
| + return null; |
| + } |
| + return docValues.getDocsWithField(fi); |
| + } |
| + |
| + @Override |
| + public NumericDocValues getNormValues(String field) throws IOException { |
| + ensureOpen(); |
| + FieldInfo fi = getFieldInfos().fieldInfo(field); |
| + if (fi == null || !fi.hasNorms()) { |
| + // Field does not exist or does not index norms |
| + return null; |
| + } |
| + return norms.getNorms(fi); |
| + } |
| + |
| + @Override |
| + public FieldInfos getFieldInfos() { |
| + return in.getFieldInfos(); |
| + } |
| + |
| + @Override |
| + public Bits getLiveDocs() { |
| + return in.getLiveDocs(); |
| + } |
| + |
| + @Override |
| + public void checkIntegrity() throws IOException { |
| + in.checkIntegrity(); |
| + } |
| + |
| + @Override |
| + public Fields getTermVectors(int docID) throws IOException { |
| + ensureOpen(); |
| + checkBounds(docID); |
| + if (vectors == null) { |
| + return null; |
| + } |
| + return vectors.get(docID); |
| + } |
| + |
| + @Override |
| + public PointValues getPointValues() { |
| + return in.getPointValues(); |
| + } |
| + |
| + @Override |
| + public int numDocs() { |
| + return in.numDocs(); |
| + } |
| + |
| + @Override |
| + public int maxDoc() { |
| + return in.maxDoc(); |
| + } |
| + |
| + @Override |
| + public void document(int docID, StoredFieldVisitor visitor) throws IOException { |
| + ensureOpen(); |
| + checkBounds(docID); |
| + store.visitDocument(docID, visitor); |
| + } |
| + |
| + @Override |
| + protected void doClose() throws IOException { |
| + in.close(); |
| + } |
| + |
| + @Override |
| + public Object getCoreCacheKey() { |
| + return in.getCoreCacheKey(); |
| + } |
| + |
| + @Override |
| + public Object getCombinedCoreAndDeletesKey() { |
| + return in.getCombinedCoreAndDeletesKey(); |
| + } |
| + |
| + private void checkBounds(int docID) { |
| + if (docID < 0 || docID >= maxDoc()) { |
| + throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")"); |
| + } |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + return "MergeReaderWrapper(" + in + ")"; |
| + } |
| + |
| + @Override |
| + public Sort getIndexSort() { |
| + return in.getIndexSort(); |
| + } |
| +} |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java indexsort/lucene/core/src/java/org/apache/lucene/index/MergeState.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java 2016-03-08 17:22:26.828938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/MergeState.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -18,6 +18,8 @@ |
| |
| |
| import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.Collections; |
| import java.util.List; |
| |
| import org.apache.lucene.codecs.DocValuesProducer; |
| @@ -26,6 +28,7 @@ |
| import org.apache.lucene.codecs.PointsReader; |
| import org.apache.lucene.codecs.StoredFieldsReader; |
| import org.apache.lucene.codecs.TermVectorsReader; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.InfoStream; |
| import org.apache.lucene.util.packed.PackedInts; |
| @@ -36,6 +39,12 @@ |
| * @lucene.experimental */ |
| public class MergeState { |
| |
| + /** Maps document IDs from old segments to document IDs in the new segment */ |
| + public final DocMap[] docMaps; |
| + |
| + // Only used by IW when it must remap deletes that arrived against the merging segments while a merge was running: |
| + final DocMap[] leafDocMaps; |
| + |
| /** {@link SegmentInfo} of the newly merged segment. */ |
| public final SegmentInfo segmentInfo; |
| |
| @@ -60,18 +69,12 @@ |
| /** Live docs for each reader */ |
| public final Bits[] liveDocs; |
| |
| - /** Maps docIDs around deletions. */ |
| - public final DocMap[] docMaps; |
| - |
| /** Postings to merge */ |
| public final FieldsProducer[] fieldsProducers; |
| |
| /** Point readers to merge */ |
| public final PointsReader[] pointsReaders; |
| |
| - /** New docID base per reader. */ |
| - public final int[] docBase; |
| - |
| /** Max docs per reader */ |
| public final int[] maxDocs; |
| |
| @@ -79,11 +82,15 @@ |
| public final InfoStream infoStream; |
| |
| /** Sole constructor. */ |
| - MergeState(List<CodecReader> readers, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException { |
| + MergeState(List<CodecReader> originalReaders, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException { |
| + |
| + this.infoStream = infoStream; |
| + |
| + final Sort indexSort = segmentInfo.getIndexSort(); |
| + int numReaders = originalReaders.size(); |
| + leafDocMaps = new DocMap[numReaders]; |
| + List<CodecReader> readers = maybeSortReaders(originalReaders, segmentInfo); |
| |
| - int numReaders = readers.size(); |
| - docMaps = new DocMap[numReaders]; |
| - docBase = new int[numReaders]; |
| maxDocs = new int[numReaders]; |
| fieldsProducers = new FieldsProducer[numReaders]; |
| normsProducers = new NormsProducer[numReaders]; |
| @@ -94,6 +101,7 @@ |
| fieldInfos = new FieldInfos[numReaders]; |
| liveDocs = new Bits[numReaders]; |
| |
| + int numDocs = 0; |
| for(int i=0;i<numReaders;i++) { |
| final CodecReader reader = readers.get(i); |
| |
| @@ -126,126 +134,138 @@ |
| if (pointsReaders[i] != null) { |
| pointsReaders[i] = pointsReaders[i].getMergeInstance(); |
| } |
| + numDocs += reader.numDocs(); |
| } |
| |
| - this.segmentInfo = segmentInfo; |
| - this.infoStream = infoStream; |
| + segmentInfo.setMaxDoc(numDocs); |
| |
| - setDocMaps(readers); |
| + this.segmentInfo = segmentInfo; |
| + this.docMaps = buildDocMaps(readers, indexSort); |
| } |
| |
| - // NOTE: removes any "all deleted" readers from mergeState.readers |
| - private void setDocMaps(List<CodecReader> readers) throws IOException { |
| - final int numReaders = maxDocs.length; |
| - |
| - // Remap docIDs |
| - int docBase = 0; |
| - for(int i=0;i<numReaders;i++) { |
| - final CodecReader reader = readers.get(i); |
| - this.docBase[i] = docBase; |
| - final DocMap docMap = DocMap.build(reader); |
| - docMaps[i] = docMap; |
| - docBase += docMap.numDocs(); |
| - } |
| - |
| - segmentInfo.setMaxDoc(docBase); |
| - } |
| + private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException { |
| |
| - /** |
| - * Remaps docids around deletes during merge |
| - */ |
| - public static abstract class DocMap { |
| + int numReaders = readers.size(); |
| |
| - DocMap() {} |
| + if (indexSort == null) { |
| + // no index sort ... we only need to map around deletions, and rebase to the merged segment's docID space |
| |
| - /** Returns the mapped docID corresponding to the provided one. */ |
| - public abstract int get(int docID); |
| + int totalDocs = 0; |
| + DocMap[] docMaps = new DocMap[numReaders]; |
| |
| - /** Returns the total number of documents, ignoring |
| - * deletions. */ |
| - public abstract int maxDoc(); |
| - |
| - /** Returns the number of not-deleted documents. */ |
| - public final int numDocs() { |
| - return maxDoc() - numDeletedDocs(); |
| - } |
| - |
| - /** Returns the number of deleted documents. */ |
| - public abstract int numDeletedDocs(); |
| - |
| - /** Returns true if there are any deletions. */ |
| - public boolean hasDeletions() { |
| - return numDeletedDocs() > 0; |
| - } |
| - |
| - /** Creates a {@link DocMap} instance appropriate for |
| - * this reader. */ |
| - public static DocMap build(CodecReader reader) { |
| - final int maxDoc = reader.maxDoc(); |
| - if (!reader.hasDeletions()) { |
| - return new NoDelDocMap(maxDoc); |
| - } |
| - final Bits liveDocs = reader.getLiveDocs(); |
| - return build(maxDoc, liveDocs); |
| - } |
| - |
| - static DocMap build(final int maxDoc, final Bits liveDocs) { |
| - assert liveDocs != null; |
| - final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| - int del = 0; |
| - for (int i = 0; i < maxDoc; ++i) { |
| - docMapBuilder.add(i - del); |
| - if (!liveDocs.get(i)) { |
| - ++del; |
| + // Remap docIDs around deletions: |
| + for (int i = 0; i < numReaders; i++) { |
| + LeafReader reader = readers.get(i); |
| + Bits liveDocs = reader.getLiveDocs(); |
| + |
| + final PackedLongValues delDocMap; |
| + if (liveDocs != null) { |
| + delDocMap = removeDeletes(reader.maxDoc(), liveDocs); |
| + } else { |
| + delDocMap = null; |
| } |
| - } |
| - final PackedLongValues docMap = docMapBuilder.build(); |
| - final int numDeletedDocs = del; |
| - assert docMap.size() == maxDoc; |
| - return new DocMap() { |
| - |
| - @Override |
| - public int get(int docID) { |
| - if (!liveDocs.get(docID)) { |
| - return -1; |
| + |
| + final int docBase = totalDocs; |
| + docMaps[i] = new DocMap() { |
| + @Override |
| + public int get(int docID) { |
| + if (liveDocs == null) { |
| + return docBase + docID; |
| + } else if (liveDocs.get(docID)) { |
| + return docBase + (int) delDocMap.get(docID); |
| + } else { |
| + return -1; |
| + } |
| } |
| - return (int) docMap.get(docID); |
| - } |
| + }; |
| + totalDocs += reader.numDocs(); |
| + } |
| |
| - @Override |
| - public int maxDoc() { |
| - return maxDoc; |
| - } |
| + return docMaps; |
| |
| - @Override |
| - public int numDeletedDocs() { |
| - return numDeletedDocs; |
| - } |
| - }; |
| + } else { |
| + // do a merge sort of the incoming leaves: |
| + return MultiSorter.sort(indexSort, readers); |
| } |
| } |
| |
| - private static final class NoDelDocMap extends DocMap { |
| - |
| - private final int maxDoc; |
| + private List<CodecReader> maybeSortReaders(List<CodecReader> originalReaders, SegmentInfo segmentInfo) throws IOException { |
| |
| - NoDelDocMap(int maxDoc) { |
| - this.maxDoc = maxDoc; |
| + // Default to identity: |
| + for(int i=0;i<originalReaders.size();i++) { |
| + leafDocMaps[i] = new DocMap() { |
| + @Override |
| + public int get(int docID) { |
| + return docID; |
| + } |
| + }; |
| } |
| |
| - @Override |
| - public int get(int docID) { |
| - return docID; |
| + Sort indexSort = segmentInfo.getIndexSort(); |
| + if (indexSort == null) { |
| + return originalReaders; |
| } |
| |
| - @Override |
| - public int maxDoc() { |
| - return maxDoc; |
| + // If an incoming reader is not sorted, because it was flushed by IW, we sort it here: |
| + final Sorter sorter = new Sorter(indexSort); |
| + List<CodecReader> readers = new ArrayList<>(originalReaders.size()); |
| + |
| + for (CodecReader leaf : originalReaders) { |
| + Sort segmentSort = leaf.getIndexSort(); |
| + |
| + if (segmentSort == null) { |
| + // TODO: fix IW to also sort when flushing? It's somewhat tricky because of stored fields and term vectors, which write "live" |
| + // to their index files on each indexed document: |
| + |
| + // This segment was written by flush, so documents are not yet sorted, so we sort them now: |
| + Sorter.DocMap sortDocMap = sorter.sort(leaf); |
| + if (sortDocMap != null) { |
| + if (infoStream.isEnabled("SM")) { |
| + infoStream.message("SM", "segment " + leaf + " is not sorted; wrapping for sort " + indexSort + " now"); |
| + } |
| + leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap)); |
| + leafDocMaps[readers.size()] = new DocMap() { |
| + @Override |
| + public int get(int docID) { |
| + return sortDocMap.oldToNew(docID); |
| + } |
| + }; |
| + } else { |
| + if (infoStream.isEnabled("SM")) { |
| + infoStream.message("SM", "segment " + leaf + " is not sorted, but is already accidentally in sort " + indexSort + " order"); |
| + } |
| + } |
| + |
| + } else { |
| + if (segmentSort.equals(indexSort) == false) { |
| + throw new IllegalArgumentException("index sort mismatch: merged segment has sort=" + indexSort + " but to-be-merged segment has sort=" + segmentSort); |
| + } |
| + if (infoStream.isEnabled("SM")) { |
| + infoStream.message("SM", "segment " + leaf + " already sorted"); |
| + } |
| + } |
| + |
| + readers.add(leaf); |
| } |
| |
| - @Override |
| - public int numDeletedDocs() { |
| - return 0; |
| + return readers; |
| + } |
| + |
| + /** A map of doc IDs. */ |
| + public static abstract class DocMap { |
| + /** Return the mapped docID or -1 if the given doc is not mapped. */ |
| + public abstract int get(int docID); |
| + } |
| + |
| + static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) { |
| + final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| + int del = 0; |
| + for (int i = 0; i < maxDoc; ++i) { |
| + docMapBuilder.add(i - del); |
| + if (liveDocs.get(i) == false) { |
| + ++del; |
| + } |
| } |
| + return docMapBuilder.build(); |
| } |
| } |
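| |
| The delete-compaction map stores, for each docID, how many live documents precede it; deleted slots still get a monotonic placeholder, and the surrounding DocMap returns -1 for them via the liveDocs check. A toy equivalent with a plain array instead of PackedLongValues, for illustration only: |
| |
| import java.util.BitSet; |
| |
| /** Toy removeDeletes: live=[1,0,1,1] yields map=[0,0,1,2]; the DocMap then |
|  *  returns docBase + map[docID] for live docs and -1 for deleted ones. */ |
| static int[] removeDeletes(int maxDoc, BitSet liveDocs) { |
|   int[] map = new int[maxDoc]; |
|   int del = 0; |
|   for (int i = 0; i < maxDoc; i++) { |
|     map[i] = i - del; |
|     if (liveDocs.get(i) == false) { |
|       del++; |
|     } |
|   } |
|   return map; |
| } |
| |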
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java indexsort/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java 2016-02-16 11:18:34.665021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -57,7 +57,7 @@ |
| return this.parent == parent; |
| } |
| |
| - /** Rre-use and reset this instance on the provided slices. */ |
| + /** Re-use and reset this instance on the provided slices. */ |
| public MultiPostingsEnum reset(final EnumWithSlice[] subs, final int numSubs) { |
| this.numSubs = numSubs; |
| for(int i=0;i<numSubs;i++) { |
| @@ -165,9 +165,6 @@ |
| /** Holds a {@link PostingsEnum} along with the |
| * corresponding {@link ReaderSlice}. */ |
| public final static class EnumWithSlice { |
| - EnumWithSlice() { |
| - } |
| - |
| /** {@link PostingsEnum} for this sub-reader. */ |
| public PostingsEnum postingsEnum; |
| |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java indexsort/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -0,0 +1,353 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +package org.apache.lucene.index; |
| + |
| +import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.List; |
| + |
| +import org.apache.lucene.index.MergeState; |
| +import org.apache.lucene.search.LeafFieldComparator; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| +import org.apache.lucene.util.Bits; |
| +import org.apache.lucene.util.PriorityQueue; |
| +import org.apache.lucene.util.packed.PackedInts; |
| +import org.apache.lucene.util.packed.PackedLongValues; |
| + |
| +final class MultiSorter { |
| + |
| + /** Does a merge sort of the leaves of the incoming readers, returning one {@link MergeState.DocMap} per leaf to map each leaf's |
| + * documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort! */ |
| + static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException { |
| + |
| + // TODO: optimize if only 1 reader is incoming, though that's a rare case |
| + |
| + SortField fields[] = sort.getSort(); |
| + final CrossReaderComparator[] comparators = new CrossReaderComparator[fields.length]; |
| + for(int i=0;i<fields.length;i++) { |
| + comparators[i] = getComparator(readers, fields[i]); |
| + } |
| + |
| + int leafCount = readers.size(); |
| + |
| + PriorityQueue<LeafAndDocID> queue = new PriorityQueue<LeafAndDocID>(leafCount) { |
| + @Override |
| + public boolean lessThan(LeafAndDocID a, LeafAndDocID b) { |
| + for(int i=0;i<comparators.length;i++) { |
| + int cmp = comparators[i].compare(a.readerIndex, a.docID, b.readerIndex, b.docID); |
| + if (cmp != 0) { |
| + return cmp < 0; |
| + } |
| + } |
| + |
| + // tie-break by docID natural order: |
| + if (a.readerIndex != b.readerIndex) { |
| + return a.readerIndex < b.readerIndex; |
| + } |
| + return a.docID < b.docID; |
| + } |
| + }; |
| + |
| + PackedLongValues.Builder[] builders = new PackedLongValues.Builder[leafCount]; |
| + |
| + for(int i=0;i<leafCount;i++) { |
| + CodecReader reader = readers.get(i); |
| + queue.add(new LeafAndDocID(i, reader.getLiveDocs(), reader.maxDoc())); |
| + builders[i] = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| + } |
| + |
| + int mappedDocID = 0; |
| + while (queue.size() != 0) { |
| + LeafAndDocID top = queue.top(); |
| + builders[top.readerIndex].add(mappedDocID); |
| + if (top.liveDocs == null || top.liveDocs.get(top.docID)) { |
| + mappedDocID++; |
| + } |
| + top.docID++; |
| + if (top.docID < top.maxDoc) { |
| + queue.updateTop(); |
| + } else { |
| + queue.pop(); |
| + } |
| + } |
| + |
| + MergeState.DocMap[] docMaps = new MergeState.DocMap[leafCount]; |
| + for(int i=0;i<leafCount;i++) { |
| + final PackedLongValues remapped = builders[i].build(); |
| + final Bits liveDocs = readers.get(i).getLiveDocs(); |
| + docMaps[i] = new MergeState.DocMap() { |
| + @Override |
| + public int get(int docID) { |
| + if (liveDocs == null || liveDocs.get(docID)) { |
| + return (int) remapped.get(docID); |
| + } else { |
| + return -1; |
| + } |
| + } |
| + }; |
| + } |
| + |
| + return docMaps; |
| + } |
| + |
| + private static class LeafAndDocID { |
| + final int readerIndex; |
| + final Bits liveDocs; |
| + final int maxDoc; |
| + int docID; |
| + |
| + public LeafAndDocID(int readerIndex, Bits liveDocs, int maxDoc) { |
| + this.readerIndex = readerIndex; |
| + this.liveDocs = liveDocs; |
| + this.maxDoc = maxDoc; |
| + } |
| + } |
| + |
| + private interface CrossReaderComparator { |
| + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB); |
| + } |
| + |
| + private static CrossReaderComparator getComparator(List<CodecReader> readers, SortField sortField) throws IOException { |
| + switch(sortField.getType()) { |
| + |
| + case STRING: |
| + { |
| + // this uses the efficient segment-local ordinal map: |
| + MultiReader multiReader = new MultiReader(readers.toArray(new LeafReader[readers.size()])); |
| + final SortedDocValues sorted = MultiDocValues.getSortedValues(multiReader, sortField.getField()); |
| + final int[] docStarts = new int[readers.size()]; |
| + List<LeafReaderContext> leaves = multiReader.leaves(); |
| + for(int i=0;i<readers.size();i++) { |
| + docStarts[i] = leaves.get(i).docBase; |
| + } |
| + final int missingOrd; |
| + if (sortField.getMissingValue() == SortField.STRING_LAST) { |
| + missingOrd = Integer.MAX_VALUE; |
| + } else { |
| + missingOrd = Integer.MIN_VALUE; |
| + } |
| + |
| + final int reverseMul; |
| + if (sortField.getReverse()) { |
| + reverseMul = -1; |
| + } else { |
| + reverseMul = 1; |
| + } |
| + |
| + return new CrossReaderComparator() { |
| + @Override |
| + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { |
| + int ordA = sorted.getOrd(docStarts[readerIndexA] + docIDA); |
| + if (ordA == -1) { |
| + ordA = missingOrd; |
| + } |
| + int ordB = sorted.getOrd(docStarts[readerIndexB] + docIDB); |
| + if (ordB == -1) { |
| + ordB = missingOrd; |
| + } |
| + return reverseMul * Integer.compare(ordA, ordB); |
| + } |
| + }; |
| + } |
| + |
| + case LONG: |
| + { |
| + List<NumericDocValues> values = new ArrayList<>(); |
| + List<Bits> docsWithFields = new ArrayList<>(); |
| + for(CodecReader reader : readers) { |
| + values.add(DocValues.getNumeric(reader, sortField.getField())); |
| + docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField())); |
| + } |
| + |
| + final int reverseMul; |
| + if (sortField.getReverse()) { |
| + reverseMul = -1; |
| + } else { |
| + reverseMul = 1; |
| + } |
| + |
| + final long missingValue; |
| + |
| + if (sortField.getMissingValue() != null) { |
| + missingValue = (Long) sortField.getMissingValue(); |
| + } else { |
| + missingValue = 0; |
| + } |
| + |
| + return new CrossReaderComparator() { |
| + @Override |
| + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { |
| + long valueA; |
| + if (docsWithFields.get(readerIndexA).get(docIDA)) { |
| + valueA = values.get(readerIndexA).get(docIDA); |
| + } else { |
| + valueA = missingValue; |
| + } |
| + |
| + long valueB; |
| + if (docsWithFields.get(readerIndexB).get(docIDB)) { |
| + valueB = values.get(readerIndexB).get(docIDB); |
| + } else { |
| + valueB = missingValue; |
| + } |
| + return reverseMul * Long.compare(valueA, valueB); |
| + } |
| + }; |
| + } |
| + |
| + case INT: |
| + { |
| + List<NumericDocValues> values = new ArrayList<>(); |
| + List<Bits> docsWithFields = new ArrayList<>(); |
| + for(CodecReader reader : readers) { |
| + values.add(DocValues.getNumeric(reader, sortField.getField())); |
| + docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField())); |
| + } |
| + |
| + final int reverseMul; |
| + if (sortField.getReverse()) { |
| + reverseMul = -1; |
| + } else { |
| + reverseMul = 1; |
| + } |
| + |
| + final int missingValue; |
| + |
| + if (sortField.getMissingValue() != null) { |
| + missingValue = (Integer) sortField.getMissingValue(); |
| + } else { |
| + missingValue = 0; |
| + } |
| + |
| + return new CrossReaderComparator() { |
| + @Override |
| + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { |
| + int valueA; |
| + if (docsWithFields.get(readerIndexA).get(docIDA)) { |
| + valueA = (int) values.get(readerIndexA).get(docIDA); |
| + } else { |
| + valueA = missingValue; |
| + } |
| + |
| + int valueB; |
| + if (docsWithFields.get(readerIndexB).get(docIDB)) { |
| + valueB = (int) values.get(readerIndexB).get(docIDB); |
| + } else { |
| + valueB = missingValue; |
| + } |
| + return reverseMul * Integer.compare(valueA, valueB); |
| + } |
| + }; |
| + } |
| + |
| + case DOUBLE: |
| + { |
| + List<NumericDocValues> values = new ArrayList<>(); |
| + List<Bits> docsWithFields = new ArrayList<>(); |
| + for(CodecReader reader : readers) { |
| + values.add(DocValues.getNumeric(reader, sortField.getField())); |
| + docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField())); |
| + } |
| + |
| + final int reverseMul; |
| + if (sortField.getReverse()) { |
| + reverseMul = -1; |
| + } else { |
| + reverseMul = 1; |
| + } |
| + |
| + final double missingValue; |
| + |
| + if (sortField.getMissingValue() != null) { |
| + missingValue = (Double) sortField.getMissingValue(); |
| + } else { |
| + missingValue = 0.0; |
| + } |
| + |
| + return new CrossReaderComparator() { |
| + @Override |
| + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { |
| + double valueA; |
| + if (docsWithFields.get(readerIndexA).get(docIDA)) { |
| + valueA = Double.longBitsToDouble(values.get(readerIndexA).get(docIDA)); |
| + } else { |
| + valueA = missingValue; |
| + } |
| + |
| + double valueB; |
| + if (docsWithFields.get(readerIndexB).get(docIDB)) { |
| + valueB = Double.longBitsToDouble(values.get(readerIndexB).get(docIDB)); |
| + } else { |
| + valueB = missingValue; |
| + } |
| + return reverseMul * Double.compare(valueA, valueB); |
| + } |
| + }; |
| + } |
| + |
| + case FLOAT: |
| + { |
| + List<NumericDocValues> values = new ArrayList<>(); |
| + List<Bits> docsWithFields = new ArrayList<>(); |
| + for(CodecReader reader : readers) { |
| + values.add(DocValues.getNumeric(reader, sortField.getField())); |
| + docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField())); |
| + } |
| + |
| + final int reverseMul; |
| + if (sortField.getReverse()) { |
| + reverseMul = -1; |
| + } else { |
| + reverseMul = 1; |
| + } |
| + |
| + final float missingValue; |
| + |
| + if (sortField.getMissingValue() != null) { |
| + missingValue = (Float) sortField.getMissingValue(); |
| + } else { |
| + missingValue = 0.0f; |
| + } |
| + |
| + return new CrossReaderComparator() { |
| + @Override |
| + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { |
| + float valueA; |
| + if (docsWithFields.get(readerIndexA).get(docIDA)) { |
| + valueA = Float.intBitsToFloat((int) values.get(readerIndexA).get(docIDA)); |
| + } else { |
| + valueA = missingValue; |
| + } |
| + |
| + float valueB; |
| + if (docsWithFields.get(readerIndexB).get(docIDB)) { |
| + valueB = Float.intBitsToFloat((int) values.get(readerIndexB).get(docIDB)); |
| + } else { |
| + valueB = missingValue; |
| + } |
| + return reverseMul * Float.compare(valueA, valueB); |
| + } |
| + }; |
| + } |
| + |
| + default: |
| + throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType()); |
| + } |
| + } |
| +} |
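| |
| A hand-worked case of the maps this produces (constructed for illustration, not from the patch): leaf 0 with sorted values {3, 7} and leaf 1 with values {1, 9} merge in the order leaf1/doc0, leaf0/doc0, leaf0/doc1, leaf1/doc1, so the doc maps are {1, 2} for leaf 0 and {0, 3} for leaf 1. A JDK-only sketch that reproduces this, with a linear scan standing in for the priority queue and deletions ignored: |
| |
| /** Toy MultiSorter: per-leaf ascending sort values in, per-leaf old-to-new |
|  *  doc maps out. Strict < keeps ties on the lower leaf, matching the |
|  *  queue's (readerIndex, docID) tie-break. */ |
| static int[][] toyMultiSort(long[][] leafValues) { |
|   int[][] docMaps = new int[leafValues.length][]; |
|   int[] cursor = new int[leafValues.length]; |
|   for (int i = 0; i < leafValues.length; i++) { |
|     docMaps[i] = new int[leafValues[i].length]; |
|   } |
|   int mapped = 0; |
|   while (true) { |
|     int best = -1; |
|     for (int i = 0; i < leafValues.length; i++) { |
|       if (cursor[i] < leafValues[i].length |
|           && (best == -1 || leafValues[i][cursor[i]] < leafValues[best][cursor[best]])) { |
|         best = i; |
|       } |
|     } |
|     if (best == -1) break; |
|     docMaps[best][cursor[best]++] = mapped++; |
|   } |
|   return docMaps; // {{3,7},{1,9}} -> {{1,2},{0,3}} |
| } |
| |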
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java 2016-03-08 17:22:26.828938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -26,6 +26,7 @@ |
| import java.util.SortedMap; |
| import java.util.TreeMap; |
| |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.util.Bits; |
| |
| /** An {@link LeafReader} which reads multiple, parallel indexes. Each index |
| @@ -55,6 +56,7 @@ |
| private final boolean closeSubReaders; |
| private final int maxDoc, numDocs; |
| private final boolean hasDeletions; |
| + private final Sort indexSort; |
| private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>(); |
| private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>(); |
| |
| @@ -100,8 +102,18 @@ |
| |
| // TODO: make this read-only in a cleaner way? |
| FieldInfos.Builder builder = new FieldInfos.Builder(); |
| + |
| + Sort indexSort = null; |
| + |
| // build FieldInfos and fieldToReader map: |
| for (final LeafReader reader : this.parallelReaders) { |
| + Sort leafIndexSort = reader.getIndexSort(); |
| + if (indexSort == null) { |
| + indexSort = leafIndexSort; |
| + } else if (leafIndexSort != null && indexSort.equals(leafIndexSort) == false) { |
| + throw new IllegalArgumentException("cannot combine LeafReaders that have different index sorts: saw both sort=" + indexSort + " and " + leafIndexSort); |
| + } |
| + |
| final FieldInfos readerFieldInfos = reader.getFieldInfos(); |
| for (FieldInfo fieldInfo : readerFieldInfos) { |
| // NOTE: first reader having a given field "wins": |
| @@ -115,6 +127,7 @@ |
| } |
| } |
| fieldInfos = builder.finish(); |
| + this.indexSort = indexSort; |
| |
| // build Fields instance |
| for (final LeafReader reader : this.parallelReaders) { |
| @@ -423,4 +436,10 @@ |
| ensureOpen(); |
| return parallelReaders; |
| } |
| + |
| + @Override |
| + public Sort getIndexSort() { |
| + return indexSort; |
| + } |
| + |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java 2016-02-16 11:18:34.669021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -28,6 +28,7 @@ |
| import java.util.regex.Matcher; |
| |
| import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.TrackingDirectoryWrapper; |
| import org.apache.lucene.util.StringHelper; |
| @@ -69,6 +70,8 @@ |
| |
| private final Map<String,String> attributes; |
| |
| + private final Sort indexSort; |
| + |
| // Tracks the Lucene version this segment was created with, since 3.1. Null |
| // indicates an older than 3.0 index, and it's used to detect a too old index. |
| // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and |
| @@ -93,7 +96,7 @@ |
| */ |
| public SegmentInfo(Directory dir, Version version, String name, int maxDoc, |
| boolean isCompoundFile, Codec codec, Map<String,String> diagnostics, |
| - byte[] id, Map<String,String> attributes) { |
| + byte[] id, Map<String,String> attributes, Sort indexSort) { |
| assert !(dir instanceof TrackingDirectoryWrapper); |
| this.dir = Objects.requireNonNull(dir); |
| this.version = Objects.requireNonNull(version); |
| @@ -107,6 +110,7 @@ |
| throw new IllegalArgumentException("invalid id: " + Arrays.toString(id)); |
| } |
| this.attributes = Objects.requireNonNull(attributes); |
| + this.indexSort = indexSort; |
| } |
| |
| /** |
| @@ -194,13 +198,9 @@ |
| s.append('/').append(delCount); |
| } |
| |
| - final String sorter_key = "sorter"; // SortingMergePolicy.SORTER_ID_PROP; // TODO: use this once we can import SortingMergePolicy (currently located in 'misc' instead of 'core') |
| - final String sorter_val = diagnostics.get(sorter_key); |
| - if (sorter_val != null) { |
| - s.append(":["); |
| - s.append(sorter_key); |
| - s.append('='); |
| - s.append(sorter_val); |
| + if (indexSort != null) { |
| + s.append(":[indexSort="); |
| + s.append(indexSort); |
| s.append(']'); |
| } |
| |
| @@ -311,5 +311,10 @@ |
| public Map<String,String> getAttributes() { |
| return attributes; |
| } |
| + |
| + /** Return the sort order of this segment, or null if the index has no sort. */ |
| + public Sort getIndexSort() { |
| + return indexSort; |
| + } |
| } |
| |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java 2016-03-08 17:22:26.832938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -59,6 +59,11 @@ |
| this.codec = segmentInfo.getCodec(); |
| this.context = context; |
| this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers); |
| + if (mergeState.infoStream.isEnabled("SM")) { |
| + if (segmentInfo.getIndexSort() != null) { |
| + mergeState.infoStream.message("SM", "index sort during merge: " + segmentInfo.getIndexSort()); |
| + } |
| + } |
| } |
| |
| /** True if any merging should happen */ |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java 2016-03-08 17:22:26.832938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -28,6 +28,7 @@ |
| import org.apache.lucene.codecs.PointsReader; |
| import org.apache.lucene.codecs.StoredFieldsReader; |
| import org.apache.lucene.codecs.TermVectorsReader; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.util.Bits; |
| @@ -303,4 +304,9 @@ |
| ensureOpen(); |
| core.removeCoreClosedListener(listener); |
| } |
| + |
| + @Override |
| + public Sort getIndexSort() { |
| + return si.info.getIndexSort(); |
| + } |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java indexsort/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java 2016-03-08 17:22:26.832938630 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -26,6 +26,7 @@ |
| import org.apache.lucene.codecs.PointsReader; |
| import org.apache.lucene.codecs.StoredFieldsReader; |
| import org.apache.lucene.codecs.TermVectorsReader; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.util.Bits; |
| |
| /** |
| @@ -125,6 +126,16 @@ |
| public void removeCoreClosedListener(CoreClosedListener listener) { |
| reader.removeCoreClosedListener(listener); |
| } |
| + |
| + @Override |
| + public String toString() { |
| + return "SlowCodecReaderWrapper(" + reader + ")"; |
| + } |
| + |
| + @Override |
| + public Sort getIndexSort() { |
| + return reader.getIndexSort(); |
| + } |
| }; |
| } |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/Sorter.java indexsort/lucene/core/src/java/org/apache/lucene/index/Sorter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/Sorter.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/Sorter.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -0,0 +1,288 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.index; |
| + |
| +import java.io.IOException; |
| +import java.util.Comparator; |
| + |
| +import org.apache.lucene.search.DocIdSetIterator; |
| +import org.apache.lucene.search.LeafFieldComparator; |
| +import org.apache.lucene.search.Scorer; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| +import org.apache.lucene.util.TimSorter; |
| +import org.apache.lucene.util.packed.PackedInts; |
| +import org.apache.lucene.util.packed.PackedLongValues; |
| + |
| +/** |
| + * Sorts documents of a given index by returning a permutation on the document |
| + * IDs. |
| + * @lucene.experimental |
| + */ |
| +final class Sorter { |
| + final Sort sort; |
| + |
| + /** Creates a new Sorter to sort the index with {@code sort} */ |
| + Sorter(Sort sort) { |
| + if (sort.needsScores()) { |
| + throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score"); |
| + } |
| + this.sort = sort; |
| + } |
| + |
| + /** |
| + * A permutation of doc IDs. For every document ID between <tt>0</tt> and |
| + * {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must |
| + * return <code>docID</code>. |
| + */ |
| + static abstract class DocMap { |
| + |
| + /** Given a doc ID from the original index, return its ordinal in the |
| + * sorted index. */ |
| + abstract int oldToNew(int docID); |
| + |
| + /** Given the ordinal of a doc ID, return its doc ID in the original index. */ |
| + abstract int newToOld(int docID); |
| + |
| + /** Return the number of documents in this map. This must be equal to the |
| + * {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the |
| + * {@link org.apache.lucene.index.LeafReader} which is sorted. */ |
| + abstract int size(); |
| + } |
| + |
| + /** Check consistency of a {@link DocMap}, useful for assertions. */ |
| + static boolean isConsistent(DocMap docMap) { |
| + final int maxDoc = docMap.size(); |
| + for (int i = 0; i < maxDoc; ++i) { |
| + final int newID = docMap.oldToNew(i); |
| + final int oldID = docMap.newToOld(newID); |
| + assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID; |
| + assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID; |
| + if (i != oldID || newID < 0 || newID >= maxDoc) { |
| + return false; |
| + } |
| + } |
| + return true; |
| + } |
| + |
| + /** A comparator of doc IDs. */ |
| + static abstract class DocComparator { |
| + |
| + /** Compare docID1 against docID2. The contract for the return value is the |
| + * same as {@link Comparator#compare(Object, Object)}. */ |
| + public abstract int compare(int docID1, int docID2); |
| + |
| + } |
| + |
| + private static final class DocValueSorter extends TimSorter { |
| + |
| + private final int[] docs; |
| + private final Sorter.DocComparator comparator; |
| + private final int[] tmp; |
| + |
| + DocValueSorter(int[] docs, Sorter.DocComparator comparator) { |
| + super(docs.length / 64); |
| + this.docs = docs; |
| + this.comparator = comparator; |
| + tmp = new int[docs.length / 64]; |
| + } |
| + |
| + @Override |
| + protected int compare(int i, int j) { |
| + return comparator.compare(docs[i], docs[j]); |
| + } |
| + |
| + @Override |
| + protected void swap(int i, int j) { |
| + int tmpDoc = docs[i]; |
| + docs[i] = docs[j]; |
| + docs[j] = tmpDoc; |
| + } |
| + |
| + @Override |
| + protected void copy(int src, int dest) { |
| + docs[dest] = docs[src]; |
| + } |
| + |
| + @Override |
| + protected void save(int i, int len) { |
| + System.arraycopy(docs, i, tmp, 0, len); |
| + } |
| + |
| + @Override |
| + protected void restore(int i, int j) { |
| + docs[j] = tmp[i]; |
| + } |
| + |
| + @Override |
| + protected int compareSaved(int i, int j) { |
| + return comparator.compare(tmp[i], docs[j]); |
| + } |
| + } |
| + |
| + /** Computes the old-to-new permutation over the given comparator. */ |
| + private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) { |
| + // check if the index is sorted |
| + boolean sorted = true; |
| + for (int i = 1; i < maxDoc; ++i) { |
| + if (comparator.compare(i-1, i) > 0) { |
| + sorted = false; |
| + break; |
| + } |
| + } |
| + if (sorted) { |
| + return null; |
| + } |
| + |
| + // sort doc IDs |
| + final int[] docs = new int[maxDoc]; |
| + for (int i = 0; i < maxDoc; i++) { |
| + docs[i] = i; |
| + } |
| + |
| + DocValueSorter sorter = new DocValueSorter(docs, comparator); |
| + // It can be common to sort a reader, add docs, sort it again, ... and in |
| + // that case timSort can save a lot of time |
| + sorter.sort(0, docs.length); // docs is now the newToOld mapping |
| + |
| +    // The reason why we use a monotonic PackedLongValues here is that it |
| +    // wastes very little memory if the index is in random order but can save |
| +    // a lot of memory if the index is already "almost" sorted |
| + final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| + for (int i = 0; i < maxDoc; ++i) { |
| + newToOldBuilder.add(docs[i]); |
| + } |
| + final PackedLongValues newToOld = newToOldBuilder.build(); |
| + |
| + // invert the docs mapping: |
| + for (int i = 0; i < maxDoc; ++i) { |
| + docs[(int) newToOld.get(i)] = i; |
| + } // docs is now the oldToNew mapping |
| + |
| + final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| + for (int i = 0; i < maxDoc; ++i) { |
| + oldToNewBuilder.add(docs[i]); |
| + } |
| + final PackedLongValues oldToNew = oldToNewBuilder.build(); |
| + |
| + return new Sorter.DocMap() { |
| + |
| + @Override |
| + public int oldToNew(int docID) { |
| + return (int) oldToNew.get(docID); |
| + } |
| + |
| + @Override |
| + public int newToOld(int docID) { |
| + return (int) newToOld.get(docID); |
| + } |
| + |
| + @Override |
| + public int size() { |
| + return maxDoc; |
| + } |
| + }; |
| + } |
| + |
| + /** |
| + * Returns a mapping from the old document ID to its new location in the |
| + * sorted index. Implementations can use the auxiliary |
| + * {@link #sort(int, DocComparator)} to compute the old-to-new permutation |
| + * given a list of documents and their corresponding values. |
| + * <p> |
| + * A return value of <tt>null</tt> is allowed and means that |
| + * <code>reader</code> is already sorted. |
| + * <p> |
| +   * <b>NOTE:</b> deleted documents are expected to appear in the mapping as |
| +   * well; they will, however, be marked as deleted in the sorted view. |
| + */ |
| + DocMap sort(LeafReader reader) throws IOException { |
| + SortField fields[] = sort.getSort(); |
| + final int reverseMul[] = new int[fields.length]; |
| + final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length]; |
| + |
| + for (int i = 0; i < fields.length; i++) { |
| + reverseMul[i] = fields[i].getReverse() ? -1 : 1; |
| + comparators[i] = fields[i].getComparator(1, i).getLeafComparator(reader.getContext()); |
| + comparators[i].setScorer(FAKESCORER); |
| + } |
| + final DocComparator comparator = new DocComparator() { |
| + @Override |
| + public int compare(int docID1, int docID2) { |
| + try { |
| + for (int i = 0; i < comparators.length; i++) { |
| +          // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co, |
| + // the segments are always the same here... |
| + comparators[i].copy(0, docID1); |
| + comparators[i].setBottom(0); |
| + int comp = reverseMul[i] * comparators[i].compareBottom(docID2); |
| + if (comp != 0) { |
| + return comp; |
| + } |
| + } |
| + return Integer.compare(docID1, docID2); // docid order tiebreak |
| + } catch (IOException e) { |
| + throw new RuntimeException(e); |
| + } |
| + } |
| + }; |
| + return sort(reader.maxDoc(), comparator); |
| + } |
| + |
| + /** |
| + * Returns the identifier of this {@link Sorter}. |
| + * <p>This identifier is similar to {@link Object#hashCode()} and should be |
| +   * chosen so that two instances of this class that sort documents the same |
| +   * way have the same identifier, while instances that sort according to |
| +   * different {@link Sort sorts} have different identifiers. |
| + */ |
| + public String getID() { |
| + return sort.toString(); |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + return getID(); |
| + } |
| + |
| + static final Scorer FAKESCORER = new Scorer(null) { |
| + |
| + float score; |
| + int doc = -1; |
| + int freq = 1; |
| + |
| + @Override |
| + public int docID() { |
| + return doc; |
| + } |
| + |
| +    @Override |
| +    public DocIdSetIterator iterator() { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| + @Override |
| + public int freq() throws IOException { |
| + return freq; |
| + } |
| + |
| + @Override |
| + public float score() throws IOException { |
| + return score; |
| + } |
| + }; |
| + |
| +} |
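| |
| The DocMap contract above (oldToNew and newToOld are inverse permutations over [0, maxDoc)) can be illustrated with a toy array-backed mapping; the inversion loop mirrors what sort(int, DocComparator) does after TimSort has produced the newToOld order: |
| |
|     // toy illustration, not part of the patch |
|     int[] newToOld = {2, 0, 1};                // sorted position -> original doc ID |
|     int[] oldToNew = new int[newToOld.length]; |
|     for (int i = 0; i < newToOld.length; i++) { |
|       oldToNew[newToOld[i]] = i;               // invert the permutation |
|     } |
|     for (int d = 0; d < newToOld.length; d++) { |
|       assert oldToNew[newToOld[d]] == d;       // the invariant checked by isConsistent() |
|     } |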
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -0,0 +1,914 @@ |
| +package org.apache.lucene.index; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +import org.apache.lucene.index.Sorter.DocMap; |
| +import org.apache.lucene.search.DocIdSetIterator; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.store.RAMFile; |
| +import org.apache.lucene.store.RAMInputStream; |
| +import org.apache.lucene.store.RAMOutputStream; |
| +import org.apache.lucene.util.ArrayUtil; |
| +import org.apache.lucene.util.Bits; |
| +import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.TimSorter; |
| +import org.apache.lucene.util.automaton.CompiledAutomaton; |
| + |
| +/** |
| + * An {@link org.apache.lucene.index.LeafReader} which supports sorting documents by a given |
| + * {@link Sort}. This is package private and is only used by Lucene when it needs to merge |
| + * a newly flushed (unsorted) segment. |
| + * |
| + * @lucene.experimental |
| + */ |
| + |
| +class SortingLeafReader extends FilterLeafReader { |
| + |
| + private static class SortingFields extends FilterFields { |
| + |
| + private final Sorter.DocMap docMap; |
| + private final FieldInfos infos; |
| + |
| + public SortingFields(final Fields in, FieldInfos infos, Sorter.DocMap docMap) { |
| + super(in); |
| + this.docMap = docMap; |
| + this.infos = infos; |
| + } |
| + |
| + @Override |
| + public Terms terms(final String field) throws IOException { |
| + Terms terms = in.terms(field); |
| + if (terms == null) { |
| + return null; |
| + } else { |
| + return new SortingTerms(terms, infos.fieldInfo(field).getIndexOptions(), docMap); |
| + } |
| + } |
| + |
| + } |
| + |
| + private static class SortingTerms extends FilterTerms { |
| + |
| + private final Sorter.DocMap docMap; |
| + private final IndexOptions indexOptions; |
| + |
| + public SortingTerms(final Terms in, IndexOptions indexOptions, final Sorter.DocMap docMap) { |
| + super(in); |
| + this.docMap = docMap; |
| + this.indexOptions = indexOptions; |
| + } |
| + |
| + @Override |
| + public TermsEnum iterator() throws IOException { |
| + return new SortingTermsEnum(in.iterator(), docMap, indexOptions, hasPositions()); |
| + } |
| + |
| + @Override |
| + public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) |
| + throws IOException { |
| + return new SortingTermsEnum(in.intersect(compiled, startTerm), docMap, indexOptions, hasPositions()); |
| + } |
| + |
| + } |
| + |
| + private static class SortingTermsEnum extends FilterTermsEnum { |
| + |
| + final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods |
| + private final IndexOptions indexOptions; |
| + private final boolean hasPositions; |
| + |
| + public SortingTermsEnum(final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions, boolean hasPositions) { |
| + super(in); |
| + this.docMap = docMap; |
| + this.indexOptions = indexOptions; |
| + this.hasPositions = hasPositions; |
| + } |
| + |
| + @Override |
| +    public PostingsEnum postings(PostingsEnum reuse, final int flags) throws IOException { |
| + |
| + if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) { |
| + final PostingsEnum inReuse; |
| + final SortingPostingsEnum wrapReuse; |
| + if (reuse != null && reuse instanceof SortingPostingsEnum) { |
| +        // if we're asked to reuse the given PostingsEnum and it is a SortingPostingsEnum, return |
| + // the wrapped one, since some Codecs expect it. |
| + wrapReuse = (SortingPostingsEnum) reuse; |
| + inReuse = wrapReuse.getWrapped(); |
| + } else { |
| + wrapReuse = null; |
| + inReuse = reuse; |
| + } |
| + |
| + final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags); |
| + // we ignore the fact that offsets may be stored but not asked for, |
| + // since this code is expected to be used during addIndexes which will |
| +      // ask for everything. If that assumption changes in the future, we can |
| + // factor in whether 'flags' says offsets are not required. |
| + final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| + return new SortingPostingsEnum(docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets); |
| + } |
| + |
| + final PostingsEnum inReuse; |
| + final SortingDocsEnum wrapReuse; |
| + if (reuse != null && reuse instanceof SortingDocsEnum) { |
| +      // if we're asked to reuse the given PostingsEnum and it is a SortingDocsEnum, return |
| + // the wrapped one, since some Codecs expect it. |
| + wrapReuse = (SortingDocsEnum) reuse; |
| + inReuse = wrapReuse.getWrapped(); |
| + } else { |
| + wrapReuse = null; |
| + inReuse = reuse; |
| + } |
| + |
| + final PostingsEnum inDocs = in.postings(inReuse, flags); |
| +      final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS); |
| + return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap); |
| + } |
| + |
| + } |
| + |
| + private static class SortingBinaryDocValues extends BinaryDocValues { |
| + |
| + private final BinaryDocValues in; |
| + private final Sorter.DocMap docMap; |
| + |
| + SortingBinaryDocValues(BinaryDocValues in, Sorter.DocMap docMap) { |
| + this.in = in; |
| + this.docMap = docMap; |
| + } |
| + |
| + @Override |
| + public BytesRef get(int docID) { |
| + return in.get(docMap.newToOld(docID)); |
| + } |
| + } |
| + |
| + private static class SortingNumericDocValues extends NumericDocValues { |
| + |
| + private final NumericDocValues in; |
| + private final Sorter.DocMap docMap; |
| + |
| + public SortingNumericDocValues(final NumericDocValues in, Sorter.DocMap docMap) { |
| + this.in = in; |
| + this.docMap = docMap; |
| + } |
| + |
| + @Override |
| + public long get(int docID) { |
| + return in.get(docMap.newToOld(docID)); |
| + } |
| + } |
| + |
| + private static class SortingSortedNumericDocValues extends SortedNumericDocValues { |
| + |
| + private final SortedNumericDocValues in; |
| + private final Sorter.DocMap docMap; |
| + |
| + SortingSortedNumericDocValues(SortedNumericDocValues in, DocMap docMap) { |
| + this.in = in; |
| + this.docMap = docMap; |
| + } |
| + |
| + @Override |
| + public int count() { |
| + return in.count(); |
| + } |
| + |
| + @Override |
| + public void setDocument(int doc) { |
| + in.setDocument(docMap.newToOld(doc)); |
| + } |
| + |
| + @Override |
| + public long valueAt(int index) { |
| + return in.valueAt(index); |
| + } |
| + } |
| + |
| + private static class SortingBits implements Bits { |
| + |
| + private final Bits in; |
| + private final Sorter.DocMap docMap; |
| + |
| + public SortingBits(final Bits in, Sorter.DocMap docMap) { |
| + this.in = in; |
| + this.docMap = docMap; |
| + } |
| + |
| + @Override |
| + public boolean get(int index) { |
| + return in.get(docMap.newToOld(index)); |
| + } |
| + |
| + @Override |
| + public int length() { |
| + return in.length(); |
| + } |
| + } |
| + |
| + private static class SortingPointValues extends PointValues { |
| + |
| + private final PointValues in; |
| + private final Sorter.DocMap docMap; |
| + |
| + public SortingPointValues(final PointValues in, Sorter.DocMap docMap) { |
| + this.in = in; |
| + this.docMap = docMap; |
| + } |
| + |
| + @Override |
| + public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { |
| + in.intersect(fieldName, |
| + new IntersectVisitor() { |
| + @Override |
| + public void visit(int docID) throws IOException { |
| + visitor.visit(docMap.oldToNew(docID)); |
| + } |
| + |
| + @Override |
| + public void visit(int docID, byte[] packedValue) throws IOException { |
| + visitor.visit(docMap.oldToNew(docID), packedValue); |
| + } |
| + |
| + @Override |
| + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| + return visitor.compare(minPackedValue, maxPackedValue); |
| + } |
| + }); |
| + } |
| + |
| + @Override |
| + public byte[] getMinPackedValue(String fieldName) throws IOException { |
| + return in.getMinPackedValue(fieldName); |
| + } |
| + |
| + @Override |
| + public byte[] getMaxPackedValue(String fieldName) throws IOException { |
| + return in.getMaxPackedValue(fieldName); |
| + } |
| + |
| + @Override |
| + public int getNumDimensions(String fieldName) throws IOException { |
| + return in.getNumDimensions(fieldName); |
| + } |
| + |
| + @Override |
| + public int getBytesPerDimension(String fieldName) throws IOException { |
| + return in.getBytesPerDimension(fieldName); |
| + } |
| + |
| + @Override |
| + public long size(String fieldName) { |
| + return in.size(fieldName); |
| + } |
| + |
| + @Override |
| + public int getDocCount(String fieldName) { |
| + return in.getDocCount(fieldName); |
| + } |
| + } |
| + |
| + private static class SortingSortedDocValues extends SortedDocValues { |
| + |
| + private final SortedDocValues in; |
| + private final Sorter.DocMap docMap; |
| + |
| + SortingSortedDocValues(SortedDocValues in, Sorter.DocMap docMap) { |
| + this.in = in; |
| + this.docMap = docMap; |
| + } |
| + |
| + @Override |
| + public int getOrd(int docID) { |
| + return in.getOrd(docMap.newToOld(docID)); |
| + } |
| + |
| + @Override |
| + public BytesRef lookupOrd(int ord) { |
| + return in.lookupOrd(ord); |
| + } |
| + |
| + @Override |
| + public int getValueCount() { |
| + return in.getValueCount(); |
| + } |
| + |
| + @Override |
| + public BytesRef get(int docID) { |
| + return in.get(docMap.newToOld(docID)); |
| + } |
| + |
| + @Override |
| + public int lookupTerm(BytesRef key) { |
| + return in.lookupTerm(key); |
| + } |
| + } |
| + |
| + private static class SortingSortedSetDocValues extends SortedSetDocValues { |
| + |
| + private final SortedSetDocValues in; |
| + private final Sorter.DocMap docMap; |
| + |
| + SortingSortedSetDocValues(SortedSetDocValues in, Sorter.DocMap docMap) { |
| + this.in = in; |
| + this.docMap = docMap; |
| + } |
| + |
| + @Override |
| + public long nextOrd() { |
| + return in.nextOrd(); |
| + } |
| + |
| + @Override |
| + public void setDocument(int docID) { |
| + //System.out.println(" slr.sssdv.setDocument docID=" + docID + " this=" + this); |
| + in.setDocument(docMap.newToOld(docID)); |
| + } |
| + |
| + @Override |
| + public BytesRef lookupOrd(long ord) { |
| + return in.lookupOrd(ord); |
| + } |
| + |
| + @Override |
| + public long getValueCount() { |
| + return in.getValueCount(); |
| + } |
| + |
| + @Override |
| + public long lookupTerm(BytesRef key) { |
| + return in.lookupTerm(key); |
| + } |
| + } |
| + |
| + static class SortingDocsEnum extends FilterPostingsEnum { |
| + |
| + private static final class DocFreqSorter extends TimSorter { |
| + |
| + private int[] docs; |
| + private int[] freqs; |
| + private final int[] tmpDocs; |
| + private int[] tmpFreqs; |
| + |
| + public DocFreqSorter(int maxDoc) { |
| + super(maxDoc / 64); |
| + this.tmpDocs = new int[maxDoc / 64]; |
| + } |
| + |
| + public void reset(int[] docs, int[] freqs) { |
| + this.docs = docs; |
| + this.freqs = freqs; |
| + if (freqs != null && tmpFreqs == null) { |
| + tmpFreqs = new int[tmpDocs.length]; |
| + } |
| + } |
| + |
| + @Override |
| + protected int compare(int i, int j) { |
| + return docs[i] - docs[j]; |
| + } |
| + |
| + @Override |
| + protected void swap(int i, int j) { |
| + int tmpDoc = docs[i]; |
| + docs[i] = docs[j]; |
| + docs[j] = tmpDoc; |
| + |
| + if (freqs != null) { |
| + int tmpFreq = freqs[i]; |
| + freqs[i] = freqs[j]; |
| + freqs[j] = tmpFreq; |
| + } |
| + } |
| + |
| + @Override |
| + protected void copy(int src, int dest) { |
| + docs[dest] = docs[src]; |
| + if (freqs != null) { |
| + freqs[dest] = freqs[src]; |
| + } |
| + } |
| + |
| + @Override |
| + protected void save(int i, int len) { |
| + System.arraycopy(docs, i, tmpDocs, 0, len); |
| + if (freqs != null) { |
| + System.arraycopy(freqs, i, tmpFreqs, 0, len); |
| + } |
| + } |
| + |
| + @Override |
| + protected void restore(int i, int j) { |
| + docs[j] = tmpDocs[i]; |
| + if (freqs != null) { |
| + freqs[j] = tmpFreqs[i]; |
| + } |
| + } |
| + |
| + @Override |
| + protected int compareSaved(int i, int j) { |
| + return tmpDocs[i] - docs[j]; |
| + } |
| + } |
| + |
| + private final int maxDoc; |
| + private final DocFreqSorter sorter; |
| + private int[] docs; |
| + private int[] freqs; |
| + private int docIt = -1; |
| + private final int upto; |
| + private final boolean withFreqs; |
| + |
| + SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, final PostingsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException { |
| + super(in); |
| + this.maxDoc = maxDoc; |
| + this.withFreqs = withFreqs; |
| + if (reuse != null) { |
| + if (reuse.maxDoc == maxDoc) { |
| + sorter = reuse.sorter; |
| + } else { |
| + sorter = new DocFreqSorter(maxDoc); |
| + } |
| + docs = reuse.docs; |
| + freqs = reuse.freqs; // maybe null |
| + } else { |
| + docs = new int[64]; |
| + sorter = new DocFreqSorter(maxDoc); |
| + } |
| + docIt = -1; |
| + int i = 0; |
| + int doc; |
| + if (withFreqs) { |
| + if (freqs == null || freqs.length < docs.length) { |
| + freqs = new int[docs.length]; |
| + } |
| +        while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { |
| + if (i >= docs.length) { |
| + docs = ArrayUtil.grow(docs, docs.length + 1); |
| + freqs = ArrayUtil.grow(freqs, freqs.length + 1); |
| + } |
| + docs[i] = docMap.oldToNew(doc); |
| + freqs[i] = in.freq(); |
| + ++i; |
| + } |
| + } else { |
| + freqs = null; |
| +        while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { |
| + if (i >= docs.length) { |
| + docs = ArrayUtil.grow(docs, docs.length + 1); |
| + } |
| + docs[i++] = docMap.oldToNew(doc); |
| + } |
| + } |
| + // TimSort can save much time compared to other sorts in case of |
| + // reverse sorting, or when sorting a concatenation of sorted readers |
| + sorter.reset(docs, freqs); |
| + sorter.sort(0, i); |
| + upto = i; |
| + } |
| + |
| + // for testing |
| + boolean reused(PostingsEnum other) { |
| + if (other == null || !(other instanceof SortingDocsEnum)) { |
| + return false; |
| + } |
| + return docs == ((SortingDocsEnum) other).docs; |
| + } |
| + |
| + @Override |
| + public int advance(final int target) throws IOException { |
| + // need to support it for checkIndex, but in practice it won't be called, so |
| + // don't bother to implement efficiently for now. |
| + return slowAdvance(target); |
| + } |
| + |
| + @Override |
| + public int docID() { |
| + return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt]; |
| + } |
| + |
| + @Override |
| + public int freq() throws IOException { |
| + return withFreqs && docIt < upto ? freqs[docIt] : 1; |
| + } |
| + |
| + @Override |
| + public int nextDoc() throws IOException { |
| + if (++docIt >= upto) return NO_MORE_DOCS; |
| + return docs[docIt]; |
| + } |
| + |
| + /** Returns the wrapped {@link PostingsEnum}. */ |
| + PostingsEnum getWrapped() { |
| + return in; |
| + } |
| + |
| +    // we buffer up docs/freqs only; don't forward any position requests to the underlying enum |
| + |
| + @Override |
| + public int nextPosition() throws IOException { |
| + return -1; |
| + } |
| + |
| + @Override |
| + public int startOffset() throws IOException { |
| + return -1; |
| + } |
| + |
| + @Override |
| + public int endOffset() throws IOException { |
| + return -1; |
| + } |
| + |
| + @Override |
| + public BytesRef getPayload() throws IOException { |
| + return null; |
| + } |
| + } |
| + |
| + static class SortingPostingsEnum extends FilterPostingsEnum { |
| + |
| + /** |
| + * A {@link TimSorter} which sorts two parallel arrays of doc IDs and |
| +     * offsets in one go. Every time a doc ID is 'swapped', its corresponding offset |
| + * is swapped too. |
| + */ |
| + private static final class DocOffsetSorter extends TimSorter { |
| + |
| + private int[] docs; |
| + private long[] offsets; |
| + private final int[] tmpDocs; |
| + private final long[] tmpOffsets; |
| + |
| + public DocOffsetSorter(int maxDoc) { |
| + super(maxDoc / 64); |
| + this.tmpDocs = new int[maxDoc / 64]; |
| + this.tmpOffsets = new long[maxDoc / 64]; |
| + } |
| + |
| + public void reset(int[] docs, long[] offsets) { |
| + this.docs = docs; |
| + this.offsets = offsets; |
| + } |
| + |
| + @Override |
| + protected int compare(int i, int j) { |
| + return docs[i] - docs[j]; |
| + } |
| + |
| + @Override |
| + protected void swap(int i, int j) { |
| + int tmpDoc = docs[i]; |
| + docs[i] = docs[j]; |
| + docs[j] = tmpDoc; |
| + |
| + long tmpOffset = offsets[i]; |
| + offsets[i] = offsets[j]; |
| + offsets[j] = tmpOffset; |
| + } |
| + |
| + @Override |
| + protected void copy(int src, int dest) { |
| + docs[dest] = docs[src]; |
| + offsets[dest] = offsets[src]; |
| + } |
| + |
| + @Override |
| + protected void save(int i, int len) { |
| + System.arraycopy(docs, i, tmpDocs, 0, len); |
| + System.arraycopy(offsets, i, tmpOffsets, 0, len); |
| + } |
| + |
| + @Override |
| + protected void restore(int i, int j) { |
| + docs[j] = tmpDocs[i]; |
| + offsets[j] = tmpOffsets[i]; |
| + } |
| + |
| + @Override |
| + protected int compareSaved(int i, int j) { |
| + return tmpDocs[i] - docs[j]; |
| + } |
| + } |
| + |
| + private final int maxDoc; |
| + private final DocOffsetSorter sorter; |
| + private int[] docs; |
| + private long[] offsets; |
| + private final int upto; |
| + |
| + private final IndexInput postingInput; |
| + private final boolean storeOffsets; |
| + |
| + private int docIt = -1; |
| + private int pos; |
| + private int startOffset = -1; |
| + private int endOffset = -1; |
| + private final BytesRef payload; |
| + private int currFreq; |
| + |
| + private final RAMFile file; |
| + |
| + SortingPostingsEnum(int maxDoc, SortingPostingsEnum reuse, final PostingsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException { |
| + super(in); |
| + this.maxDoc = maxDoc; |
| + this.storeOffsets = storeOffsets; |
| + if (reuse != null) { |
| + docs = reuse.docs; |
| + offsets = reuse.offsets; |
| + payload = reuse.payload; |
| + file = reuse.file; |
| + if (reuse.maxDoc == maxDoc) { |
| + sorter = reuse.sorter; |
| + } else { |
| + sorter = new DocOffsetSorter(maxDoc); |
| + } |
| + } else { |
| + docs = new int[32]; |
| + offsets = new long[32]; |
| + payload = new BytesRef(32); |
| + file = new RAMFile(); |
| + sorter = new DocOffsetSorter(maxDoc); |
| + } |
| + final IndexOutput out = new RAMOutputStream(file, false); |
| + int doc; |
| + int i = 0; |
| + while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { |
| + if (i == docs.length) { |
| + final int newLength = ArrayUtil.oversize(i + 1, 4); |
| + docs = Arrays.copyOf(docs, newLength); |
| + offsets = Arrays.copyOf(offsets, newLength); |
| + } |
| + docs[i] = docMap.oldToNew(doc); |
| + offsets[i] = out.getFilePointer(); |
| + addPositions(in, out); |
| + i++; |
| + } |
| + upto = i; |
| + sorter.reset(docs, offsets); |
| + sorter.sort(0, upto); |
| + out.close(); |
| + this.postingInput = new RAMInputStream("", file); |
| + } |
| + |
| + // for testing |
| + boolean reused(PostingsEnum other) { |
| + if (other == null || !(other instanceof SortingPostingsEnum)) { |
| + return false; |
| + } |
| + return docs == ((SortingPostingsEnum) other).docs; |
| + } |
| + |
| + private void addPositions(final PostingsEnum in, final IndexOutput out) throws IOException { |
| + int freq = in.freq(); |
| + out.writeVInt(freq); |
| + int previousPosition = 0; |
| + int previousEndOffset = 0; |
| + for (int i = 0; i < freq; i++) { |
| + final int pos = in.nextPosition(); |
| + final BytesRef payload = in.getPayload(); |
| +        // The low-order bit of token is set only if there is a payload; the |
| +        // remaining bits are the delta-encoded position. |
| + final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1); |
| + out.writeVInt(token); |
| + previousPosition = pos; |
| + if (storeOffsets) { // don't encode offsets if they are not stored |
| + final int startOffset = in.startOffset(); |
| + final int endOffset = in.endOffset(); |
| + out.writeVInt(startOffset - previousEndOffset); |
| + out.writeVInt(endOffset - startOffset); |
| + previousEndOffset = endOffset; |
| + } |
| + if (payload != null) { |
| + out.writeVInt(payload.length); |
| + out.writeBytes(payload.bytes, payload.offset, payload.length); |
| + } |
| + } |
| + } |
| + |
| + @Override |
| + public int advance(final int target) throws IOException { |
| + // need to support it for checkIndex, but in practice it won't be called, so |
| + // don't bother to implement efficiently for now. |
| + return slowAdvance(target); |
| + } |
| + |
| + @Override |
| + public int docID() { |
| + return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt]; |
| + } |
| + |
| + @Override |
| + public int endOffset() throws IOException { |
| + return endOffset; |
| + } |
| + |
| + @Override |
| + public int freq() throws IOException { |
| + return currFreq; |
| + } |
| + |
| + @Override |
| + public BytesRef getPayload() throws IOException { |
| + return payload.length == 0 ? null : payload; |
| + } |
| + |
| + @Override |
| + public int nextDoc() throws IOException { |
| + if (++docIt >= upto) return DocIdSetIterator.NO_MORE_DOCS; |
| + postingInput.seek(offsets[docIt]); |
| + currFreq = postingInput.readVInt(); |
| + // reset variables used in nextPosition |
| + pos = 0; |
| + endOffset = 0; |
| + return docs[docIt]; |
| + } |
| + |
| + @Override |
| + public int nextPosition() throws IOException { |
| + final int token = postingInput.readVInt(); |
| + pos += token >>> 1; |
| + if (storeOffsets) { |
| + startOffset = endOffset + postingInput.readVInt(); |
| + endOffset = startOffset + postingInput.readVInt(); |
| + } |
| + if ((token & 1) != 0) { |
| + payload.offset = 0; |
| + payload.length = postingInput.readVInt(); |
| + if (payload.length > payload.bytes.length) { |
| + payload.bytes = new byte[ArrayUtil.oversize(payload.length, 1)]; |
| + } |
| + postingInput.readBytes(payload.bytes, 0, payload.length); |
| + } else { |
| + payload.length = 0; |
| + } |
| + return pos; |
| + } |
| + |
| + @Override |
| + public int startOffset() throws IOException { |
| + return startOffset; |
| + } |
| + |
| + /** Returns the wrapped {@link PostingsEnum}. */ |
| + PostingsEnum getWrapped() { |
| + return in; |
| + } |
| + } |
| + |
| + /** Return a sorted view of <code>reader</code> according to the order |
| + * defined by <code>sort</code>. If the reader is already sorted, this |
| + * method might return the reader as-is. */ |
| + public static LeafReader wrap(LeafReader reader, Sort sort) throws IOException { |
| + return wrap(reader, new Sorter(sort).sort(reader)); |
| + } |
| + |
| + /** Expert: same as {@link #wrap(org.apache.lucene.index.LeafReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */ |
| + static LeafReader wrap(LeafReader reader, Sorter.DocMap docMap) { |
| + if (docMap == null) { |
| + // the reader is already sorted |
| + return reader; |
| + } |
| + if (reader.maxDoc() != docMap.size()) { |
| + throw new IllegalArgumentException("reader.maxDoc() should be equal to docMap.size(), got" + reader.maxDoc() + " != " + docMap.size()); |
| + } |
| + assert Sorter.isConsistent(docMap); |
| + return new SortingLeafReader(reader, docMap); |
| + } |
| + |
| + final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods |
| + |
| + private SortingLeafReader(final LeafReader in, final Sorter.DocMap docMap) { |
| + super(in); |
| + this.docMap = docMap; |
| + } |
| + |
| + @Override |
| + public void document(final int docID, final StoredFieldVisitor visitor) throws IOException { |
| + in.document(docMap.newToOld(docID), visitor); |
| + } |
| + |
| + @Override |
| + public Fields fields() throws IOException { |
| + return new SortingFields(in.fields(), in.getFieldInfos(), docMap); |
| + } |
| + |
| + @Override |
| + public BinaryDocValues getBinaryDocValues(String field) throws IOException { |
| + BinaryDocValues oldDocValues = in.getBinaryDocValues(field); |
| + if (oldDocValues == null) { |
| + return null; |
| + } else { |
| + return new SortingBinaryDocValues(oldDocValues, docMap); |
| + } |
| + } |
| + |
| + @Override |
| + public Bits getLiveDocs() { |
| + final Bits inLiveDocs = in.getLiveDocs(); |
| + if (inLiveDocs == null) { |
| + return null; |
| + } else { |
| + return new SortingBits(inLiveDocs, docMap); |
| + } |
| + } |
| + |
| + @Override |
| + public PointValues getPointValues() { |
| + final PointValues inPointValues = in.getPointValues(); |
| + if (inPointValues == null) { |
| + return null; |
| + } else { |
| + return new SortingPointValues(inPointValues, docMap); |
| + } |
| + } |
| + |
| + @Override |
| + public NumericDocValues getNormValues(String field) throws IOException { |
| + final NumericDocValues norm = in.getNormValues(field); |
| + if (norm == null) { |
| + return null; |
| + } else { |
| + return new SortingNumericDocValues(norm, docMap); |
| + } |
| + } |
| + |
| + @Override |
| + public NumericDocValues getNumericDocValues(String field) throws IOException { |
| + final NumericDocValues oldDocValues = in.getNumericDocValues(field); |
| + if (oldDocValues == null) return null; |
| + return new SortingNumericDocValues(oldDocValues, docMap); |
| + } |
| + |
| + @Override |
| + public SortedNumericDocValues getSortedNumericDocValues(String field) |
| + throws IOException { |
| + final SortedNumericDocValues oldDocValues = in.getSortedNumericDocValues(field); |
| + if (oldDocValues == null) { |
| + return null; |
| + } else { |
| + return new SortingSortedNumericDocValues(oldDocValues, docMap); |
| + } |
| + } |
| + |
| + @Override |
| + public SortedDocValues getSortedDocValues(String field) throws IOException { |
| + SortedDocValues sortedDV = in.getSortedDocValues(field); |
| + if (sortedDV == null) { |
| + return null; |
| + } else { |
| + return new SortingSortedDocValues(sortedDV, docMap); |
| + } |
| + } |
| + |
| + @Override |
| + public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { |
| + SortedSetDocValues sortedSetDV = in.getSortedSetDocValues(field); |
| + if (sortedSetDV == null) { |
| + return null; |
| + } else { |
| + return new SortingSortedSetDocValues(sortedSetDV, docMap); |
| + } |
| + } |
| + |
| + @Override |
| + public Bits getDocsWithField(String field) throws IOException { |
| + Bits bits = in.getDocsWithField(field); |
| + if (bits == null || bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) { |
| + return bits; |
| + } else { |
| + return new SortingBits(bits, docMap); |
| + } |
| + } |
| + |
| + @Override |
| + public Fields getTermVectors(final int docID) throws IOException { |
| + return in.getTermVectors(docMap.newToOld(docID)); |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + return "SortingLeafReader(" + in + ")"; |
| + } |
| +} |
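| |
| SortingLeafReader is package-private, so only code in org.apache.lucene.index (such as the merge path) can call wrap(). A minimal sketch of that flow, assuming code living in that package and a LeafReader named unsortedLeaf: |
| |
|     // package org.apache.lucene.index -- sketch only |
|     Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG)); |
|     LeafReader sorted = SortingLeafReader.wrap(unsortedLeaf, sort); |
|     // doc IDs are now permuted; stored fields, doc values, postings, points |
|     // and live docs are all remapped through the same Sorter.DocMap |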
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java indexsort/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -0,0 +1,130 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.search; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| +import java.util.concurrent.atomic.AtomicBoolean; |
| + |
| +import org.apache.lucene.index.IndexWriter; |
| +import org.apache.lucene.index.IndexWriterConfig; |
| +import org.apache.lucene.index.LeafReaderContext; |
| +import org.apache.lucene.search.CollectionTerminatedException; |
| +import org.apache.lucene.search.Collector; |
| +import org.apache.lucene.search.FilterCollector; |
| +import org.apache.lucene.search.FilterLeafCollector; |
| +import org.apache.lucene.search.LeafCollector; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.TopDocsCollector; |
| +import org.apache.lucene.search.TotalHitCountCollector; |
| + |
| +/** |
| + * A {@link Collector} that early terminates collection of documents on a |
| + * per-segment basis, if the segment was sorted according to the given |
| + * {@link Sort}. |
| + * |
| + * <p> |
| + * <b>NOTE:</b> the {@code Collector} detects segments sorted according to an |
| + * index sort set via {@link IndexWriterConfig#setIndexSort}. Also, it collects up to a specified |
| + * {@code numDocsToCollect} from each segment, and therefore is mostly suitable |
| + * for use in conjunction with collectors such as {@link TopDocsCollector}, and |
| + * not e.g. {@link TotalHitCountCollector}. |
| + * <p> |
| + * <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same |
| + * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs} |
| + * will be correct. However, the {@link TopDocsCollector#getTotalHits() total |
| + * hit count} will be vastly underestimated since not all matching documents will have |
| + * been collected. |
| + * |
| + * @lucene.experimental |
| + */ |
| + |
| +public class EarlyTerminatingSortingCollector extends FilterCollector { |
| + |
| + /** Returns whether collection can be early-terminated if it sorts with the |
| + * provided {@link Sort} and if segments are merged with the provided |
| + * {@link Sort}. */ |
| + public static boolean canEarlyTerminate(Sort searchSort, Sort mergePolicySort) { |
| + final SortField[] fields1 = searchSort.getSort(); |
| + final SortField[] fields2 = mergePolicySort.getSort(); |
| + // early termination is possible if fields1 is a prefix of fields2 |
| + if (fields1.length > fields2.length) { |
| + return false; |
| + } |
| + return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length)); |
| + } |
| + |
| + /** Sort used to sort the search results */ |
| + protected final Sort sort; |
| + /** Number of documents to collect in each segment */ |
| + protected final int numDocsToCollect; |
| + private final AtomicBoolean terminatedEarly = new AtomicBoolean(false); |
| + |
| + /** |
| + * Create a new {@link EarlyTerminatingSortingCollector} instance. |
| + * |
| + * @param in |
| + * the collector to wrap |
| + * @param sort |
| + * the sort you are sorting the search results on |
| + * @param numDocsToCollect |
| + * the number of documents to collect on each segment. When wrapping |
| + * a {@link TopDocsCollector}, this number should be the number of |
| + * hits. |
| +   * @throws IllegalArgumentException if <code>numDocsToCollect</code> is |
| +   *           less than or equal to 0. |
| + */ |
| + public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) { |
| + super(in); |
| + if (numDocsToCollect <= 0) { |
| + throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect); |
| + } |
| + this.sort = sort; |
| + this.numDocsToCollect = numDocsToCollect; |
| + } |
| + |
| + @Override |
| + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { |
| + Sort segmentSort = context.reader().getIndexSort(); |
| + if (segmentSort != null && canEarlyTerminate(sort, segmentSort) == false) { |
| + throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + segmentSort); |
| + } |
| + |
| + if (segmentSort != null) { |
| + // segment is sorted, can early-terminate |
| + return new FilterLeafCollector(super.getLeafCollector(context)) { |
| + private int numCollected; |
| + |
| + @Override |
| + public void collect(int doc) throws IOException { |
| + super.collect(doc); |
| + if (++numCollected >= numDocsToCollect) { |
| + terminatedEarly.set(true); |
| + throw new CollectionTerminatedException(); |
| + } |
| + } |
| + |
| + }; |
| + } else { |
| + return super.getLeafCollector(context); |
| + } |
| + } |
| + |
| + public boolean terminatedEarly() { |
| + return terminatedEarly.get(); |
| + } |
| +} |
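| |
| A sketch of wrapping a TopFieldCollector whose sort matches the index sort. TopFieldCollector.create's boolean arguments (fillFields, trackDocScores, trackMaxScore) vary across versions and are illustrative here, and searcher/query are assumed to exist: |
| |
|     Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG)); |
|     TopFieldCollector top = TopFieldCollector.create(indexSort, 10, true, false, false); |
|     EarlyTerminatingSortingCollector early = |
|         new EarlyTerminatingSortingCollector(top, indexSort, 10); |
|     searcher.search(query, early); |
|     TopDocs hits = top.topDocs();      // correct top 10 |
|     if (early.terminatedEarly()) { |
|       // hits.totalHits is only a lower bound, as noted in the class javadoc |
|     } |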
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java indexsort/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java 2016-03-23 06:11:24.645189984 -0400 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -819,6 +819,7 @@ |
| sumTotalTermFreq = terms.getSumTotalTermFreq(); |
| sumDocFreq = terms.getSumDocFreq(); |
| } |
| + |
| return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq); |
| } |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/search/SortField.java indexsort/lucene/core/src/java/org/apache/lucene/search/SortField.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/search/SortField.java 2016-03-02 04:32:40.439807336 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/search/SortField.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -77,9 +77,6 @@ |
| * uses ordinals to do the sorting. */ |
| STRING_VAL, |
| |
| - /** Sort use byte[] index values. */ |
| - BYTES, |
| - |
| /** Force rewriting of SortField using {@link SortField#rewrite(IndexSearcher)} |
| * before it can be used for sorting */ |
| REWRITEABLE |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/search/Sort.java indexsort/lucene/core/src/java/org/apache/lucene/search/Sort.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/search/Sort.java 2016-02-16 11:18:34.677021815 -0500 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/search/Sort.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -147,6 +147,9 @@ |
| * etc. Finally, if there is still a tie after all SortFields |
| * are checked, the internal Lucene docid is used to break it. */ |
| public void setSort(SortField... fields) { |
| + if (fields.length == 0) { |
| + throw new IllegalArgumentException("There must be at least 1 sort field"); |
| + } |
| this.fields = fields; |
| } |
| |
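| |
| After this change an empty sort array is rejected up front instead of failing later; a small sketch (the varargs Sort constructor delegates to setSort, so it is covered as well): |
| |
|     new Sort(SortField.FIELD_DOC);  // fine: sort by doc ID |
|     new Sort(new SortField[0]);     // now throws IllegalArgumentException |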
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java indexsort/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java |
| --- trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java 2016-04-24 06:00:46.369895938 -0400 |
| +++ indexsort/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -299,9 +299,6 @@ |
| final BKDReader.IntersectState state; |
| final MergeState.DocMap docMap; |
| |
| - /** Base offset for all our docIDs */ |
| - final int docIDBase; |
| - |
| /** Current doc ID */ |
| public int docID; |
| |
| @@ -314,7 +311,7 @@ |
| /** Which leaf block we are up to */ |
| private int blockID; |
| |
| - public MergeReader(BKDReader bkd, MergeState.DocMap docMap, int docIDBase) throws IOException { |
| + public MergeReader(BKDReader bkd, MergeState.DocMap docMap) throws IOException { |
| this.bkd = bkd; |
| state = new BKDReader.IntersectState(bkd.in.clone(), |
| bkd.numDims, |
| @@ -322,7 +319,6 @@ |
| bkd.maxPointsInLeafNode, |
| null); |
| this.docMap = docMap; |
| - this.docIDBase = docIDBase; |
| long minFP = Long.MAX_VALUE; |
| //System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length); |
| for(long fp : bkd.leafBlockFPs) { |
| @@ -396,14 +392,14 @@ |
| } |
| |
| // Tie break by sorting smaller docIDs earlier: |
| - return a.docIDBase < b.docIDBase; |
| + return a.docID < b.docID; |
| } |
| } |
| |
| /** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already |
| * sorted values and currently only works when numDims==1. This returns -1 if all documents containing |
| * dimensional values were deleted. */ |
| - public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers, List<Integer> docIDBases) throws IOException { |
| + public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException { |
| if (numDims != 1) { |
| throw new UnsupportedOperationException("numDims must be 1 but got " + numDims); |
| } |
| @@ -411,8 +407,6 @@ |
| throw new IllegalStateException("cannot mix add and merge"); |
| } |
| |
| - //System.out.println("BKDW.merge segs=" + readers.size()); |
| - |
| // Catch user silliness: |
| if (heapPointWriter == null && tempInput == null) { |
| throw new IllegalStateException("already finished"); |
| @@ -433,7 +427,7 @@ |
| } else { |
| docMap = docMaps.get(i); |
| } |
| - MergeReader reader = new MergeReader(bkd, docMap, docIDBases.get(i)); |
| + MergeReader reader = new MergeReader(bkd, docMap); |
| if (reader.next()) { |
| queue.add(reader); |
| } |
| @@ -468,7 +462,7 @@ |
| // System.out.println("iter reader=" + reader); |
| |
| // NOTE: doesn't work with subclasses (e.g. SimpleText!) |
| - int docID = reader.docIDBase + reader.docID; |
| + int docID = reader.docID; |
| leafBlockDocIDs[leafCount] = docID; |
| System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength); |
| docsSeen.set(docID); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec indexsort/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec |
| --- trunk/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec 2016-01-24 13:09:49.940989953 -0500 |
| +++ indexsort/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec 2016-05-10 05:44:23.748471119 -0400 |
| @@ -13,4 +13,4 @@ |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| -org.apache.lucene.codecs.lucene60.Lucene60Codec |
| +org.apache.lucene.codecs.lucene62.Lucene62Codec |
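| |
| The SPI registration above is what makes the new codec resolvable by name; a short sketch, assuming the default codec was also switched to Lucene62 elsewhere in this patch: |
| |
|     import org.apache.lucene.codecs.Codec; |
| |
|     // resolved through META-INF/services/org.apache.lucene.codecs.Codec |
|     Codec codec = Codec.forName("Lucene62"); |
|     assert "Lucene62".equals(Codec.getDefault().getName()); // new default |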
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java 2016-02-16 11:18:34.701021815 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,39 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.codecs.lucene50; |
| - |
| - |
| -import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.index.BaseSegmentInfoFormatTestCase; |
| -import org.apache.lucene.util.TestUtil; |
| -import org.apache.lucene.util.Version; |
| - |
| -/** |
| - * Tests Lucene50SegmentInfoFormat |
| - */ |
| -public class TestLucene50SegmentInfoFormat extends BaseSegmentInfoFormatTestCase { |
| - |
| - @Override |
| - protected Version[] getVersions() { |
| - return new Version[] { Version.LATEST }; |
| - } |
| - |
| - @Override |
| - protected Codec getCodec() { |
| - return TestUtil.getDefaultCodec(); |
| - } |
| -} |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java 2016-03-02 04:32:40.439807336 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -19,7 +19,7 @@ |
| |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; |
| -import org.apache.lucene.codecs.lucene60.Lucene60Codec; |
| +import org.apache.lucene.codecs.lucene62.Lucene62Codec; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.StoredField; |
| import org.apache.lucene.index.BaseStoredFieldsFormatTestCase; |
| @@ -33,7 +33,7 @@ |
| public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase { |
| @Override |
| protected Codec getCodec() { |
| - return new Lucene60Codec(Mode.BEST_COMPRESSION); |
| + return new Lucene62Codec(Mode.BEST_COMPRESSION); |
| } |
| |
| /** |
| @@ -44,7 +44,7 @@ |
| Directory dir = newDirectory(); |
| for (int i = 0; i < 10; i++) { |
| IndexWriterConfig iwc = newIndexWriterConfig(); |
| - iwc.setCodec(new Lucene60Codec(RandomPicks.randomFrom(random(), Mode.values()))); |
| + iwc.setCodec(new Lucene62Codec(RandomPicks.randomFrom(random(), Mode.values()))); |
| IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig()); |
| Document doc = new Document(); |
| doc.add(new StoredField("field1", "value1")); |
| @@ -71,7 +71,7 @@ |
| |
| public void testInvalidOptions() throws Exception { |
| expectThrows(NullPointerException.class, () -> { |
| - new Lucene60Codec(null); |
| + new Lucene62Codec(null); |
| }); |
| |
| expectThrows(NullPointerException.class, () -> { |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java 2016-02-16 11:18:34.701021815 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -18,14 +18,14 @@ |
| |
| |
| import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.codecs.lucene60.Lucene60Codec; |
| +import org.apache.lucene.codecs.lucene62.Lucene62Codec; |
| import org.apache.lucene.index.BaseNormsFormatTestCase; |
| |
| /** |
| * Tests Lucene53NormsFormat |
| */ |
| public class TestLucene53NormsFormat extends BaseNormsFormatTestCase { |
| - private final Codec codec = new Lucene60Codec(); |
| + private final Codec codec = new Lucene62Codec(); |
| |
| @Override |
| protected Codec getCodec() { |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -0,0 +1,39 @@ |
| +package org.apache.lucene.codecs.lucene62; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.index.BaseSegmentInfoFormatTestCase; |
| +import org.apache.lucene.util.TestUtil; |
| +import org.apache.lucene.util.Version; |
| + |
| +/** |
| + * Tests Lucene62SegmentInfoFormat |
| + */ |
| +public class TestLucene62SegmentInfoFormat extends BaseSegmentInfoFormatTestCase { |
| + |
| + @Override |
| + protected Version[] getVersions() { |
| + return new Version[] { Version.LATEST }; |
| + } |
| + |
| + @Override |
| + protected Codec getCodec() { |
| + return TestUtil.getDefaultCodec(); |
| + } |
| +} |
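| |
| Note: getVersions() returns only Version.LATEST because the new format is written solely by the current version; older segments remain readable through the deprecated formats in backward-codecs. What the new .si file round-trips, in sketch form (assumes this patch's SegmentInfo constructor and the SegmentInfoFormat read/write signatures; the class is illustrative): |
| |
| import java.util.Collections; |
| import java.util.HashMap; |
| |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.Version; |
| |
| class SegmentInfoSortRoundTripSketch { |
|   public static void main(String[] args) throws Exception { |
|     Directory dir = new RAMDirectory(); |
|     byte[] id = StringHelper.randomId(); |
|     // The trailing Sort argument is the new index sort; null means unsorted. |
|     SegmentInfo info = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, |
|         Codec.getDefault(), Collections.emptyMap(), id, new HashMap<>(), |
|         new Sort(new SortField("foo", SortField.Type.LONG))); |
|     Codec.getDefault().segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
|     SegmentInfo read = Codec.getDefault().segmentInfoFormat() |
|         .read(dir, "_0", id, IOContext.DEFAULT); |
|     System.out.println(read.getIndexSort()); // <long: "foo"> |
|   } |
| } |
| |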
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java indexsort/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java 2016-04-24 06:00:27.689895636 -0400 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java 2016-05-10 05:44:23.748471119 -0400 |
| @@ -24,8 +24,6 @@ |
| import org.apache.lucene.codecs.PointsFormat; |
| import org.apache.lucene.codecs.PointsReader; |
| import org.apache.lucene.codecs.PointsWriter; |
| -import org.apache.lucene.codecs.lucene60.Lucene60PointsReader; |
| -import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.LongPoint; |
| import org.apache.lucene.search.IndexSearcher; |
| @@ -143,6 +141,6 @@ |
| } |
| |
| private static Codec getCodec() { |
| - return Codec.forName("Lucene60"); |
| + return Codec.forName("Lucene62"); |
| } |
| } |
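| |
| Note: Codec.forName resolves codecs through SPI, so this monster test only needs the registered name. A minimal sketch of the lookup (assumes lucene-core's 6.2 SPI registration): |
| |
| import org.apache.lucene.codecs.Codec; |
| |
| class CodecLookupSketch { |
|   public static void main(String[] args) { |
|     // "Lucene62" resolves once the new codec is listed in |
|     // META-INF/services/org.apache.lucene.codecs.Codec. |
|     Codec codec = Codec.forName("Lucene62"); |
|     System.out.println(codec.getName());         // Lucene62 |
|     System.out.println(Codec.availableCodecs()); // all registered names |
|   } |
| } |
| |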
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java indexsort/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java 2016-02-16 11:18:34.705021816 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -53,7 +53,7 @@ |
| // disk (but, should run successfully). Best to run w/ |
| // -Dtests.codec=<current codec>, and w/ plenty of RAM, eg: |
| // |
| -// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene60 -Dtestcase=Test2BTerms |
| +// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene62 -Dtestcase=Test2BTerms |
| // |
| @SuppressCodecs({ "SimpleText", "Memory", "Direct" }) |
| @Monster("very slow, use 5g minimum heap") |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java 2016-03-02 04:32:40.443807336 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -39,6 +39,8 @@ |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.PhraseQuery; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.BaseDirectoryWrapper; |
| import org.apache.lucene.store.Directory; |
| @@ -1281,4 +1283,53 @@ |
| w2.close(); |
| IOUtils.close(src, dest); |
| } |
| + |
| + public void testIllegalIndexSortChange1() throws Exception { |
| + Directory dir1 = newDirectory(); |
| + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); |
| + RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1); |
| + w1.addDocument(new Document()); |
| + w1.commit(); |
| + w1.addDocument(new Document()); |
| + w1.commit(); |
| + // so the index sort is in fact burned into the index: |
| + w1.forceMerge(1); |
| + w1.close(); |
| + |
| + Directory dir2 = newDirectory(); |
| + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc2.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING))); |
| + RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2); |
| + String message = expectThrows(IllegalArgumentException.class, () -> { |
| + w2.addIndexes(dir1); |
| + }).getMessage(); |
| + assertEquals("cannot change index sort from <int: \"foo\"> to <string: \"foo\">", message); |
| + IOUtils.close(dir1, w2, dir2); |
| + } |
| + |
| + public void testIllegalIndexSortChange2() throws Exception { |
| + Directory dir1 = newDirectory(); |
| + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); |
| + RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1); |
| + w1.addDocument(new Document()); |
| + w1.commit(); |
| + w1.addDocument(new Document()); |
| + w1.commit(); |
| + // so the index sort is in fact burned into the index: |
| + w1.forceMerge(1); |
| + w1.close(); |
| + |
| + Directory dir2 = newDirectory(); |
| + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc2.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING))); |
| + RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2); |
| + IndexReader r1 = DirectoryReader.open(dir1); |
| + String message = expectThrows(IllegalArgumentException.class, () -> { |
| + w2.addIndexes((SegmentReader) getOnlyLeafReader(r1)); |
| + }).getMessage(); |
| + assertEquals("cannot change index sort from <int: \"foo\"> to <string: \"foo\">", message); |
| + IOUtils.close(r1, dir1, w2, dir2); |
| + } |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java 2016-02-16 11:18:34.705021816 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -222,7 +222,7 @@ |
| final FieldInfos fieldInfos = builder.finish(); |
| final Directory dir = newDirectory(); |
| Codec codec = Codec.getDefault(); |
| - final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| |
| this.write(si, fieldInfos, dir, fields); |
| final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()))); |
| @@ -279,7 +279,7 @@ |
| } |
| |
| Codec codec = Codec.getDefault(); |
| - final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| this.write(si, fieldInfos, dir, fields); |
| |
| if (VERBOSE) { |
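| |
| Note: the trailing null added to these SegmentInfo constructions is the new index-sort argument (a Sort, or null for an unsorted segment); the round-trip sketch after TestLucene62SegmentInfoFormat above shows the non-null case. |
| |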
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java 2016-03-13 05:38:07.383183845 -0400 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -503,7 +503,7 @@ |
| |
| class ReindexingOneMerge extends OneMerge { |
| |
| - List<LeafReader> parallelReaders; |
| + final List<ParallelLeafReader> parallelReaders = new ArrayList<>(); |
| final long schemaGen; |
| |
| ReindexingOneMerge(List<SegmentCommitInfo> segments) { |
| @@ -519,33 +519,23 @@ |
| } |
| |
| @Override |
| - public List<CodecReader> getMergeReaders() throws IOException { |
| - if (parallelReaders == null) { |
| - parallelReaders = new ArrayList<>(); |
| - for (CodecReader reader : super.getMergeReaders()) { |
| - parallelReaders.add(getCurrentReader((SegmentReader)reader, schemaGen)); |
| - } |
| - } |
| - |
| - // TODO: fix ParallelLeafReader, if this is a good use case |
| - List<CodecReader> mergeReaders = new ArrayList<>(); |
| - for (LeafReader reader : parallelReaders) { |
| - mergeReaders.add(SlowCodecReaderWrapper.wrap(reader)); |
| + public CodecReader wrapForMerge(CodecReader reader) throws IOException { |
| + LeafReader wrapped = getCurrentReader((SegmentReader)reader, schemaGen); |
| + if (wrapped instanceof ParallelLeafReader) { |
| + parallelReaders.add((ParallelLeafReader) wrapped); |
| } |
| - return mergeReaders; |
| + return SlowCodecReaderWrapper.wrap(wrapped); |
| } |
| |
| @Override |
| public void mergeFinished() throws IOException { |
| Throwable th = null; |
| - for(LeafReader r : parallelReaders) { |
| - if (r instanceof ParallelLeafReader) { |
| - try { |
| - r.decRef(); |
| - } catch (Throwable t) { |
| - if (th == null) { |
| - th = t; |
| - } |
| + for (ParallelLeafReader r : parallelReaders) { |
| + try { |
| + r.decRef(); |
| + } catch (Throwable t) { |
| + if (th == null) { |
| + th = t; |
| } |
| } |
| } |
| @@ -561,10 +551,6 @@ |
| super.setMergeInfo(info); |
| } |
| |
| - @Override |
| - public MergePolicy.DocMap getDocMap(final MergeState mergeState) { |
| - return super.getDocMap(mergeState); |
| - } |
| } |
| |
| class ReindexingMergeSpecification extends MergeSpecification { |
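| |
| Note: the rewrite above tracks an API change. OneMerge.getMergeReaders() is replaced by a per-reader wrapForMerge hook, so wrappers no longer rebuild and cache the whole reader list. A minimal sketch of the new surface (assumes this patch's MergePolicy; the subclass is illustrative): |
| |
| import java.io.IOException; |
| import java.util.List; |
| |
| import org.apache.lucene.index.CodecReader; |
| import org.apache.lucene.index.MergePolicy.OneMerge; |
| import org.apache.lucene.index.SegmentCommitInfo; |
| |
| class FilteringOneMerge extends OneMerge { |
|   FilteringOneMerge(List<SegmentCommitInfo> segments) { |
|     super(segments); |
|   } |
| |
|   @Override |
|   public CodecReader wrapForMerge(CodecReader reader) throws IOException { |
|     // Called once per incoming segment reader, just before it is merged; |
|     // decorate it here (e.g. attach parallel doc values), or return it |
|     // unchanged to opt out. |
|     return reader; |
|   } |
| } |
| |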
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -0,0 +1,205 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.index; |
| + |
| +import java.util.ArrayList; |
| +import java.util.Collections; |
| +import java.util.List; |
| + |
| +import org.apache.lucene.util.Bits; |
| +import org.apache.lucene.util.FixedBitSet; |
| +import org.apache.lucene.util.LuceneTestCase; |
| +import org.apache.lucene.util.TestUtil; |
| + |
| +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| + |
| +public class TestDocIDMerger extends LuceneTestCase { |
| + |
| + private static class TestSubUnsorted extends DocIDMerger.Sub { |
| + private int docID = -1; |
| + final int valueStart; |
| + final int maxDoc; |
| + |
| + public TestSubUnsorted(MergeState.DocMap docMap, int maxDoc, int valueStart) { |
| + super(docMap); |
| + this.maxDoc = maxDoc; |
| + this.valueStart = valueStart; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + |
| + public int getValue() { |
| + return valueStart + docID; |
| + } |
| + } |
| + |
| + public void testNoSort() throws Exception { |
| + |
| + int subCount = TestUtil.nextInt(random(), 1, 20); |
| + List<TestSubUnsorted> subs = new ArrayList<>(); |
| + int valueStart = 0; |
| + for(int i=0;i<subCount;i++) { |
| + int maxDoc = TestUtil.nextInt(random(), 1, 1000); |
| + final int docBase = valueStart; |
| + subs.add(new TestSubUnsorted(new MergeState.DocMap() { |
| + @Override |
| + public int get(int docID) { |
| + return docBase + docID; |
| + } |
| + }, maxDoc, valueStart)); |
| + valueStart += maxDoc; |
| + } |
| + |
| + DocIDMerger<TestSubUnsorted> merger = new DocIDMerger<>(subs, false); |
| + |
| + int count = 0; |
| + while (true) { |
| + TestSubUnsorted sub = merger.next(); |
| + if (sub == null) { |
| + break; |
| + } |
| + assertEquals(count, sub.mappedDocID); |
| + assertEquals(count, sub.getValue()); |
| + count++; |
| + } |
| + |
| + assertEquals(valueStart, count); |
| + } |
| + |
| + private static class TestSubSorted extends DocIDMerger.Sub { |
| + private int docID = -1; |
| + final int maxDoc; |
| + final int index; |
| + |
| + public TestSubSorted(MergeState.DocMap docMap, int maxDoc, int index) { |
| + super(docMap); |
| + this.maxDoc = maxDoc; |
| + this.index = index; |
| + } |
| + |
| + @Override |
| + public int nextDoc() { |
| + docID++; |
| + if (docID == maxDoc) { |
| + return NO_MORE_DOCS; |
| + } else { |
| + return docID; |
| + } |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + return "TestSubSorted(index=" + index + ", mappedDocID=" + mappedDocID+ ")"; |
| + } |
| + } |
| + |
| + public void testWithSort() throws Exception { |
| + |
| + int subCount = TestUtil.nextInt(random(), 1, 20); |
| + List<int[]> oldToNew = new ArrayList<>(); |
| + // how many docs we've written to each sub: |
| + List<Integer> uptos = new ArrayList<>(); |
| + int totDocCount = 0; |
| + for(int i=0;i<subCount;i++) { |
| + int maxDoc = TestUtil.nextInt(random(), 1, 1000); |
| + uptos.add(0); |
| + oldToNew.add(new int[maxDoc]); |
| + totDocCount += maxDoc; |
| + } |
| + |
| + List<int[]> completedSubs = new ArrayList<>(); |
| + |
| + // randomly distribute target docIDs into the segments: |
| + for(int docID=0;docID<totDocCount;docID++) { |
| + int sub = random().nextInt(oldToNew.size()); |
| + int upto = uptos.get(sub); |
| + int[] subDocs = oldToNew.get(sub); |
| + subDocs[upto] = docID; |
| + upto++; |
| + if (upto == subDocs.length) { |
| + completedSubs.add(subDocs); |
| + oldToNew.remove(sub); |
| + uptos.remove(sub); |
| + } else { |
| + uptos.set(sub, upto); |
| + } |
| + } |
| + assertEquals(0, oldToNew.size()); |
| + |
| + // sometimes do some deletions: |
| + final FixedBitSet liveDocs; |
| + if (random().nextBoolean()) { |
| + liveDocs = new FixedBitSet(totDocCount); |
| + liveDocs.set(0, totDocCount); |
| + int deleteAttemptCount = TestUtil.nextInt(random(), 1, totDocCount); |
| + for(int i=0;i<deleteAttemptCount;i++) { |
| + liveDocs.clear(random().nextInt(totDocCount)); |
| + } |
| + } else { |
| + liveDocs = null; |
| + } |
| + |
| + List<TestSubSorted> subs = new ArrayList<>(); |
| + for(int i=0;i<subCount;i++) { |
| + final int[] docMap = completedSubs.get(i); |
| + subs.add(new TestSubSorted(new MergeState.DocMap() { |
| + @Override |
| + public int get(int docID) { |
| + int mapped = docMap[docID]; |
| + if (liveDocs == null || liveDocs.get(mapped)) { |
| + return mapped; |
| + } else { |
| + return -1; |
| + } |
| + } |
| + }, docMap.length, i)); |
| + } |
| + |
| + DocIDMerger<TestSubSorted> merger = new DocIDMerger<>(subs, true); |
| + |
| + int count = 0; |
| + while (true) { |
| + TestSubSorted sub = merger.next(); |
| + if (sub == null) { |
| + break; |
| + } |
| + if (liveDocs != null) { |
| + count = liveDocs.nextSetBit(count); |
| + } |
| + assertEquals(count, sub.mappedDocID); |
| + count++; |
| + } |
| + |
| + if (liveDocs != null) { |
| + if (count < totDocCount) { |
| + assertEquals(NO_MORE_DOCS, liveDocs.nextSetBit(count)); |
| + } else { |
| + assertEquals(totDocCount, count); |
| + } |
| + } else { |
| + assertEquals(totDocCount, count); |
| + } |
| + } |
| +} |
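| |
| Note: the contract this test exercises: each Sub iterates its segment's docs, its DocMap returns the doc's position in the merged (possibly sorted) segment, or -1 for a deleted doc, and the merger yields subs in merged-doc order. A minimal sketch (assumes this patch's DocIDMerger and MergeState.DocMap; the class is illustrative): |
| |
| import java.util.Arrays; |
| |
| import org.apache.lucene.index.DocIDMerger; |
| import org.apache.lucene.index.MergeState; |
| |
| import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| |
| class DocIDMergerSketch { |
|   static class Sub extends DocIDMerger.Sub { |
|     final int maxDoc; |
|     int docID = -1; |
|     Sub(MergeState.DocMap docMap, int maxDoc) { |
|       super(docMap); |
|       this.maxDoc = maxDoc; |
|     } |
|     @Override |
|     public int nextDoc() { |
|       docID++; |
|       return docID == maxDoc ? NO_MORE_DOCS : docID; |
|     } |
|   } |
| |
|   public static void main(String[] args) throws Exception { |
|     // Two 2-doc segments interleaved into merged positions 0..3. |
|     MergeState.DocMap evens = new MergeState.DocMap() { |
|       @Override public int get(int docID) { return 2 * docID; } |
|     }; |
|     MergeState.DocMap odds = new MergeState.DocMap() { |
|       @Override public int get(int docID) { return 2 * docID + 1; } |
|     }; |
|     DocIDMerger<Sub> merger = new DocIDMerger<>( |
|         Arrays.asList(new Sub(evens, 2), new Sub(odds, 2)), true); |
|     for (Sub sub = merger.next(); sub != null; sub = merger.next()) { |
|       System.out.println("merged docID=" + sub.mappedDocID); // 0, 1, 2, 3 |
|     } |
|   } |
| } |
| |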
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestDoc.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java 2016-02-16 11:18:34.705021816 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestDoc.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -218,7 +218,7 @@ |
| |
| final Codec codec = Codec.getDefault(); |
| TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir); |
| - final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| |
| SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2), |
| si, InfoStream.getDefault(), trackingDir, |
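| |
| Note: the new TestIndexSorting suite below repeats one pattern: write tiny segments, commit between them, then forceMerge(1), since in this snapshot only a merge produces a sorted segment. The pattern in sketch form (assumes this patch's setIndexSort plus an analyzer from analyzers-common; the class name is illustrative): |
| |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.NumericDocValuesField; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| |
| class IndexSortSketch { |
|   public static void main(String[] args) throws Exception { |
|     Directory dir = new RAMDirectory(); |
|     IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer()); |
|     iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG))); |
|     try (IndexWriter w = new IndexWriter(dir, iwc)) { |
|       for (long value : new long[] {18, -1, 7}) { |
|         Document doc = new Document(); |
|         doc.add(new NumericDocValuesField("foo", value)); |
|         w.addDocument(doc); |
|         w.commit(); // one segment per doc, so the merge below has work to do |
|       } |
|       w.forceMerge(1); // merging applies the index sort: -1, 7, 18 |
|       try (DirectoryReader r = DirectoryReader.open(w)) { |
|         System.out.println(r.leaves().get(0).reader() |
|             .getNumericDocValues("foo").get(0)); // -1 |
|       } |
|     } |
|   } |
| } |
| |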
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -0,0 +1,1358 @@ |
| +package org.apache.lucene.index; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.Arrays; |
| +import java.util.Collections; |
| +import java.util.HashMap; |
| +import java.util.HashSet; |
| +import java.util.List; |
| +import java.util.Map; |
| +import java.util.Random; |
| +import java.util.Set; |
| +import java.util.concurrent.CountDownLatch; |
| +import java.util.concurrent.atomic.AtomicInteger; |
| + |
| +import org.apache.lucene.analysis.MockAnalyzer; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| +import org.apache.lucene.document.BinaryDocValuesField; |
| +import org.apache.lucene.document.BinaryPoint; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.DoubleDocValuesField; |
| +import org.apache.lucene.document.Field.Store; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.document.FieldType; |
| +import org.apache.lucene.document.FloatDocValuesField; |
| +import org.apache.lucene.document.NumericDocValuesField; |
| +import org.apache.lucene.document.SortedDocValuesField; |
| +import org.apache.lucene.document.SortedNumericDocValuesField; |
| +import org.apache.lucene.document.SortedSetDocValuesField; |
| +import org.apache.lucene.document.StoredField; |
| +import org.apache.lucene.document.StringField; |
| +import org.apache.lucene.document.TextField; |
| +import org.apache.lucene.index.PointValues.IntersectVisitor; |
| +import org.apache.lucene.index.PointValues.Relation; |
| +import org.apache.lucene.index.TermsEnum.SeekStatus; |
| +import org.apache.lucene.search.CollectionStatistics; |
| +import org.apache.lucene.search.DocIdSetIterator; |
| +import org.apache.lucene.search.EarlyTerminatingSortingCollector; |
| +import org.apache.lucene.search.FieldDoc; |
| +import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.search.MatchAllDocsQuery; |
| +import org.apache.lucene.search.Query; |
| +import org.apache.lucene.search.ScoreDoc; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.search.TermStatistics; |
| +import org.apache.lucene.search.TopDocs; |
| +import org.apache.lucene.search.TopFieldCollector; |
| +import org.apache.lucene.search.similarities.Similarity; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.util.Bits; |
| +import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.FixedBitSet; |
| +import org.apache.lucene.util.IOUtils; |
| +import org.apache.lucene.util.LuceneTestCase; |
| +import org.apache.lucene.util.NumericUtils; |
| +import org.apache.lucene.util.TestUtil; |
| +import org.junit.AfterClass; |
| +import org.junit.BeforeClass; |
| + |
| +public class TestIndexSorting extends LuceneTestCase { |
| + |
| + public void testBasicString() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.STRING)); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new SortedDocValuesField("foo", new BytesRef("zzz"))); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new SortedDocValuesField("foo", new BytesRef("aaa"))); |
| + w.addDocument(doc); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new SortedDocValuesField("foo", new BytesRef("mmm"))); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + SortedDocValues values = leaf.getSortedDocValues("foo"); |
| + assertEquals("aaa", values.get(0).utf8ToString()); |
| + assertEquals("mmm", values.get(1).utf8ToString()); |
| + assertEquals("zzz", values.get(2).utf8ToString()); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingStringFirst() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.STRING); |
| + sortField.setMissingValue(SortField.STRING_FIRST); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new SortedDocValuesField("foo", new BytesRef("zzz"))); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new SortedDocValuesField("foo", new BytesRef("mmm"))); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + SortedDocValues values = leaf.getSortedDocValues("foo"); |
| + assertEquals(-1, values.getOrd(0)); |
| + assertEquals("mmm", values.get(1).utf8ToString()); |
| + assertEquals("zzz", values.get(2).utf8ToString()); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingStringLast() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.STRING); |
| + sortField.setMissingValue(SortField.STRING_LAST); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new SortedDocValuesField("foo", new BytesRef("zzz"))); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new SortedDocValuesField("foo", new BytesRef("mmm"))); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + SortedDocValues values = leaf.getSortedDocValues("foo"); |
| + assertEquals("mmm", values.get(0).utf8ToString()); |
| + assertEquals("zzz", values.get(1).utf8ToString()); |
| + assertEquals(-1, values.getOrd(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testBasicLong() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 18)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", -1)); |
| + w.addDocument(doc); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 7)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + assertEquals(-1, values.get(0)); |
| + assertEquals(7, values.get(1)); |
| + assertEquals(18, values.get(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingLongFirst() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.LONG); |
| + sortField.setMissingValue(Long.valueOf(Long.MIN_VALUE)); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 18)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 7)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + Bits docsWithField = leaf.getDocsWithField("foo"); |
| + assertEquals(0, values.get(0)); |
| + assertFalse(docsWithField.get(0)); |
| + assertEquals(7, values.get(1)); |
| + assertEquals(18, values.get(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingLongLast() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.LONG); |
| + sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE)); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 18)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 7)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + Bits docsWithField = leaf.getDocsWithField("foo"); |
| + assertEquals(7, values.get(0)); |
| + assertEquals(18, values.get(1)); |
| + assertEquals(0, values.get(2)); |
| + assertFalse(docsWithField.get(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testBasicInt() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT)); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 18)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", -1)); |
| + w.addDocument(doc); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 7)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + assertEquals(-1, values.get(0)); |
| + assertEquals(7, values.get(1)); |
| + assertEquals(18, values.get(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingIntFirst() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.INT); |
| + sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE)); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 18)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 7)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + Bits docsWithField = leaf.getDocsWithField("foo"); |
| + assertEquals(0, values.get(0)); |
| + assertFalse(docsWithField.get(0)); |
| + assertEquals(7, values.get(1)); |
| + assertEquals(18, values.get(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingIntLast() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.INT); |
| + sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE)); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 18)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", 7)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + Bits docsWithField = leaf.getDocsWithField("foo"); |
| + assertEquals(7, values.get(0)); |
| + assertEquals(18, values.get(1)); |
| + assertEquals(0, values.get(2)); |
| + assertFalse(docsWithField.get(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testBasicDouble() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE)); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new DoubleDocValuesField("foo", 18.0)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new DoubleDocValuesField("foo", -1.0)); |
| + w.addDocument(doc); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new DoubleDocValuesField("foo", 7.0)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0); |
| + assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0); |
| + assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingDoubleFirst() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.DOUBLE); |
| + sortField.setMissingValue(Double.NEGATIVE_INFINITY); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new DoubleDocValuesField("foo", 18.0)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new DoubleDocValuesField("foo", 7.0)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + Bits docsWithField = leaf.getDocsWithField("foo"); |
| + assertEquals(0.0, Double.longBitsToDouble(values.get(0)), 0.0); |
| + assertFalse(docsWithField.get(0)); |
| + assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0); |
| + assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingDoubleLast() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.DOUBLE); |
| + sortField.setMissingValue(Double.POSITIVE_INFINITY); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new DoubleDocValuesField("foo", 18.0)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new DoubleDocValuesField("foo", 7.0)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + Bits docsWithField = leaf.getDocsWithField("foo"); |
| + assertEquals(7.0, Double.longBitsToDouble(values.get(0)), 0.0); |
| + assertEquals(18.0, Double.longBitsToDouble(values.get(1)), 0.0); |
| + assertEquals(0.0, Double.longBitsToDouble(values.get(2)), 0.0); |
| + assertFalse(docsWithField.get(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testBasicFloat() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT)); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new FloatDocValuesField("foo", 18.0f)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new FloatDocValuesField("foo", -1.0f)); |
| + w.addDocument(doc); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new FloatDocValuesField("foo", 7.0f)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f); |
| + assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f); |
| + assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingFloatFirst() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.FLOAT); |
| + sortField.setMissingValue(Float.NEGATIVE_INFINITY); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new FloatDocValuesField("foo", 18.0f)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new FloatDocValuesField("foo", 7.0f)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + Bits docsWithField = leaf.getDocsWithField("foo"); |
| + assertEquals(0.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f); |
| + assertFalse(docsWithField.get(0)); |
| + assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f); |
| + assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testMissingFloatLast() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + SortField sortField = new SortField("foo", SortField.Type.FLOAT); |
| + sortField.setMissingValue(Float.POSITIVE_INFINITY); |
| + Sort indexSort = new Sort(sortField); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Document doc = new Document(); |
| + doc.add(new FloatDocValuesField("foo", 18.0f)); |
| + w.addDocument(doc); |
| + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: |
| + w.commit(); |
| + |
| + // missing |
| + w.addDocument(new Document()); |
| + w.commit(); |
| + |
| + doc = new Document(); |
| + doc.add(new FloatDocValuesField("foo", 7.0f)); |
| + w.addDocument(doc); |
| + w.forceMerge(1); |
| + |
| + DirectoryReader r = DirectoryReader.open(w); |
| + LeafReader leaf = getOnlyLeafReader(r); |
| + assertEquals(3, leaf.maxDoc()); |
| + NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + Bits docsWithField = leaf.getDocsWithField("foo"); |
| + assertEquals(7.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f); |
| + assertEquals(18.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f); |
| + assertEquals(0.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f); |
| + assertFalse(docsWithField.get(2)); |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testRandom1() throws IOException { |
| + boolean withDeletes = random().nextBoolean(); |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + final int numDocs = atLeast(1000); |
| + final FixedBitSet deleted = new FixedBitSet(numDocs); |
| + for (int i = 0; i < numDocs; ++i) { |
| + Document doc = new Document(); |
| + doc.add(new NumericDocValuesField("foo", random().nextInt(20))); |
| + doc.add(new StringField("id", Integer.toString(i), Store.YES)); |
| + doc.add(new NumericDocValuesField("id", i)); |
| + w.addDocument(doc); |
| + if (random().nextInt(5) == 0) { |
| + w.getReader().close(); |
| + } else if (random().nextInt(30) == 0) { |
| + w.forceMerge(2); |
| + } else if (withDeletes && random().nextInt(4) == 0) { |
| + final int id = TestUtil.nextInt(random(), 0, i); |
| + deleted.set(id); |
| + w.deleteDocuments(new Term("id", Integer.toString(id))); |
| + } |
| + } |
| + |
| + // Check that segments are sorted |
| + DirectoryReader reader = w.getReader(); |
| + for (LeafReaderContext ctx : reader.leaves()) { |
| + final SegmentReader leaf = (SegmentReader) ctx.reader(); |
| + SegmentInfo info = leaf.getSegmentInfo().info; |
| + switch (info.getDiagnostics().get(IndexWriter.SOURCE)) { |
| + case IndexWriter.SOURCE_FLUSH: |
| + assertNull(info.getIndexSort()); |
| + break; |
| + case IndexWriter.SOURCE_MERGE: |
| + assertEquals(indexSort, info.getIndexSort()); |
| + final NumericDocValues values = leaf.getNumericDocValues("foo"); |
| + long previous = Long.MIN_VALUE; |
| + for (int i = 0; i < leaf.maxDoc(); ++i) { |
| + final long value = values.get(i); |
| + assertTrue(value >= previous); |
| + previous = value; |
| + } |
| + break; |
| + default: |
| + fail(); |
| + } |
| + } |
| + |
| + // Now check that the index is consistent |
| + IndexSearcher searcher = newSearcher(reader); |
| + for (int i = 0; i < numDocs; ++i) { |
| + TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i))); |
| + final TopDocs topDocs = searcher.search(termQuery, 1); |
| + if (deleted.get(i)) { |
| + assertEquals(0, topDocs.totalHits); |
| + } else { |
| + assertEquals(1, topDocs.totalHits); |
| + assertEquals(i, MultiDocValues.getNumericValues(reader, "id").get(topDocs.scoreDocs[0].doc)); |
| + Document document = reader.document(topDocs.scoreDocs[0].doc); |
| + assertEquals(Integer.toString(i), document.get("id")); |
| + } |
| + } |
| + |
| + reader.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + static class UpdateRunnable implements Runnable { |
| + |
| + private final int numDocs; |
| + private final Random random; |
| + private final AtomicInteger updateCount; |
| + private final IndexWriter w; |
| + private final Map<Integer, Long> values; |
| + private final CountDownLatch latch; |
| + |
| + UpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map<Integer, Long> values) { |
| + this.numDocs = numDocs; |
| + this.random = random; |
| + this.latch = latch; |
| + this.updateCount = updateCount; |
| + this.w = w; |
| + this.values = values; |
| + } |
| + |
| + @Override |
| + public void run() { |
| + try { |
| + latch.await(); |
| + while (updateCount.decrementAndGet() >= 0) { |
| + final int id = random.nextInt(numDocs); |
| + final long value = random.nextInt(20); |
| + Document doc = new Document(); |
| + doc.add(new StringField("id", Integer.toString(id), Store.NO)); |
| + doc.add(new NumericDocValuesField("foo", value)); |
| + |
| + synchronized (values) { |
| + w.updateDocument(new Term("id", Integer.toString(id)), doc); |
| + values.put(id, value); |
| + } |
| + |
| + switch (random.nextInt(10)) { |
| + case 0: |
| + case 1: |
| + // reopen |
| + DirectoryReader.open(w).close(); |
| + break; |
| + case 2: |
| + w.forceMerge(3); |
| + break; |
| + } |
| + } |
| + } catch (IOException | InterruptedException e) { |
| + throw new RuntimeException(e); |
| + } |
| + } |
| + |
| + } |
| + |
| + // There is tricky logic to resolve deletes that happened while merging |
| + public void testConcurrentUpdates() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Map<Integer, Long> values = new HashMap<>(); |
| + |
| + final int numDocs = atLeast(100); |
| + Thread[] threads = new Thread[2]; |
| + final AtomicInteger updateCount = new AtomicInteger(atLeast(1000)); |
| + final CountDownLatch latch = new CountDownLatch(1); |
| + for (int i = 0; i < threads.length; ++i) { |
| + Random r = new Random(random().nextLong()); |
| + threads[i] = new Thread(new UpdateRunnable(numDocs, r, latch, updateCount, w, values)); |
| + } |
| + for (Thread thread : threads) { |
| + thread.start(); |
| + } |
| + latch.countDown(); |
| + for (Thread thread : threads) { |
| + thread.join(); |
| + } |
| + w.forceMerge(1); |
| + DirectoryReader reader = DirectoryReader.open(w); |
| + IndexSearcher searcher = newSearcher(reader); |
| + for (int i = 0; i < numDocs; ++i) { |
| + final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1); |
| + if (values.containsKey(i) == false) { |
| + assertEquals(0, topDocs.totalHits); |
| + } else { |
| + assertEquals(1, topDocs.totalHits); |
| + assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc)); |
| + } |
| + } |
| + reader.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + static class DVUpdateRunnable implements Runnable { |
| + |
| + private final int numDocs; |
| + private final Random random; |
| + private final AtomicInteger updateCount; |
| + private final IndexWriter w; |
| + private final Map<Integer, Long> values; |
| + private final CountDownLatch latch; |
| + |
| + DVUpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map<Integer, Long> values) { |
| + this.numDocs = numDocs; |
| + this.random = random; |
| + this.latch = latch; |
| + this.updateCount = updateCount; |
| + this.w = w; |
| + this.values = values; |
| + } |
| + |
| + @Override |
| + public void run() { |
| + try { |
| + latch.await(); |
| + while (updateCount.decrementAndGet() >= 0) { |
| + final int id = random.nextInt(numDocs); |
| + final long value = random.nextInt(20); |
| + |
| + synchronized (values) { |
| + w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("foo", value)); |
| + values.put(id, value); |
| + } |
| + |
| + switch (random.nextInt(10)) { |
| + case 0: |
| + case 1: |
| + // reopen |
| + DirectoryReader.open(w).close(); |
| + break; |
| + case 2: |
| + w.forceMerge(3); |
| + break; |
| + } |
| + } |
| + } catch (IOException | InterruptedException e) { |
| + throw new RuntimeException(e); |
| + } |
| + } |
| + |
| + } |
| + |
| + // There is tricky logic to resolve dv updates that happened while merging |
| + public void testConcurrentDVUpdates() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + Map<Integer, Long> values = new HashMap<>(); |
| + |
| + final int numDocs = atLeast(100); |
| + for (int i = 0; i < numDocs; ++i) { |
| + Document doc = new Document(); |
| + doc.add(new StringField("id", Integer.toString(i), Store.NO)); |
| + doc.add(new NumericDocValuesField("foo", -1)); |
| + w.addDocument(doc); |
| + values.put(i, -1L); |
| + } |
| + Thread[] threads = new Thread[2]; |
| + final AtomicInteger updateCount = new AtomicInteger(atLeast(1000)); |
| + final CountDownLatch latch = new CountDownLatch(1); |
| + for (int i = 0; i < threads.length; ++i) { |
| + Random r = new Random(random().nextLong()); |
| + threads[i] = new Thread(new DVUpdateRunnable(numDocs, r, latch, updateCount, w, values)); |
| + } |
| + for (Thread thread : threads) { |
| + thread.start(); |
| + } |
| + latch.countDown(); |
| + for (Thread thread : threads) { |
| + thread.join(); |
| + } |
| + w.forceMerge(1); |
| + DirectoryReader reader = DirectoryReader.open(w); |
| + IndexSearcher searcher = newSearcher(reader); |
| + for (int i = 0; i < numDocs; ++i) { |
| + final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1); |
| + assertEquals(1, topDocs.totalHits); |
| + assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc)); |
| + } |
| + reader.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception { |
| + Directory dir = newDirectory(); |
| + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); |
| + IndexWriterConfig iwc1 = newIndexWriterConfig(); |
| + if (random().nextBoolean()) { |
| + iwc1.setIndexSort(indexSort); |
| + } |
| + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc1); |
| + final int numDocs = atLeast(100); |
| + for (int i = 0; i < numDocs; ++i) { |
| + Document doc = new Document(); |
| + doc.add(new StringField("id", Integer.toString(i), Store.NO)); |
| + doc.add(new NumericDocValuesField("foo", random().nextInt(20))); |
| + w.addDocument(doc); |
| + } |
| + if (withDeletes) { |
| + for (int i = random().nextInt(5); i < numDocs; i += TestUtil.nextInt(random(), 1, 5)) { |
| + w.deleteDocuments(new Term("id", Integer.toString(i))); |
| + } |
| + } |
| + if (random().nextBoolean()) { |
| + w.forceMerge(1); |
| + } |
| + final IndexReader reader = w.getReader(); |
| + w.close(); |
| + |
| + Directory dir2 = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + iwc.setIndexSort(indexSort); |
| + IndexWriter w2 = new IndexWriter(dir2, iwc); |
| + |
| + if (useReaders) { |
| + CodecReader[] codecReaders = new CodecReader[reader.leaves().size()]; |
| + for (int i = 0; i < codecReaders.length; ++i) { |
| + codecReaders[i] = (CodecReader) reader.leaves().get(i).reader(); |
| + } |
| + w2.addIndexes(codecReaders); |
| + } else { |
| + w2.addIndexes(dir); |
| + } |
| + final IndexReader reader2 = w2.getReader(); |
| + final IndexSearcher searcher = newSearcher(reader); |
| + final IndexSearcher searcher2 = newSearcher(reader2); |
| + for (int i = 0; i < numDocs; ++i) { |
| + Query query = new TermQuery(new Term("id", Integer.toString(i))); |
| + final TopDocs topDocs = searcher.search(query, 1); |
| + final TopDocs topDocs2 = searcher2.search(query, 1); |
| + assertEquals(topDocs.totalHits, topDocs2.totalHits); |
| + if (topDocs.totalHits == 1) { |
| + assertEquals( |
| + MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc), |
| + MultiDocValues.getNumericValues(reader2, "foo").get(topDocs2.scoreDocs[0].doc)); |
| + } |
| + } |
| + |
| + IOUtils.close(reader, reader2, w2, dir, dir2); |
| + } |
| + |
| + public void testAddIndexes() throws Exception { |
| + testAddIndexes(false, true); |
| + } |
| + |
| + public void testAddIndexesWithDeletions() throws Exception { |
| + testAddIndexes(true, true); |
| + } |
| + |
| + public void testAddIndexesWithDirectory() throws Exception { |
| + testAddIndexes(false, false); |
| + } |
| + |
| + public void testAddIndexesWithDeletionsAndDirectory() throws Exception { |
| + testAddIndexes(true, false); |
| + } |
| + |
| + public void testBadSort() throws Exception { |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { |
| + iwc.setIndexSort(Sort.RELEVANCE); |
| + }); |
| + assertEquals("invalid SortField type: must be one of [STRING, INT, FLOAT, LONG, DOUBLE] but got: <score>", expected.getMessage()); |
| + } |
| + |
| + // you can't change the index sort on an existing index: |
| + public void testIllegalChangeSort() throws Exception { |
| + final Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| + iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG))); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + w.addDocument(new Document()); |
| + DirectoryReader.open(w).close(); |
| + w.addDocument(new Document()); |
| + w.forceMerge(1); |
| + w.close(); |
| + |
| + final IndexWriterConfig iwc2 = new IndexWriterConfig(new MockAnalyzer(random())); |
| + iwc2.setIndexSort(new Sort(new SortField("bar", SortField.Type.LONG))); |
| + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { |
| + new IndexWriter(dir, iwc2); |
| + }); |
| + String message = e.getMessage(); |
| + assertTrue(message.contains("cannot change previous indexSort=<long: \"foo\">")); |
| + assertTrue(message.contains("to new indexSort=<long: \"bar\">")); |
| + dir.close(); |
| + } |
| + |
| + static final class NormsSimilarity extends Similarity { |
| + |
| + private final Similarity in; |
| + |
| + public NormsSimilarity(Similarity in) { |
| + this.in = in; |
| + } |
| + |
| + @Override |
| + public long computeNorm(FieldInvertState state) { |
| + if (state.getName().equals("norms")) { |
| + return Float.floatToIntBits(state.getBoost()); |
| + } else { |
| + return in.computeNorm(state); |
| + } |
| + } |
| + |
| + @Override |
| + public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) { |
| + return in.computeWeight(collectionStats, termStats); |
| + } |
| + |
| + @Override |
| + public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { |
| + return in.simScorer(weight, context); |
| + } |
| + |
| + } |
| + |
| + static final class PositionsTokenStream extends TokenStream { |
| + |
| + private final CharTermAttribute term; |
| + private final PayloadAttribute payload; |
| + private final OffsetAttribute offset; |
| + |
| + private int pos, off; |
| + |
| + public PositionsTokenStream() { |
| + term = addAttribute(CharTermAttribute.class); |
| + payload = addAttribute(PayloadAttribute.class); |
| + offset = addAttribute(OffsetAttribute.class); |
| + } |
| + |
| + @Override |
| + public boolean incrementToken() throws IOException { |
| + if (pos == 0) { |
| + return false; |
| + } |
| + |
| + clearAttributes(); |
| + term.append("#all#"); |
| + payload.setPayload(new BytesRef(Integer.toString(pos))); |
| + offset.setOffset(off, off); |
| + --pos; |
| + ++off; |
| + return true; |
| + } |
| + |
| + void setId(int id) { |
| + pos = id / 10 + 1; |
| + off = 0; |
| + } |
| + } |
| + |
| + public void testRandom2() throws Exception { |
| + int numDocs = atLeast(100); |
| + |
| + FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED); |
| + POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); |
| + POSITIONS_TYPE.freeze(); |
| + |
| + FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED); |
| + TERM_VECTORS_TYPE.setStoreTermVectors(true); |
| + TERM_VECTORS_TYPE.freeze(); |
| + |
| + List<Document> docs = new ArrayList<>(); |
| + for (int i=0;i<numDocs;i++) { |
| + int id = i * 10; |
| + Document doc = new Document(); |
| + doc.add(new StringField("id", Integer.toString(id), Store.YES)); |
| + doc.add(new StringField("docs", "#all#", Store.NO)); |
| + PositionsTokenStream positions = new PositionsTokenStream(); |
| + positions.setId(id); |
| + doc.add(new Field("positions", positions, POSITIONS_TYPE)); |
| + doc.add(new NumericDocValuesField("numeric", id)); |
| + TextField norms = new TextField("norms", Integer.toString(id), Store.NO); |
| + norms.setBoost(Float.intBitsToFloat(id)); |
| + doc.add(norms); |
| + doc.add(new BinaryDocValuesField("binary", new BytesRef(Integer.toString(id)))); |
| + doc.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(id)))); |
| + doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Integer.toString(id)))); |
| + doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Integer.toString(id + 1)))); |
| + doc.add(new SortedNumericDocValuesField("sorted_numeric", id)); |
| + doc.add(new SortedNumericDocValuesField("sorted_numeric", id + 1)); |
| + doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE)); |
| + byte[] bytes = new byte[4]; |
| + NumericUtils.intToSortableBytes(id, bytes, 0); |
| + doc.add(new BinaryPoint("points", bytes)); |
| + docs.add(doc); |
| + } |
| + |
| + // Must use the same seed for both RandomIndexWriters so they behave identically |
| + long seed = random().nextLong(); |
| + |
| + // We add documents already in ID order for the first writer: |
| + Directory dir1 = newFSDirectory(createTempDir()); |
| + |
| + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity())); // for testing norms field |
| + // preserve docIDs |
| + iwc1.setMergePolicy(newLogMergePolicy()); |
| + if (VERBOSE) { |
| + System.out.println("TEST: now index pre-sorted"); |
| + } |
| + RandomIndexWriter w1 = new RandomIndexWriter(new Random(seed), dir1, iwc1); |
| + for(Document doc : docs) { |
| + ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id"))); |
| + w1.addDocument(doc); |
| + } |
| + |
| + // We shuffle documents, but set index sort, for the second writer: |
| + Directory dir2 = newFSDirectory(createTempDir()); |
| + |
| + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity())); // for testing norms field |
| + |
| + Sort sort = new Sort(new SortField("numeric", SortField.Type.INT)); |
| + iwc2.setIndexSort(sort); |
| + |
| + Collections.shuffle(docs, random()); |
| + if (VERBOSE) { |
| + System.out.println("TEST: now index with index-time sorting"); |
| + } |
| + RandomIndexWriter w2 = new RandomIndexWriter(new Random(seed), dir2, iwc2); |
| + int count = 0; |
| + int commitAtCount = TestUtil.nextInt(random(), 1, numDocs-1); |
| + for(Document doc : docs) { |
| + ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id"))); |
| + if (count++ == commitAtCount) { |
| + // Ensure forceMerge really does merge |
| + w2.commit(); |
| + } |
| + w2.addDocument(doc); |
| + } |
| + w2.forceMerge(1); |
| + |
| + DirectoryReader r1 = w1.getReader(); |
| + DirectoryReader r2 = w2.getReader(); |
| + assertEquals(sort, getOnlyLeafReader(r2).getIndexSort()); |
| + assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2); |
| + IOUtils.close(w1, w2, r1, r2, dir1, dir2); |
| + } |
| + |
| + private static final class RandomDoc { |
| + public final int id; |
| + public final int intValue; |
| + public final long longValue; |
| + public final float floatValue; |
| + public final double doubleValue; |
| + public final byte[] bytesValue; |
| + |
| + public RandomDoc(int id) { |
| + this.id = id; |
| + intValue = random().nextInt(); |
| + longValue = random().nextLong(); |
| + floatValue = random().nextFloat(); |
| + doubleValue = random().nextDouble(); |
| + bytesValue = new byte[TestUtil.nextInt(random(), 1, 50)]; |
| + random().nextBytes(bytesValue); |
| + } |
| + } |
| + |
| + private static Sort randomSort() { |
| + int numFields = TestUtil.nextInt(random(), 1, 3); |
| + SortField[] sortFields = new SortField[numFields]; |
| + for(int i=0;i<numFields-1;i++) { |
| + boolean reversed = random().nextBoolean(); |
| + SortField sortField; |
| + switch(random().nextInt(5)) { |
| + case 0: |
| + sortField = new SortField("int", SortField.Type.INT, reversed); |
| + if (random().nextBoolean()) { |
| + sortField.setMissingValue(random().nextInt()); |
| + } |
| + break; |
| + case 1: |
| + sortField = new SortField("long", SortField.Type.LONG, reversed); |
| + if (random().nextBoolean()) { |
| + sortField.setMissingValue(random().nextLong()); |
| + } |
| + break; |
| + case 2: |
| + sortField = new SortField("float", SortField.Type.FLOAT, reversed); |
| + if (random().nextBoolean()) { |
| + sortField.setMissingValue(random().nextFloat()); |
| + } |
| + break; |
| + case 3: |
| + sortField = new SortField("double", SortField.Type.DOUBLE, reversed); |
| + if (random().nextBoolean()) { |
| + sortField.setMissingValue(random().nextDouble()); |
| + } |
| + break; |
| + case 4: |
| + sortField = new SortField("bytes", SortField.Type.STRING, reversed); |
| + if (random().nextBoolean()) { |
| + sortField.setMissingValue(SortField.STRING_LAST); |
| + } |
| + break; |
| + default: |
| + throw new AssertionError(); |
| + } |
| + sortFields[i] = sortField; |
| + } |
| + |
| + // tie-break by id: |
| + sortFields[numFields-1] = new SortField("id", SortField.Type.INT); |
| + |
| + return new Sort(sortFields); |
| + } |
| + |
| + // pits index-time sorting against query-time sorting |
| + public void testRandom3() throws Exception { |
| + int numDocs; |
| + if (TEST_NIGHTLY) { |
| + numDocs = atLeast(100000); |
| + } else { |
| + numDocs = atLeast(10000); |
| + } |
| + List<RandomDoc> docs = new ArrayList<>(); |
| + |
| + Sort sort = randomSort(); |
| + if (VERBOSE) { |
| + System.out.println("TEST: numDocs=" + numDocs + " use sort=" + sort); |
| + } |
| + |
| + // no index sorting, all search-time sorting: |
| + Directory dir1 = newFSDirectory(createTempDir()); |
| + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + IndexWriter w1 = new IndexWriter(dir1, iwc1); |
| + |
| + // use index sorting: |
| + Directory dir2 = newFSDirectory(createTempDir()); |
| + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc2.setIndexSort(sort); |
| + IndexWriter w2 = new IndexWriter(dir2, iwc2); |
| + |
| + Set<Integer> toDelete = new HashSet<>(); |
| + |
| + double deleteChance = random().nextDouble(); |
| + |
| + for(int id=0;id<numDocs;id++) { |
| + RandomDoc docValues = new RandomDoc(id); |
| + docs.add(docValues); |
| + if (VERBOSE) { |
| + System.out.println("TEST: doc id=" + id); |
| + System.out.println(" int=" + docValues.intValue); |
| + System.out.println(" long=" + docValues.longValue); |
| + System.out.println(" float=" + docValues.floatValue); |
| + System.out.println(" double=" + docValues.doubleValue); |
| + System.out.println(" bytes=" + new BytesRef(docValues.bytesValue)); |
| + } |
| + |
| + Document doc = new Document(); |
| + doc.add(new StringField("id", Integer.toString(id), Field.Store.YES)); |
| + doc.add(new NumericDocValuesField("id", id)); |
| + doc.add(new NumericDocValuesField("int", docValues.intValue)); |
| + doc.add(new NumericDocValuesField("long", docValues.longValue)); |
| + doc.add(new DoubleDocValuesField("double", docValues.doubleValue)); |
| + doc.add(new FloatDocValuesField("float", docValues.floatValue)); |
| + doc.add(new SortedDocValuesField("bytes", new BytesRef(docValues.bytesValue))); |
| + w1.addDocument(doc); |
| + w2.addDocument(doc); |
| + if (random().nextDouble() < deleteChance) { |
| + toDelete.add(id); |
| + } |
| + } |
| + for(int id : toDelete) { |
| + w1.deleteDocuments(new Term("id", Integer.toString(id))); |
| + w2.deleteDocuments(new Term("id", Integer.toString(id))); |
| + } |
| + DirectoryReader r1 = DirectoryReader.open(w1); |
| + IndexSearcher s1 = newSearcher(r1); |
| + |
| + if (random().nextBoolean()) { |
| + int maxSegmentCount = TestUtil.nextInt(random(), 1, 5); |
| + if (VERBOSE) { |
| + System.out.println("TEST: now forceMerge(" + maxSegmentCount + ")"); |
| + } |
| + w2.forceMerge(maxSegmentCount); |
| + } |
| + |
| + DirectoryReader r2 = DirectoryReader.open(w2); |
| + IndexSearcher s2 = newSearcher(r2); |
| + |
| + /* |
| + System.out.println("TEST: full index:"); |
| + SortedDocValues docValues = MultiDocValues.getSortedValues(r2, "bytes"); |
| + for(int i=0;i<r2.maxDoc();i++) { |
| + System.out.println(" doc " + i + " id=" + r2.document(i).get("id") + " bytes=" + docValues.get(i)); |
| + } |
| + */ |
| + |
| + for(int iter=0;iter<100;iter++) { |
| + int numHits = TestUtil.nextInt(random(), 1, numDocs); |
| + if (VERBOSE) { |
| + System.out.println("TEST: iter=" + iter + " numHits=" + numHits); |
| + } |
| + |
| + TopFieldCollector c1 = TopFieldCollector.create(sort, numHits, true, true, true); |
| + s1.search(new MatchAllDocsQuery(), c1); |
| + TopDocs hits1 = c1.topDocs(); |
| + |
| + TopFieldCollector c2 = TopFieldCollector.create(sort, numHits, true, true, true); |
| + EarlyTerminatingSortingCollector c3 = new EarlyTerminatingSortingCollector(c2, sort, numHits); |
| + s2.search(new MatchAllDocsQuery(), c3); |
| + |
| + TopDocs hits2 = c2.topDocs(); |
| + |
| + if (VERBOSE) { |
| + System.out.println(" topDocs query-time sort: totalHits=" + hits1.totalHits); |
| + for(ScoreDoc scoreDoc : hits1.scoreDocs) { |
| + System.out.println(" " + scoreDoc.doc); |
| + } |
| + System.out.println(" topDocs index-time sort: totalHits=" + hits2.totalHits); |
| + for(ScoreDoc scoreDoc : hits2.scoreDocs) { |
| + System.out.println(" " + scoreDoc.doc); |
| + } |
| + } |
| + |
| + assertTrue(hits2.totalHits <= hits1.totalHits); |
| + assertEquals(hits2.scoreDocs.length, hits1.scoreDocs.length); |
| + for(int i=0;i<hits2.scoreDocs.length;i++) { |
| + ScoreDoc hit1 = hits1.scoreDocs[i]; |
| + ScoreDoc hit2 = hits2.scoreDocs[i]; |
| + assertEquals(r1.document(hit1.doc).get("id"), r2.document(hit2.doc).get("id")); |
| + assertEquals(((FieldDoc) hit1).fields, ((FieldDoc) hit2).fields); |
| + } |
| + } |
| + |
| + IOUtils.close(r1, r2, w1, w2, dir1, dir2); |
| + } |
| + |
| + public void testTieBreak() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING))); |
| + iwc.setMergePolicy(newLogMergePolicy()); |
| + IndexWriter w = new IndexWriter(dir, iwc); |
| + for(int id=0;id<1000;id++) { |
| + Document doc = new Document(); |
| + doc.add(new StoredField("id", id)); |
| + String value; |
| + if (id < 500) { |
| + value = "bar2"; |
| + } else { |
| + value = "bar1"; |
| + } |
| + doc.add(new SortedDocValuesField("foo", new BytesRef(value))); |
| + w.addDocument(doc); |
| + if (id == 500) { |
| + w.commit(); |
| + } |
| + } |
| + w.forceMerge(1); |
| + DirectoryReader r = DirectoryReader.open(w); |
| + for(int docID=0;docID<1000;docID++) { |
| + int expectedID; |
| + if (docID < 500) { |
| + expectedID = 500 + docID; |
| + } else { |
| + expectedID = docID - 500; |
| + } |
| + assertEquals(expectedID, r.document(docID).getField("id").numericValue().intValue()); |
| + } |
| + IOUtils.close(r, w, dir); |
| + } |
| +} |
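| |
| The tests above exercise the new index-time sorting end to end. For orientation, here is a minimal usage sketch of the API they drive; it is an illustration distilled from the tests, not part of the patch, and the "timestamp" field, the sketch class name, and the StandardAnalyzer choice are stand-ins: |
| |
|     import org.apache.lucene.analysis.standard.StandardAnalyzer; |
|     import org.apache.lucene.document.Document; |
|     import org.apache.lucene.document.NumericDocValuesField; |
|     import org.apache.lucene.index.IndexWriter; |
|     import org.apache.lucene.index.IndexWriterConfig; |
|     import org.apache.lucene.search.Sort; |
|     import org.apache.lucene.search.SortField; |
|     import org.apache.lucene.store.Directory; |
|     import org.apache.lucene.store.RAMDirectory; |
| |
|     public class IndexSortSketch { |
|       public static void main(String[] args) throws Exception { |
|         Directory dir = new RAMDirectory(); |
|         IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer()); |
|         // Keep every segment (and every merge of segments) ordered by timestamp: |
|         iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG))); |
|         IndexWriter w = new IndexWriter(dir, iwc); |
|         Document doc = new Document(); |
|         doc.add(new NumericDocValuesField("timestamp", 42L)); // the sort field must carry doc values |
|         w.addDocument(doc); |
|         w.forceMerge(1); // merging preserves the index sort |
|         w.close(); |
|         dir.close(); |
|       } |
|     } |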
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java 2016-03-13 05:38:07.387183845 -0400 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -69,6 +69,8 @@ |
| import org.apache.lucene.search.MatchAllDocsQuery; |
| import org.apache.lucene.search.PhraseQuery; |
| import org.apache.lucene.search.ScoreDoc; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.BaseDirectoryWrapper; |
| @@ -2759,5 +2761,6 @@ |
| w.close(); |
| dir.close(); |
| } |
| + |
| } |
| |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestParallelLeafReader.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestParallelLeafReader.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestParallelLeafReader.java 2016-03-13 05:38:07.387183845 -0400 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestParallelLeafReader.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -23,10 +23,11 @@ |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| -import org.apache.lucene.search.BooleanClause.Occur; |
| import org.apache.lucene.search.*; |
| +import org.apache.lucene.search.BooleanClause.Occur; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.TestUtil; |
| |
| @@ -314,4 +315,60 @@ |
| return dir2; |
| } |
| |
| + // not ok to have one leaf w/ index sort and another with a different index sort |
| + public void testWithIndexSort1() throws Exception { |
| + Directory dir1 = newDirectory(); |
| + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); |
| + IndexWriter w1 = new IndexWriter(dir1, iwc1); |
| + w1.addDocument(new Document()); |
| + w1.commit(); |
| + w1.addDocument(new Document()); |
| + w1.forceMerge(1); |
| + w1.close(); |
| + IndexReader r1 = DirectoryReader.open(dir1); |
| + |
| + Directory dir2 = newDirectory(); |
| + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc2.setIndexSort(new Sort(new SortField("bar", SortField.Type.INT))); |
| + IndexWriter w2 = new IndexWriter(dir2, iwc2); |
| + w2.addDocument(new Document()); |
| + w2.commit(); |
| + w2.addDocument(new Document()); |
| + w2.forceMerge(1); |
| + w2.close(); |
| + IndexReader r2 = DirectoryReader.open(dir2); |
| + |
| + String message = expectThrows(IllegalArgumentException.class, () -> { |
| + new ParallelLeafReader(getOnlyLeafReader(r1), getOnlyLeafReader(r2)); |
| + }).getMessage(); |
| + assertEquals("cannot combine LeafReaders that have different index sorts: saw both sort=<int: \"foo\"> and <int: \"bar\">", message); |
| + IOUtils.close(r1, dir1, r2, dir2); |
| + } |
| + |
| + // ok to have one leaf w/ index sort and the other with no sort |
| + public void testWithIndexSort2() throws Exception { |
| + Directory dir1 = newDirectory(); |
| + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); |
| + IndexWriter w1 = new IndexWriter(dir1, iwc1); |
| + w1.addDocument(new Document()); |
| + w1.commit(); |
| + w1.addDocument(new Document()); |
| + w1.forceMerge(1); |
| + w1.close(); |
| + IndexReader r1 = DirectoryReader.open(dir1); |
| + |
| + Directory dir2 = newDirectory(); |
| + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); |
| + IndexWriter w2 = new IndexWriter(dir2, iwc2); |
| + w2.addDocument(new Document()); |
| + w2.addDocument(new Document()); |
| + w2.close(); |
| + |
| + IndexReader r2 = DirectoryReader.open(dir2); |
| + new ParallelLeafReader(false, getOnlyLeafReader(r1), getOnlyLeafReader(r2)).close(); |
| + new ParallelLeafReader(false, getOnlyLeafReader(r2), getOnlyLeafReader(r1)).close(); |
| + IOUtils.close(r1, dir1, r2, dir2); |
| + } |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java 2016-05-03 07:31:51.560971608 -0400 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -394,11 +394,11 @@ |
| dir.close(); |
| } |
| |
| - // Write point values, one segment with Lucene60, another with SimpleText, then forceMerge with SimpleText |
| + // Write point values, one segment with Lucene62, another with SimpleText, then forceMerge with SimpleText |
| public void testDifferentCodecs1() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| - iwc.setCodec(Codec.forName("Lucene60")); |
| + iwc.setCodec(Codec.forName("Lucene62")); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| doc.add(new IntPoint("int", 1)); |
| @@ -417,7 +417,7 @@ |
| dir.close(); |
| } |
| |
| - // Write point values, one segment with Lucene60, another with SimpleText, then forceMerge with Lucene60 |
| + // Write point values, one segment with Lucene62, another with SimpleText, then forceMerge with Lucene62 |
| public void testDifferentCodecs2() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| @@ -429,7 +429,7 @@ |
| w.close(); |
| |
| iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| - iwc.setCodec(Codec.forName("Lucene60")); |
| + iwc.setCodec(Codec.forName("Lucene62")); |
| w = new IndexWriter(dir, iwc); |
| doc = new Document(); |
| doc.add(new IntPoint("int", 1)); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java 2016-05-03 07:38:23.468977947 -0400 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -51,7 +51,7 @@ |
| |
| SegmentInfos sis = new SegmentInfos(); |
| SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(), |
| - Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap()); |
| + Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1); |
| @@ -73,14 +73,14 @@ |
| |
| SegmentInfos sis = new SegmentInfos(); |
| SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(), |
| - Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap()); |
| + Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1); |
| sis.add(commitInfo); |
| |
| info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_1", 1, false, Codec.getDefault(), |
| - Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap()); |
| + Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java 2016-02-16 11:18:34.713021816 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -35,6 +35,7 @@ |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.TestUtil; |
| import org.apache.lucene.util.Version; |
| +import org.apache.lucene.util.packed.PackedLongValues; |
| |
| public class TestSegmentMerger extends LuceneTestCase { |
| //The variables for the new merged segment |
| @@ -83,7 +84,7 @@ |
| |
| public void testMerge() throws IOException { |
| final Codec codec = Codec.getDefault(); |
| - final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| |
| SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(reader1, reader2), |
| si, InfoStream.getDefault(), mergedDir, |
| @@ -144,22 +145,9 @@ |
| mergedReader.close(); |
| } |
| |
| - private static boolean equals(MergeState.DocMap map1, MergeState.DocMap map2) { |
| - if (map1.maxDoc() != map2.maxDoc()) { |
| - return false; |
| - } |
| - for (int i = 0; i < map1.maxDoc(); ++i) { |
| - if (map1.get(i) != map2.get(i)) { |
| - return false; |
| - } |
| - } |
| - return true; |
| - } |
| - |
| public void testBuildDocMap() { |
| final int maxDoc = TestUtil.nextInt(random(), 1, 128); |
| final int numDocs = TestUtil.nextInt(random(), 0, maxDoc); |
| - final int numDeletedDocs = maxDoc - numDocs; |
| final FixedBitSet liveDocs = new FixedBitSet(maxDoc); |
| for (int i = 0; i < numDocs; ++i) { |
| while (true) { |
| @@ -171,15 +159,11 @@ |
| } |
| } |
| |
| - final MergeState.DocMap docMap = MergeState.DocMap.build(maxDoc, liveDocs); |
| + final PackedLongValues docMap = MergeState.removeDeletes(maxDoc, liveDocs); |
| |
| - assertEquals(maxDoc, docMap.maxDoc()); |
| - assertEquals(numDocs, docMap.numDocs()); |
| - assertEquals(numDeletedDocs, docMap.numDeletedDocs()); |
| // assert the mapping is compact |
| for (int i = 0, del = 0; i < maxDoc; ++i) { |
| - if (!liveDocs.get(i)) { |
| - assertEquals(-1, docMap.get(i)); |
| + if (liveDocs.get(i) == false) { |
| ++del; |
| } else { |
| assertEquals(i - del, docMap.get(i)); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java indexsort/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java 1969-12-31 19:00:00.000000000 -0500 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -0,0 +1,260 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.search; |
| + |
| +import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.HashMap; |
| +import java.util.HashSet; |
| +import java.util.List; |
| +import java.util.Random; |
| +import java.util.Set; |
| + |
| +import org.apache.lucene.analysis.MockAnalyzer; |
| +import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field.Store; |
| +import org.apache.lucene.document.NumericDocValuesField; |
| +import org.apache.lucene.document.StringField; |
| +import org.apache.lucene.index.DirectoryReader; |
| +import org.apache.lucene.index.ExitableDirectoryReader; |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.IndexWriterConfig; |
| +import org.apache.lucene.index.LeafReaderContext; |
| +import org.apache.lucene.index.MockRandomMergePolicy; |
| +import org.apache.lucene.index.QueryTimeout; |
| +import org.apache.lucene.index.RandomIndexWriter; |
| +import org.apache.lucene.index.SerialMergeScheduler; |
| +import org.apache.lucene.index.Term; |
| +import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.search.LeafCollector; |
| +import org.apache.lucene.search.MatchAllDocsQuery; |
| +import org.apache.lucene.search.Query; |
| +import org.apache.lucene.search.ScoreDoc; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.search.TopFieldCollector; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.util.LuceneTestCase; |
| +import org.apache.lucene.util.TestUtil; |
| + |
| +import com.carrotsearch.randomizedtesting.generators.RandomPicks; |
| + |
| +public class TestEarlyTerminatingSortingCollector extends LuceneTestCase { |
| + |
| + private int numDocs; |
| + private List<String> terms; |
| + private Directory dir; |
| + private final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG)); |
| + private RandomIndexWriter iw; |
| + private IndexReader reader; |
| + private final int forceMergeMaxSegmentCount = 5; |
| + |
| + private Document randomDocument() { |
| + final Document doc = new Document(); |
| + doc.add(new NumericDocValuesField("ndv1", random().nextInt(10))); |
| + doc.add(new NumericDocValuesField("ndv2", random().nextInt(10))); |
| + doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES)); |
| + return doc; |
| + } |
| + |
| + private void createRandomIndex(boolean singleSortedSegment) throws IOException { |
| + dir = newDirectory(); |
| + numDocs = atLeast(150); |
| + final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5); |
| + Set<String> randomTerms = new HashSet<>(); |
| + while (randomTerms.size() < numTerms) { |
| + randomTerms.add(TestUtil.randomSimpleString(random())); |
| + } |
| + terms = new ArrayList<>(randomTerms); |
| + final long seed = random().nextLong(); |
| + final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed))); |
| + if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) { |
| + // MockRandomMP randomly wraps the leaf readers which makes merging angry |
| + iwc.setMergePolicy(newTieredMergePolicy()); |
| + } |
| + iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests |
| + iwc.setIndexSort(sort); |
| + iw = new RandomIndexWriter(new Random(seed), dir, iwc); |
| + iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP |
| + for (int i = 0; i < numDocs; ++i) { |
| + final Document doc = randomDocument(); |
| + iw.addDocument(doc); |
| + if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) { |
| + iw.commit(); |
| + } |
| + if (random().nextInt(15) == 0) { |
| + final String term = RandomPicks.randomFrom(random(), terms); |
| + iw.deleteDocuments(new Term("s", term)); |
| + } |
| + } |
| + if (singleSortedSegment) { |
| + // because of deletions, there might still be a single flush segment in |
| + // the index, although we want a sorted segment so it needs to be merged |
| + iw.getReader().close(); // refresh |
| + iw.addDocument(new Document()); |
| + iw.commit(); |
| + iw.addDocument(new Document()); |
| + iw.forceMerge(1); |
| + } |
| + else if (random().nextBoolean()) { |
| + iw.forceMerge(forceMergeMaxSegmentCount); |
| + } |
| + reader = iw.getReader(); |
| + } |
| + |
| + private void closeIndex() throws IOException { |
| + reader.close(); |
| + iw.close(); |
| + dir.close(); |
| + } |
| + |
| + public void testEarlyTermination() throws IOException { |
| + final int iters = atLeast(8); |
| + for (int i = 0; i < iters; ++i) { |
| + createRandomIndex(false); |
| + for (int j = 0; j < iters; ++j) { |
| + final IndexSearcher searcher = newSearcher(reader); |
| + final int numHits = TestUtil.nextInt(random(), 1, numDocs); |
| + final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG, false)); |
| + final boolean fillFields = random().nextBoolean(); |
| + final boolean trackDocScores = random().nextBoolean(); |
| + final boolean trackMaxScore = random().nextBoolean(); |
| + final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore); |
| + final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore); |
| + |
| + final Query query; |
| + if (random().nextBoolean()) { |
| + query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms))); |
| + } else { |
| + query = new MatchAllDocsQuery(); |
| + } |
| + searcher.search(query, collector1); |
| + searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits)); |
| + assertTrue(collector1.getTotalHits() >= collector2.getTotalHits()); |
| + assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs); |
| + } |
| + closeIndex(); |
| + } |
| + } |
| + |
| + public void testCanEarlyTerminate() { |
| + assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| + new Sort(new SortField("a", SortField.Type.LONG)), |
| + new Sort(new SortField("a", SortField.Type.LONG)))); |
| + |
| + assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| + new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), |
| + new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); |
| + |
| + assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| + new Sort(new SortField("a", SortField.Type.LONG)), |
| + new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); |
| + |
| + assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| + new Sort(new SortField("a", SortField.Type.LONG, true)), |
| + new Sort(new SortField("a", SortField.Type.LONG, false)))); |
| + |
| + assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| + new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), |
| + new Sort(new SortField("a", SortField.Type.LONG)))); |
| + |
| + assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| + new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), |
| + new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING)))); |
| + |
| + assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| + new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), |
| + new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); |
| + } |
| + |
| + public void testEarlyTerminationDifferentSorter() throws IOException { |
| + createRandomIndex(true); |
| + |
| + Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false)); |
| + Collector c = new EarlyTerminatingSortingCollector(TopFieldCollector.create(sort, 10, true, true, true), sort, 10); |
| + IndexSearcher searcher = newSearcher(reader); |
| + Exception e = expectThrows(IllegalStateException.class, |
| + () -> { |
| + searcher.search(new MatchAllDocsQuery(), c); |
| + }); |
| + assertEquals("Cannot early terminate with sort order <long: \"ndv2\"> if segments are sorted with <long: \"ndv1\">", e.getMessage()); |
| + closeIndex(); |
| + } |
| + |
| + private static void assertTopDocsEquals(ScoreDoc[] scoreDocs1, ScoreDoc[] scoreDocs2) { |
| + assertEquals(scoreDocs1.length, scoreDocs2.length); |
| + for (int i = 0; i < scoreDocs1.length; ++i) { |
| + final ScoreDoc scoreDoc1 = scoreDocs1[i]; |
| + final ScoreDoc scoreDoc2 = scoreDocs2[i]; |
| + assertEquals(scoreDoc1.doc, scoreDoc2.doc); |
| + assertEquals(scoreDoc1.score, scoreDoc2.score, 0.001f); |
| + } |
| + } |
| + |
| + private class TestTerminatedEarlySimpleCollector extends SimpleCollector { |
| + private boolean collectedSomething; |
| + public boolean collectedSomething() { |
| + return collectedSomething; |
| + } |
| + @Override |
| + public void collect(int doc) throws IOException { |
| + collectedSomething = true; |
| + } |
| + @Override |
| + public boolean needsScores() { |
| + return false; |
| + } |
| + } |
| + |
| + private class TestEarlyTerminatingSortingCollectorQueryTimeout implements QueryTimeout { |
| + private final boolean shouldExit; |
| + public TestEarlyTerminatingSortingCollectorQueryTimeout(boolean shouldExit) { |
| + this.shouldExit = shouldExit; |
| + } |
| + public boolean shouldExit() { |
| + return shouldExit; |
| + } |
| + } |
| + |
| + public void testTerminatedEarly() throws IOException { |
| + final int iters = atLeast(8); |
| + for (int i = 0; i < iters; ++i) { |
| + createRandomIndex(true); |
| + |
| + final IndexSearcher searcher = new IndexSearcher(reader); // future TODO: use newSearcher(reader); |
| + final Query query = new MatchAllDocsQuery(); // search for everything/anything |
| + |
| + final TestTerminatedEarlySimpleCollector collector1 = new TestTerminatedEarlySimpleCollector(); |
| + searcher.search(query, collector1); |
| + |
| + final TestTerminatedEarlySimpleCollector collector2 = new TestTerminatedEarlySimpleCollector(); |
| + final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1); |
| + searcher.search(query, etsCollector); |
| + |
| + assertTrue("collector1="+collector1.collectedSomething()+" vs. collector2="+collector2.collectedSomething(), collector1.collectedSomething() == collector2.collectedSomething()); |
| + |
| + if (collector1.collectedSomething()) { |
| + // we collected something and since we modestly asked for just one document we should have terminated early |
| + assertTrue("should have terminated early (searcher.reader="+searcher.reader+")", etsCollector.terminatedEarly()); |
| + } |
| + closeIndex(); |
| + } |
| + } |
| + |
| +} |
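| |
| The collector tests above pair a query-time Sort with the segment-level index sort. A condensed usage sketch, not part of the patch: it assumes an IndexSearcher named searcher over an index written with a matching setIndexSort, and the field name "ndv1" is borrowed from the test fixture: |
| |
|     // Collect the top 10 hits by the same sort the segments are stored in; each segment can |
|     // be abandoned as soon as 10 docs have been seen, since they arrive in sort order. |
|     Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG)); |
|     TopFieldCollector topN = TopFieldCollector.create(sort, 10, true, false, false); |
|     searcher.search(new MatchAllDocsQuery(), new EarlyTerminatingSortingCollector(topN, sort, 10)); |
|     TopDocs hits = topN.topDocs(); // totalHits may undercount because segments were not fully visited |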
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java indexsort/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java 2016-04-24 06:00:33.345895727 -0400 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -1151,14 +1151,14 @@ |
| } |
| |
| private static Codec getCodec() { |
| - if (Codec.getDefault().getName().equals("Lucene60")) { |
| + if (Codec.getDefault().getName().equals("Lucene62")) { |
| int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); |
| double maxMBSortInHeap = 5.0 + (3*random().nextDouble()); |
| if (VERBOSE) { |
| System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap); |
| } |
| |
| - return new FilterCodec("Lucene60", Codec.getDefault()) { |
| + return new FilterCodec("Lucene62", Codec.getDefault()) { |
| @Override |
| public PointsFormat pointsFormat() { |
| return new PointsFormat() { |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java indexsort/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java |
| --- trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java 2016-04-24 06:00:46.369895938 -0400 |
| +++ indexsort/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -25,6 +25,7 @@ |
| import java.util.List; |
| |
| import org.apache.lucene.index.CorruptIndexException; |
| +import org.apache.lucene.index.MergeState; |
| import org.apache.lucene.index.PointValues.IntersectVisitor; |
| import org.apache.lucene.index.PointValues.Relation; |
| import org.apache.lucene.store.CorruptingIndexOutput; |
| @@ -554,7 +555,7 @@ |
| } |
| |
| List<Long> toMerge = null; |
| - List<Integer> docIDBases = null; |
| + List<MergeState.DocMap> docMaps = null; |
| int seg = 0; |
| |
| BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length, false); |
| @@ -601,9 +602,15 @@ |
| if (useMerge && segCount == valuesInThisSeg) { |
| if (toMerge == null) { |
| toMerge = new ArrayList<>(); |
| - docIDBases = new ArrayList<>(); |
| + docMaps = new ArrayList<>(); |
| } |
| - docIDBases.add(lastDocIDBase); |
| + final int curDocIDBase = lastDocIDBase; |
| + docMaps.add(new MergeState.DocMap() { |
| + @Override |
| + public int get(int docID) { |
| + return curDocIDBase + docID; |
| + } |
| + }); |
| toMerge.add(w.finish(out)); |
| valuesInThisSeg = TestUtil.nextInt(random(), numValues/10, numValues/2); |
| segCount = 0; |
| @@ -620,8 +627,14 @@ |
| |
| if (toMerge != null) { |
| if (segCount > 0) { |
| - docIDBases.add(lastDocIDBase); |
| toMerge.add(w.finish(out)); |
| + final int curDocIDBase = lastDocIDBase; |
| + docMaps.add(new MergeState.DocMap() { |
| + @Override |
| + public int get(int docID) { |
| + return curDocIDBase + docID; |
| + } |
| + }); |
| } |
| out.close(); |
| in = dir.openInput("bkd", IOContext.DEFAULT); |
| @@ -633,7 +646,7 @@ |
| readers.add(new BKDReader(in)); |
| } |
| out = dir.createOutput("bkd2", IOContext.DEFAULT); |
| - indexFP = w.merge(out, null, readers, docIDBases); |
| + indexFP = w.merge(out, docMaps, readers); |
| out.close(); |
| in.close(); |
| in = dir.openInput("bkd2", IOContext.DEFAULT); |
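| |
| Note how BKDWriter.merge now takes a List<MergeState.DocMap> instead of raw docID bases: a DocMap generalizes the base offset. As a hedged sketch (assuming the new DocMap contract in which get() returns -1 for documents the merge should drop, with maxDoc, liveDocs, and docIDBase as effectively final locals in scope), a delete-aware map could look like: |
| |
|     final PackedLongValues compact = MergeState.removeDeletes(maxDoc, liveDocs); |
|     MergeState.DocMap docMap = new MergeState.DocMap() { |
|       @Override |
|       public int get(int docID) { |
|         if (liveDocs.get(docID) == false) { |
|           return -1; // deleted: the merge skips this document |
|         } |
|         return docIDBase + (int) compact.get(docID); // rebase surviving docs into the merged segment |
|       } |
|     }; |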
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java indexsort/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java |
| --- trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java 2016-02-16 11:18:34.745021816 -0500 |
| +++ indexsort/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -21,7 +21,6 @@ |
| import java.util.Iterator; |
| |
| import org.apache.lucene.index.BinaryDocValues; |
| -import org.apache.lucene.index.PointValues; |
| import org.apache.lucene.index.DocValuesType; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| @@ -29,11 +28,13 @@ |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.NumericDocValues; |
| +import org.apache.lucene.index.PointValues; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedNumericDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.StoredFieldVisitor; |
| import org.apache.lucene.index.Terms; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.util.Bits; |
| |
| /** |
| @@ -178,4 +179,8 @@ |
| public void document(int docID, StoredFieldVisitor visitor) throws IOException { |
| } |
| |
| + @Override |
| + public Sort getIndexSort() { |
| + return null; |
| + } |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java indexsort/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java |
| --- trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java 2016-04-24 06:00:46.369895938 -0400 |
| +++ indexsort/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -40,6 +40,7 @@ |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.Scorer; |
| import org.apache.lucene.search.SimpleCollector; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.similarities.Similarity; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.apache.lucene.util.*; |
| @@ -1606,6 +1607,10 @@ |
| return info.getNormDocValues(); |
| } |
| |
| + @Override |
| + public Sort getIndexSort() { |
| + return null; |
| + } |
| } |
| |
| /** |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java indexsort/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java |
| --- trunk/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java 2016-01-24 13:09:50.028989954 -0500 |
| +++ indexsort/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -140,7 +140,7 @@ |
| SegmentInfo info = infoPerCommit.info; |
| // Same info just changing the dir: |
| SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.maxDoc(), |
| - info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>()); |
| + info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>(), null); |
| destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.getDelCount(), |
| infoPerCommit.getDelGen(), infoPerCommit.getFieldInfosGen(), |
| infoPerCommit.getDocValuesGen())); |
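| |
| The IndexSplitter change reflects the widened SegmentInfo constructor: the new trailing argument is the segment's index sort, null for unsorted segments. A short sketch of the same signature the patch uses in TestSegmentMerger, with dir, codec, and maxDoc assumed in scope: |
| |
|     Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); |
|     SegmentInfo si = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, |
|         Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), indexSort); |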
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java indexsort/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java |
| --- trunk/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java 2016-02-16 11:18:34.749021816 -0500 |
| +++ indexsort/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,259 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.DocValuesProducer; |
| -import org.apache.lucene.codecs.FieldsProducer; |
| -import org.apache.lucene.codecs.NormsProducer; |
| -import org.apache.lucene.codecs.StoredFieldsReader; |
| -import org.apache.lucene.codecs.TermVectorsReader; |
| -import org.apache.lucene.util.Bits; |
| - |
| -/** this is a hack to make SortingMP fast! */ |
| -class MergeReaderWrapper extends LeafReader { |
| - final SegmentReader in; |
| - final FieldsProducer fields; |
| - final NormsProducer norms; |
| - final DocValuesProducer docValues; |
| - final StoredFieldsReader store; |
| - final TermVectorsReader vectors; |
| - |
| - MergeReaderWrapper(SegmentReader in) throws IOException { |
| - this.in = in; |
| - |
| - FieldsProducer fields = in.getPostingsReader(); |
| - if (fields != null) { |
| - fields = fields.getMergeInstance(); |
| - } |
| - this.fields = fields; |
| - |
| - NormsProducer norms = in.getNormsReader(); |
| - if (norms != null) { |
| - norms = norms.getMergeInstance(); |
| - } |
| - this.norms = norms; |
| - |
| - DocValuesProducer docValues = in.getDocValuesReader(); |
| - if (docValues != null) { |
| - docValues = docValues.getMergeInstance(); |
| - } |
| - this.docValues = docValues; |
| - |
| - StoredFieldsReader store = in.getFieldsReader(); |
| - if (store != null) { |
| - store = store.getMergeInstance(); |
| - } |
| - this.store = store; |
| - |
| - TermVectorsReader vectors = in.getTermVectorsReader(); |
| - if (vectors != null) { |
| - vectors = vectors.getMergeInstance(); |
| - } |
| - this.vectors = vectors; |
| - } |
| - |
| - @Override |
| - public void addCoreClosedListener(CoreClosedListener listener) { |
| - in.addCoreClosedListener(listener); |
| - } |
| - |
| - @Override |
| - public void removeCoreClosedListener(CoreClosedListener listener) { |
| - in.removeCoreClosedListener(listener); |
| - } |
| - |
| - @Override |
| - public Fields fields() throws IOException { |
| - return fields; |
| - } |
| - |
| - @Override |
| - public NumericDocValues getNumericDocValues(String field) throws IOException { |
| - ensureOpen(); |
| - FieldInfo fi = getFieldInfos().fieldInfo(field); |
| - if (fi == null) { |
| - // Field does not exist |
| - return null; |
| - } |
| - if (fi.getDocValuesType() != DocValuesType.NUMERIC) { |
| - // Field was not indexed with doc values |
| - return null; |
| - } |
| - return docValues.getNumeric(fi); |
| - } |
| - |
| - @Override |
| - public BinaryDocValues getBinaryDocValues(String field) throws IOException { |
| - ensureOpen(); |
| - FieldInfo fi = getFieldInfos().fieldInfo(field); |
| - if (fi == null) { |
| - // Field does not exist |
| - return null; |
| - } |
| - if (fi.getDocValuesType() != DocValuesType.BINARY) { |
| - // Field was not indexed with doc values |
| - return null; |
| - } |
| - return docValues.getBinary(fi); |
| - } |
| - |
| - @Override |
| - public SortedDocValues getSortedDocValues(String field) throws IOException { |
| - ensureOpen(); |
| - FieldInfo fi = getFieldInfos().fieldInfo(field); |
| - if (fi == null) { |
| - // Field does not exist |
| - return null; |
| - } |
| - if (fi.getDocValuesType() != DocValuesType.SORTED) { |
| - // Field was not indexed with doc values |
| - return null; |
| - } |
| - return docValues.getSorted(fi); |
| - } |
| - |
| - @Override |
| - public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException { |
| - ensureOpen(); |
| - FieldInfo fi = getFieldInfos().fieldInfo(field); |
| - if (fi == null) { |
| - // Field does not exist |
| - return null; |
| - } |
| - if (fi.getDocValuesType() != DocValuesType.SORTED_NUMERIC) { |
| - // Field was not indexed with doc values |
| - return null; |
| - } |
| - return docValues.getSortedNumeric(fi); |
| - } |
| - |
| - @Override |
| - public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { |
| - ensureOpen(); |
| - FieldInfo fi = getFieldInfos().fieldInfo(field); |
| - if (fi == null) { |
| - // Field does not exist |
| - return null; |
| - } |
| - if (fi.getDocValuesType() != DocValuesType.SORTED_SET) { |
| - // Field was not indexed with doc values |
| - return null; |
| - } |
| - return docValues.getSortedSet(fi); |
| - } |
| - |
| - @Override |
| - public Bits getDocsWithField(String field) throws IOException { |
| - ensureOpen(); |
| - FieldInfo fi = getFieldInfos().fieldInfo(field); |
| - if (fi == null) { |
| - // Field does not exist |
| - return null; |
| - } |
| - if (fi.getDocValuesType() == DocValuesType.NONE) { |
| - // Field was not indexed with doc values |
| - return null; |
| - } |
| - return docValues.getDocsWithField(fi); |
| - } |
| - |
| - @Override |
| - public NumericDocValues getNormValues(String field) throws IOException { |
| - ensureOpen(); |
| - FieldInfo fi = getFieldInfos().fieldInfo(field); |
| - if (fi == null || !fi.hasNorms()) { |
| - // Field does not exist or does not index norms |
| - return null; |
| - } |
| - return norms.getNorms(fi); |
| - } |
| - |
| - @Override |
| - public FieldInfos getFieldInfos() { |
| - return in.getFieldInfos(); |
| - } |
| - |
| - @Override |
| - public Bits getLiveDocs() { |
| - return in.getLiveDocs(); |
| - } |
| - |
| - @Override |
| - public void checkIntegrity() throws IOException { |
| - in.checkIntegrity(); |
| - } |
| - |
| - @Override |
| - public Fields getTermVectors(int docID) throws IOException { |
| - ensureOpen(); |
| - checkBounds(docID); |
| - if (vectors == null) { |
| - return null; |
| - } |
| - return vectors.get(docID); |
| - } |
| - |
| - @Override |
| - public PointValues getPointValues() { |
| - return in.getPointValues(); |
| - } |
| - |
| - @Override |
| - public int numDocs() { |
| - return in.numDocs(); |
| - } |
| - |
| - @Override |
| - public int maxDoc() { |
| - return in.maxDoc(); |
| - } |
| - |
| - @Override |
| - public void document(int docID, StoredFieldVisitor visitor) throws IOException { |
| - ensureOpen(); |
| - checkBounds(docID); |
| - store.visitDocument(docID, visitor); |
| - } |
| - |
| - @Override |
| - protected void doClose() throws IOException { |
| - in.close(); |
| - } |
| - |
| - @Override |
| - public Object getCoreCacheKey() { |
| - return in.getCoreCacheKey(); |
| - } |
| - |
| - @Override |
| - public Object getCombinedCoreAndDeletesKey() { |
| - return in.getCombinedCoreAndDeletesKey(); |
| - } |
| - |
| - private void checkBounds(int docID) { |
| - if (docID < 0 || docID >= maxDoc()) { |
| - throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")"); |
| - } |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - return "MergeReaderWrapper(" + in + ")"; |
| - } |
| -} |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java indexsort/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java |
| --- trunk/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java 2016-03-13 05:38:07.391183845 -0400 |
| +++ indexsort/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -24,6 +24,7 @@ |
| import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues; |
| import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; |
| import org.apache.lucene.index.MultiDocValues.OrdinalMap; |
| +import org.apache.lucene.search.Sort; |
| import org.apache.lucene.util.Bits; |
| |
| /** |
| @@ -67,6 +68,11 @@ |
| if (getFieldInfos().hasPointValues()) { |
| throw new IllegalArgumentException("cannot wrap points"); |
| } |
| + for(LeafReaderContext context : reader.leaves()) { |
| + if (context.reader().getIndexSort() != null) { |
| + throw new IllegalArgumentException("cannot use index sort"); |
| + } |
| + } |
| fields = MultiFields.getFields(in); |
| in.registerParentReader(this); |
| this.merging = merging; |
| @@ -272,4 +278,9 @@ |
| ctx.reader().checkIntegrity(); |
| } |
| } |
| + |
| + @Override |
| + public Sort getIndexSort() { |
| + return null; |
| + } |
| } |
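| |
| With the guard added above, SlowCompositeReaderWrapper rejects index-sorted leaves up front instead of silently flattening them. A hedged sketch of the failure mode, assuming SlowCompositeReaderWrapper.wrap as the entry point and dir written with setIndexSort: |
| |
|     DirectoryReader sorted = DirectoryReader.open(dir); |
|     try { |
|       SlowCompositeReaderWrapper.wrap(sorted); // throws: leaves carry an index sort |
|     } catch (IllegalArgumentException e) { |
|       assert e.getMessage().equals("cannot use index sort"); |
|     } |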
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/Sorter.java indexsort/lucene/misc/src/java/org/apache/lucene/index/Sorter.java |
| --- trunk/lucene/misc/src/java/org/apache/lucene/index/Sorter.java 2016-05-06 05:10:21.045026439 -0400 |
| +++ indexsort/lucene/misc/src/java/org/apache/lucene/index/Sorter.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,287 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.io.IOException; |
| -import java.util.Comparator; |
| - |
| -import org.apache.lucene.search.DocIdSetIterator; |
| -import org.apache.lucene.search.LeafFieldComparator; |
| -import org.apache.lucene.search.Scorer; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.search.SortField; |
| -import org.apache.lucene.util.TimSorter; |
| -import org.apache.lucene.util.packed.PackedInts; |
| -import org.apache.lucene.util.packed.PackedLongValues; |
| - |
| -/** |
| - * Sorts documents of a given index by returning a permutation on the document |
| - * IDs. |
| - * @lucene.experimental |
| - */ |
| -final class Sorter { |
| - final Sort sort; |
| - |
| - /** Creates a new Sorter to sort the index with {@code sort} */ |
| - Sorter(Sort sort) { |
| - if (sort.needsScores()) { |
| - throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score"); |
| - } |
| - this.sort = sort; |
| - } |
| - |
| - /** |
| - * A permutation of doc IDs. For every document ID between <tt>0</tt> and |
| - * {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must |
| - * return <code>docID</code>. |
| - */ |
| - static abstract class DocMap { |
| - |
| - /** Given a doc ID from the original index, return its ordinal in the |
| - * sorted index. */ |
| - abstract int oldToNew(int docID); |
| - |
| - /** Given the ordinal of a doc ID, return its doc ID in the original index. */ |
| - abstract int newToOld(int docID); |
| - |
| - /** Return the number of documents in this map. This must be equal to the |
| - * {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the |
| - * {@link org.apache.lucene.index.LeafReader} which is sorted. */ |
| - abstract int size(); |
| - } |
| - |
| - /** Check consistency of a {@link DocMap}, useful for assertions. */ |
| - static boolean isConsistent(DocMap docMap) { |
| - final int maxDoc = docMap.size(); |
| - for (int i = 0; i < maxDoc; ++i) { |
| - final int newID = docMap.oldToNew(i); |
| - final int oldID = docMap.newToOld(newID); |
| - assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0, " + maxDoc + "), got " + newID; |
| - assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID; |
| - if (i != oldID || newID < 0 || newID >= maxDoc) { |
| - return false; |
| - } |
| - } |
| - return true; |
| - } |
| - |
| - /** A comparator of doc IDs. */ |
| - static abstract class DocComparator { |
| - |
| - /** Compare docID1 against docID2. The contract for the return value is the |
| - * same as {@link Comparator#compare(Object, Object)}. */ |
| - public abstract int compare(int docID1, int docID2); |
| - |
| - } |
| - |
| - private static final class DocValueSorter extends TimSorter { |
| - |
| - private final int[] docs; |
| - private final Sorter.DocComparator comparator; |
| - private final int[] tmp; |
| - |
| - DocValueSorter(int[] docs, Sorter.DocComparator comparator) { |
| - super(docs.length / 64); |
| - this.docs = docs; |
| - this.comparator = comparator; |
| - tmp = new int[docs.length / 64]; |
| - } |
| - |
| - @Override |
| - protected int compare(int i, int j) { |
| - return comparator.compare(docs[i], docs[j]); |
| - } |
| - |
| - @Override |
| - protected void swap(int i, int j) { |
| - int tmpDoc = docs[i]; |
| - docs[i] = docs[j]; |
| - docs[j] = tmpDoc; |
| - } |
| - |
| - @Override |
| - protected void copy(int src, int dest) { |
| - docs[dest] = docs[src]; |
| - } |
| - |
| - @Override |
| - protected void save(int i, int len) { |
| - System.arraycopy(docs, i, tmp, 0, len); |
| - } |
| - |
| - @Override |
| - protected void restore(int i, int j) { |
| - docs[j] = tmp[i]; |
| - } |
| - |
| - @Override |
| - protected int compareSaved(int i, int j) { |
| - return comparator.compare(tmp[i], docs[j]); |
| - } |
| - } |
| - |
| - /** Computes the old-to-new permutation over the given comparator. */ |
| - private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) { |
| - // check if the index is sorted |
| - boolean sorted = true; |
| - for (int i = 1; i < maxDoc; ++i) { |
| - if (comparator.compare(i-1, i) > 0) { |
| - sorted = false; |
| - break; |
| - } |
| - } |
| - if (sorted) { |
| - return null; |
| - } |
| - |
| - // sort doc IDs |
| - final int[] docs = new int[maxDoc]; |
| - for (int i = 0; i < maxDoc; i++) { |
| - docs[i] = i; |
| - } |
| - |
| - DocValueSorter sorter = new DocValueSorter(docs, comparator); |
| - // It can be common to sort a reader, add docs, sort it again, ... and in |
| - // that case TimSort can save a lot of time |
| - sorter.sort(0, docs.length); // docs is now the newToOld mapping |
| - |
| - // We use a monotonic PackedLongValues here because it wastes very |
| - // little memory if the index is in random order but can save a lot |
| - // of memory if the index is already "almost" sorted |
| - final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| - for (int i = 0; i < maxDoc; ++i) { |
| - newToOldBuilder.add(docs[i]); |
| - } |
| - final PackedLongValues newToOld = newToOldBuilder.build(); |
| - |
| - for (int i = 0; i < maxDoc; ++i) { |
| - docs[(int) newToOld.get(i)] = i; |
| - } // docs is now the oldToNew mapping |
| - |
| - final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| - for (int i = 0; i < maxDoc; ++i) { |
| - oldToNewBuilder.add(docs[i]); |
| - } |
| - final PackedLongValues oldToNew = oldToNewBuilder.build(); |
| - |
| - return new Sorter.DocMap() { |
| - |
| - @Override |
| - public int oldToNew(int docID) { |
| - return (int) oldToNew.get(docID); |
| - } |
| - |
| - @Override |
| - public int newToOld(int docID) { |
| - return (int) newToOld.get(docID); |
| - } |
| - |
| - @Override |
| - public int size() { |
| - return maxDoc; |
| - } |
| - }; |
| - } |
| - |
| - /** |
| - * Returns a mapping from the old document ID to its new location in the |
| - * sorted index. Implementations can use the auxiliary |
| - * {@link #sort(int, DocComparator)} to compute the old-to-new permutation |
| - * given a list of documents and their corresponding values. |
| - * <p> |
| - * A return value of <tt>null</tt> is allowed and means that |
| - * <code>reader</code> is already sorted. |
| - * <p> |
| - * <b>NOTE:</b> deleted documents are expected to appear in the mapping as |
| - * well; they will, however, be marked as deleted in the sorted view. |
| - */ |
| - DocMap sort(LeafReader reader) throws IOException { |
| - SortField fields[] = sort.getSort(); |
| - final int reverseMul[] = new int[fields.length]; |
| - final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length]; |
| - |
| - for (int i = 0; i < fields.length; i++) { |
| - reverseMul[i] = fields[i].getReverse() ? -1 : 1; |
| - comparators[i] = fields[i].getComparator(1, i).getLeafComparator(reader.getContext()); |
| - comparators[i].setScorer(FAKESCORER); |
| - } |
| - final DocComparator comparator = new DocComparator() { |
| - @Override |
| - public int compare(int docID1, int docID2) { |
| - try { |
| - for (int i = 0; i < comparators.length; i++) { |
| - // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co, |
| - // the segments are always the same here... |
| - comparators[i].copy(0, docID1); |
| - comparators[i].setBottom(0); |
| - int comp = reverseMul[i] * comparators[i].compareBottom(docID2); |
| - if (comp != 0) { |
| - return comp; |
| - } |
| - } |
| - return Integer.compare(docID1, docID2); // docid order tiebreak |
| - } catch (IOException e) { |
| - throw new RuntimeException(e); |
| - } |
| - } |
| - }; |
| - return sort(reader.maxDoc(), comparator); |
| - } |
| - |
| - /** |
| - * Returns the identifier of this {@link Sorter}. |
| - * <p>This identifier is similar to {@link Object#hashCode()} and should be |
| - * chosen so that two instances of this class that sort documents likewise |
| - * will have the same identifier. On the contrary, this identifier should be |
| - * different on different {@link Sort sorts}. |
| - */ |
| - public String getID() { |
| - return sort.toString(); |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - return getID(); |
| - } |
| - |
| - static final Scorer FAKESCORER = new Scorer(null) { |
| - |
| - float score; |
| - int doc = -1; |
| - int freq = 1; |
| - |
| - @Override |
| - public int docID() { |
| - return doc; |
| - } |
| - |
| - public DocIdSetIterator iterator() { |
| - throw new UnsupportedOperationException(); |
| - } |
| - |
| - @Override |
| - public int freq() throws IOException { |
| - return freq; |
| - } |
| - |
| - @Override |
| - public float score() throws IOException { |
| - return score; |
| - } |
| - }; |
| - |
| -} |
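| As a reference for the DocMap contract the removed Sorter implemented, a minimal array-backed sketch; the permutation values are an assumed example, not taken from the patch: |
| |
| // oldToNew and newToOld must be inverse permutations over [0, maxDoc). |
| int[] newToOld = {2, 0, 1}; // assumed: sorted position -> original docID |
| int[] oldToNew = new int[newToOld.length]; |
| for (int i = 0; i < newToOld.length; i++) { |
|   oldToNew[newToOld[i]] = i; // invert the permutation |
| } |
| for (int doc = 0; doc < newToOld.length; doc++) { |
|   assert oldToNew[newToOld[doc]] == doc; // exactly what Sorter.isConsistent checks |
| } |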
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java indexsort/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java |
| --- trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java 2016-03-08 17:22:26.836938630 -0500 |
| +++ indexsort/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,940 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.io.IOException; |
| -import java.util.Arrays; |
| - |
| -import org.apache.lucene.index.Sorter.DocMap; |
| -import org.apache.lucene.search.DocIdSetIterator; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.store.RAMFile; |
| -import org.apache.lucene.store.RAMInputStream; |
| -import org.apache.lucene.store.RAMOutputStream; |
| -import org.apache.lucene.util.ArrayUtil; |
| -import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.TimSorter; |
| -import org.apache.lucene.util.automaton.CompiledAutomaton; |
| - |
| -/** |
| - * An {@link org.apache.lucene.index.LeafReader} which supports sorting documents by a given |
| - * {@link Sort}. You can use this class to sort an index as follows: |
| - * |
| - * <pre class="prettyprint"> |
| - * IndexWriter writer; // writer to which the sorted index will be added |
| - * DirectoryReader reader; // reader on the input index |
| - * Sort sort; // determines how the documents are sorted |
| - * LeafReader sortingReader = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort); |
| - * writer.addIndexes(SlowCodecReaderWrapper.wrap(sortingReader)); |
| - * writer.close(); |
| - * reader.close(); |
| - * </pre> |
| - * |
| - * @lucene.experimental |
| - */ |
| -public class SortingLeafReader extends FilterLeafReader { |
| - |
| - private static class SortingFields extends FilterFields { |
| - |
| - private final Sorter.DocMap docMap; |
| - private final FieldInfos infos; |
| - |
| - public SortingFields(final Fields in, FieldInfos infos, Sorter.DocMap docMap) { |
| - super(in); |
| - this.docMap = docMap; |
| - this.infos = infos; |
| - } |
| - |
| - @Override |
| - public Terms terms(final String field) throws IOException { |
| - Terms terms = in.terms(field); |
| - if (terms == null) { |
| - return null; |
| - } else { |
| - return new SortingTerms(terms, infos.fieldInfo(field).getIndexOptions(), docMap); |
| - } |
| - } |
| - |
| - } |
| - |
| - private static class SortingTerms extends FilterTerms { |
| - |
| - private final Sorter.DocMap docMap; |
| - private final IndexOptions indexOptions; |
| - |
| - public SortingTerms(final Terms in, IndexOptions indexOptions, final Sorter.DocMap docMap) { |
| - super(in); |
| - this.docMap = docMap; |
| - this.indexOptions = indexOptions; |
| - } |
| - |
| - @Override |
| - public TermsEnum iterator() throws IOException { |
| - return new SortingTermsEnum(in.iterator(), docMap, indexOptions, hasPositions()); |
| - } |
| - |
| - @Override |
| - public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) |
| - throws IOException { |
| - return new SortingTermsEnum(in.intersect(compiled, startTerm), docMap, indexOptions, hasPositions()); |
| - } |
| - |
| - } |
| - |
| - private static class SortingTermsEnum extends FilterTermsEnum { |
| - |
| - final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods |
| - private final IndexOptions indexOptions; |
| - private final boolean hasPositions; |
| - |
| - public SortingTermsEnum(final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions, boolean hasPositions) { |
| - super(in); |
| - this.docMap = docMap; |
| - this.indexOptions = indexOptions; |
| - this.hasPositions = hasPositions; |
| - } |
| - |
| - Bits newToOld(final Bits liveDocs) { |
| - if (liveDocs == null) { |
| - return null; |
| - } |
| - return new Bits() { |
| - |
| - @Override |
| - public boolean get(int index) { |
| - return liveDocs.get(docMap.oldToNew(index)); |
| - } |
| - |
| - @Override |
| - public int length() { |
| - return liveDocs.length(); |
| - } |
| - |
| - }; |
| - } |
| - |
| - @Override |
| - public PostingsEnum postings(PostingsEnum reuse, final int flags) throws IOException { |
| - |
| - if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) { |
| - final PostingsEnum inReuse; |
| - final SortingPostingsEnum wrapReuse; |
| - if (reuse != null && reuse instanceof SortingPostingsEnum) { |
| - // if we're asked to reuse the given DocsEnum and it is Sorting, return |
| - // the wrapped one, since some Codecs expect it. |
| - wrapReuse = (SortingPostingsEnum) reuse; |
| - inReuse = wrapReuse.getWrapped(); |
| - } else { |
| - wrapReuse = null; |
| - inReuse = reuse; |
| - } |
| - |
| - final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags); |
| - // we ignore the fact that offsets may be stored but not asked for, |
| - // since this code is expected to be used during addIndexes which will |
| - // ask for everything. if that assumption changes in the future, we can |
| - // factor in whether 'flags' says offsets are not required. |
| - final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| - return new SortingPostingsEnum(docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets); |
| - } |
| - |
| - final PostingsEnum inReuse; |
| - final SortingDocsEnum wrapReuse; |
| - if (reuse != null && reuse instanceof SortingDocsEnum) { |
| - // if we're asked to reuse the given DocsEnum and it is Sorting, return |
| - // the wrapped one, since some Codecs expect it. |
| - wrapReuse = (SortingDocsEnum) reuse; |
| - inReuse = wrapReuse.getWrapped(); |
| - } else { |
| - wrapReuse = null; |
| - inReuse = reuse; |
| - } |
| - |
| - final PostingsEnum inDocs = in.postings(inReuse, flags); |
| - final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS); |
| - return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap); |
| - } |
| - |
| - } |
| - |
| - private static class SortingBinaryDocValues extends BinaryDocValues { |
| - |
| - private final BinaryDocValues in; |
| - private final Sorter.DocMap docMap; |
| - |
| - SortingBinaryDocValues(BinaryDocValues in, Sorter.DocMap docMap) { |
| - this.in = in; |
| - this.docMap = docMap; |
| - } |
| - |
| - @Override |
| - public BytesRef get(int docID) { |
| - return in.get(docMap.newToOld(docID)); |
| - } |
| - } |
| - |
| - private static class SortingNumericDocValues extends NumericDocValues { |
| - |
| - private final NumericDocValues in; |
| - private final Sorter.DocMap docMap; |
| - |
| - public SortingNumericDocValues(final NumericDocValues in, Sorter.DocMap docMap) { |
| - this.in = in; |
| - this.docMap = docMap; |
| - } |
| - |
| - @Override |
| - public long get(int docID) { |
| - return in.get(docMap.newToOld(docID)); |
| - } |
| - } |
| - |
| - private static class SortingSortedNumericDocValues extends SortedNumericDocValues { |
| - |
| - private final SortedNumericDocValues in; |
| - private final Sorter.DocMap docMap; |
| - |
| - SortingSortedNumericDocValues(SortedNumericDocValues in, DocMap docMap) { |
| - this.in = in; |
| - this.docMap = docMap; |
| - } |
| - |
| - @Override |
| - public int count() { |
| - return in.count(); |
| - } |
| - |
| - @Override |
| - public void setDocument(int doc) { |
| - in.setDocument(docMap.newToOld(doc)); |
| - } |
| - |
| - @Override |
| - public long valueAt(int index) { |
| - return in.valueAt(index); |
| - } |
| - } |
| - |
| - private static class SortingBits implements Bits { |
| - |
| - private final Bits in; |
| - private final Sorter.DocMap docMap; |
| - |
| - public SortingBits(final Bits in, Sorter.DocMap docMap) { |
| - this.in = in; |
| - this.docMap = docMap; |
| - } |
| - |
| - @Override |
| - public boolean get(int index) { |
| - return in.get(docMap.newToOld(index)); |
| - } |
| - |
| - @Override |
| - public int length() { |
| - return in.length(); |
| - } |
| - } |
| - |
| - private static class SortingPointValues extends PointValues { |
| - |
| - private final PointValues in; |
| - private final Sorter.DocMap docMap; |
| - |
| - public SortingPointValues(final PointValues in, Sorter.DocMap docMap) { |
| - this.in = in; |
| - this.docMap = docMap; |
| - } |
| - |
| - @Override |
| - public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { |
| - in.intersect(fieldName, |
| - new IntersectVisitor() { |
| - @Override |
| - public void visit(int docID) throws IOException { |
| - visitor.visit(docMap.oldToNew(docID)); |
| - } |
| - |
| - @Override |
| - public void visit(int docID, byte[] packedValue) throws IOException { |
| - visitor.visit(docMap.oldToNew(docID), packedValue); |
| - } |
| - |
| - @Override |
| - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| - return visitor.compare(minPackedValue, maxPackedValue); |
| - } |
| - }); |
| - } |
| - |
| - @Override |
| - public byte[] getMinPackedValue(String fieldName) throws IOException { |
| - return in.getMinPackedValue(fieldName); |
| - } |
| - |
| - @Override |
| - public byte[] getMaxPackedValue(String fieldName) throws IOException { |
| - return in.getMaxPackedValue(fieldName); |
| - } |
| - |
| - @Override |
| - public int getNumDimensions(String fieldName) throws IOException { |
| - return in.getNumDimensions(fieldName); |
| - } |
| - |
| - @Override |
| - public int getBytesPerDimension(String fieldName) throws IOException { |
| - return in.getBytesPerDimension(fieldName); |
| - } |
| - |
| - @Override |
| - public long size(String fieldName) { |
| - return in.size(fieldName); |
| - } |
| - |
| - @Override |
| - public int getDocCount(String fieldName) { |
| - return in.getDocCount(fieldName); |
| - } |
| - } |
| - |
| - private static class SortingSortedDocValues extends SortedDocValues { |
| - |
| - private final SortedDocValues in; |
| - private final Sorter.DocMap docMap; |
| - |
| - SortingSortedDocValues(SortedDocValues in, Sorter.DocMap docMap) { |
| - this.in = in; |
| - this.docMap = docMap; |
| - } |
| - |
| - @Override |
| - public int getOrd(int docID) { |
| - return in.getOrd(docMap.newToOld(docID)); |
| - } |
| - |
| - @Override |
| - public BytesRef lookupOrd(int ord) { |
| - return in.lookupOrd(ord); |
| - } |
| - |
| - @Override |
| - public int getValueCount() { |
| - return in.getValueCount(); |
| - } |
| - |
| - @Override |
| - public BytesRef get(int docID) { |
| - return in.get(docMap.newToOld(docID)); |
| - } |
| - |
| - @Override |
| - public int lookupTerm(BytesRef key) { |
| - return in.lookupTerm(key); |
| - } |
| - } |
| - |
| - private static class SortingSortedSetDocValues extends SortedSetDocValues { |
| - |
| - private final SortedSetDocValues in; |
| - private final Sorter.DocMap docMap; |
| - |
| - SortingSortedSetDocValues(SortedSetDocValues in, Sorter.DocMap docMap) { |
| - this.in = in; |
| - this.docMap = docMap; |
| - } |
| - |
| - @Override |
| - public long nextOrd() { |
| - return in.nextOrd(); |
| - } |
| - |
| - @Override |
| - public void setDocument(int docID) { |
| - in.setDocument(docMap.newToOld(docID)); |
| - } |
| - |
| - @Override |
| - public BytesRef lookupOrd(long ord) { |
| - return in.lookupOrd(ord); |
| - } |
| - |
| - @Override |
| - public long getValueCount() { |
| - return in.getValueCount(); |
| - } |
| - |
| - @Override |
| - public long lookupTerm(BytesRef key) { |
| - return in.lookupTerm(key); |
| - } |
| - } |
| - |
| - static class SortingDocsEnum extends FilterPostingsEnum { |
| - |
| - private static final class DocFreqSorter extends TimSorter { |
| - |
| - private int[] docs; |
| - private int[] freqs; |
| - private final int[] tmpDocs; |
| - private int[] tmpFreqs; |
| - |
| - public DocFreqSorter(int maxDoc) { |
| - super(maxDoc / 64); |
| - this.tmpDocs = new int[maxDoc / 64]; |
| - } |
| - |
| - public void reset(int[] docs, int[] freqs) { |
| - this.docs = docs; |
| - this.freqs = freqs; |
| - if (freqs != null && tmpFreqs == null) { |
| - tmpFreqs = new int[tmpDocs.length]; |
| - } |
| - } |
| - |
| - @Override |
| - protected int compare(int i, int j) { |
| - return docs[i] - docs[j]; |
| - } |
| - |
| - @Override |
| - protected void swap(int i, int j) { |
| - int tmpDoc = docs[i]; |
| - docs[i] = docs[j]; |
| - docs[j] = tmpDoc; |
| - |
| - if (freqs != null) { |
| - int tmpFreq = freqs[i]; |
| - freqs[i] = freqs[j]; |
| - freqs[j] = tmpFreq; |
| - } |
| - } |
| - |
| - @Override |
| - protected void copy(int src, int dest) { |
| - docs[dest] = docs[src]; |
| - if (freqs != null) { |
| - freqs[dest] = freqs[src]; |
| - } |
| - } |
| - |
| - @Override |
| - protected void save(int i, int len) { |
| - System.arraycopy(docs, i, tmpDocs, 0, len); |
| - if (freqs != null) { |
| - System.arraycopy(freqs, i, tmpFreqs, 0, len); |
| - } |
| - } |
| - |
| - @Override |
| - protected void restore(int i, int j) { |
| - docs[j] = tmpDocs[i]; |
| - if (freqs != null) { |
| - freqs[j] = tmpFreqs[i]; |
| - } |
| - } |
| - |
| - @Override |
| - protected int compareSaved(int i, int j) { |
| - return tmpDocs[i] - docs[j]; |
| - } |
| - } |
| - |
| - private final int maxDoc; |
| - private final DocFreqSorter sorter; |
| - private int[] docs; |
| - private int[] freqs; |
| - private int docIt = -1; |
| - private final int upto; |
| - private final boolean withFreqs; |
| - |
| - SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, final PostingsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException { |
| - super(in); |
| - this.maxDoc = maxDoc; |
| - this.withFreqs = withFreqs; |
| - if (reuse != null) { |
| - if (reuse.maxDoc == maxDoc) { |
| - sorter = reuse.sorter; |
| - } else { |
| - sorter = new DocFreqSorter(maxDoc); |
| - } |
| - docs = reuse.docs; |
| - freqs = reuse.freqs; // maybe null |
| - } else { |
| - docs = new int[64]; |
| - sorter = new DocFreqSorter(maxDoc); |
| - } |
| - docIt = -1; |
| - int i = 0; |
| - int doc; |
| - if (withFreqs) { |
| - if (freqs == null || freqs.length < docs.length) { |
| - freqs = new int[docs.length]; |
| - } |
| - while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){ |
| - if (i >= docs.length) { |
| - docs = ArrayUtil.grow(docs, docs.length + 1); |
| - freqs = ArrayUtil.grow(freqs, freqs.length + 1); |
| - } |
| - docs[i] = docMap.oldToNew(doc); |
| - freqs[i] = in.freq(); |
| - ++i; |
| - } |
| - } else { |
| - freqs = null; |
| - while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){ |
| - if (i >= docs.length) { |
| - docs = ArrayUtil.grow(docs, docs.length + 1); |
| - } |
| - docs[i++] = docMap.oldToNew(doc); |
| - } |
| - } |
| - // TimSort can save much time compared to other sorts in case of |
| - // reverse sorting, or when sorting a concatenation of sorted readers |
| - sorter.reset(docs, freqs); |
| - sorter.sort(0, i); |
| - upto = i; |
| - } |
| - |
| - // for testing |
| - boolean reused(PostingsEnum other) { |
| - if (other == null || !(other instanceof SortingDocsEnum)) { |
| - return false; |
| - } |
| - return docs == ((SortingDocsEnum) other).docs; |
| - } |
| - |
| - @Override |
| - public int advance(final int target) throws IOException { |
| - // need to support it for checkIndex, but in practice it won't be called, so |
| - // don't bother to implement efficiently for now. |
| - return slowAdvance(target); |
| - } |
| - |
| - @Override |
| - public int docID() { |
| - return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt]; |
| - } |
| - |
| - @Override |
| - public int freq() throws IOException { |
| - return withFreqs && docIt < upto ? freqs[docIt] : 1; |
| - } |
| - |
| - @Override |
| - public int nextDoc() throws IOException { |
| - if (++docIt >= upto) return NO_MORE_DOCS; |
| - return docs[docIt]; |
| - } |
| - |
| - /** Returns the wrapped {@link PostingsEnum}. */ |
| - PostingsEnum getWrapped() { |
| - return in; |
| - } |
| - |
| - // we buffer up docs/freqs only; we don't forward any position requests to the underlying enum |
| - |
| - @Override |
| - public int nextPosition() throws IOException { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public int startOffset() throws IOException { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public int endOffset() throws IOException { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public BytesRef getPayload() throws IOException { |
| - return null; |
| - } |
| - } |
| - |
| - static class SortingPostingsEnum extends FilterPostingsEnum { |
| - |
| - /** |
| - * A {@link TimSorter} which sorts two parallel arrays of doc IDs and |
| - * offsets in one go. Every time a doc ID is 'swapped', its corresponding offset |
| - * is swapped too. |
| - */ |
| - private static final class DocOffsetSorter extends TimSorter { |
| - |
| - private int[] docs; |
| - private long[] offsets; |
| - private final int[] tmpDocs; |
| - private final long[] tmpOffsets; |
| - |
| - public DocOffsetSorter(int maxDoc) { |
| - super(maxDoc / 64); |
| - this.tmpDocs = new int[maxDoc / 64]; |
| - this.tmpOffsets = new long[maxDoc / 64]; |
| - } |
| - |
| - public void reset(int[] docs, long[] offsets) { |
| - this.docs = docs; |
| - this.offsets = offsets; |
| - } |
| - |
| - @Override |
| - protected int compare(int i, int j) { |
| - return docs[i] - docs[j]; |
| - } |
| - |
| - @Override |
| - protected void swap(int i, int j) { |
| - int tmpDoc = docs[i]; |
| - docs[i] = docs[j]; |
| - docs[j] = tmpDoc; |
| - |
| - long tmpOffset = offsets[i]; |
| - offsets[i] = offsets[j]; |
| - offsets[j] = tmpOffset; |
| - } |
| - |
| - @Override |
| - protected void copy(int src, int dest) { |
| - docs[dest] = docs[src]; |
| - offsets[dest] = offsets[src]; |
| - } |
| - |
| - @Override |
| - protected void save(int i, int len) { |
| - System.arraycopy(docs, i, tmpDocs, 0, len); |
| - System.arraycopy(offsets, i, tmpOffsets, 0, len); |
| - } |
| - |
| - @Override |
| - protected void restore(int i, int j) { |
| - docs[j] = tmpDocs[i]; |
| - offsets[j] = tmpOffsets[i]; |
| - } |
| - |
| - @Override |
| - protected int compareSaved(int i, int j) { |
| - return tmpDocs[i] - docs[j]; |
| - } |
| - } |
| - |
| - private final int maxDoc; |
| - private final DocOffsetSorter sorter; |
| - private int[] docs; |
| - private long[] offsets; |
| - private final int upto; |
| - |
| - private final IndexInput postingInput; |
| - private final boolean storeOffsets; |
| - |
| - private int docIt = -1; |
| - private int pos; |
| - private int startOffset = -1; |
| - private int endOffset = -1; |
| - private final BytesRef payload; |
| - private int currFreq; |
| - |
| - private final RAMFile file; |
| - |
| - SortingPostingsEnum(int maxDoc, SortingPostingsEnum reuse, final PostingsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException { |
| - super(in); |
| - this.maxDoc = maxDoc; |
| - this.storeOffsets = storeOffsets; |
| - if (reuse != null) { |
| - docs = reuse.docs; |
| - offsets = reuse.offsets; |
| - payload = reuse.payload; |
| - file = reuse.file; |
| - if (reuse.maxDoc == maxDoc) { |
| - sorter = reuse.sorter; |
| - } else { |
| - sorter = new DocOffsetSorter(maxDoc); |
| - } |
| - } else { |
| - docs = new int[32]; |
| - offsets = new long[32]; |
| - payload = new BytesRef(32); |
| - file = new RAMFile(); |
| - sorter = new DocOffsetSorter(maxDoc); |
| - } |
| - final IndexOutput out = new RAMOutputStream(file, false); |
| - int doc; |
| - int i = 0; |
| - while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { |
| - if (i == docs.length) { |
| - final int newLength = ArrayUtil.oversize(i + 1, 4); |
| - docs = Arrays.copyOf(docs, newLength); |
| - offsets = Arrays.copyOf(offsets, newLength); |
| - } |
| - docs[i] = docMap.oldToNew(doc); |
| - offsets[i] = out.getFilePointer(); |
| - addPositions(in, out); |
| - i++; |
| - } |
| - upto = i; |
| - sorter.reset(docs, offsets); |
| - sorter.sort(0, upto); |
| - out.close(); |
| - this.postingInput = new RAMInputStream("", file); |
| - } |
| - |
| - // for testing |
| - boolean reused(PostingsEnum other) { |
| - if (other == null || !(other instanceof SortingPostingsEnum)) { |
| - return false; |
| - } |
| - return docs == ((SortingPostingsEnum) other).docs; |
| - } |
| - |
| - private void addPositions(final PostingsEnum in, final IndexOutput out) throws IOException { |
| - int freq = in.freq(); |
| - out.writeVInt(freq); |
| - int previousPosition = 0; |
| - int previousEndOffset = 0; |
| - for (int i = 0; i < freq; i++) { |
| - final int pos = in.nextPosition(); |
| - final BytesRef payload = in.getPayload(); |
| - // The low-order bit of token is set only if there is a payload, the |
| - // previous bits are the delta-encoded position. |
| - final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1); |
| - out.writeVInt(token); |
| - previousPosition = pos; |
| - if (storeOffsets) { // don't encode offsets if they are not stored |
| - final int startOffset = in.startOffset(); |
| - final int endOffset = in.endOffset(); |
| - out.writeVInt(startOffset - previousEndOffset); |
| - out.writeVInt(endOffset - startOffset); |
| - previousEndOffset = endOffset; |
| - } |
| - if (payload != null) { |
| - out.writeVInt(payload.length); |
| - out.writeBytes(payload.bytes, payload.offset, payload.length); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public int advance(final int target) throws IOException { |
| - // need to support it for checkIndex, but in practice it won't be called, so |
| - // don't bother to implement efficiently for now. |
| - return slowAdvance(target); |
| - } |
| - |
| - @Override |
| - public int docID() { |
| - return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt]; |
| - } |
| - |
| - @Override |
| - public int endOffset() throws IOException { |
| - return endOffset; |
| - } |
| - |
| - @Override |
| - public int freq() throws IOException { |
| - return currFreq; |
| - } |
| - |
| - @Override |
| - public BytesRef getPayload() throws IOException { |
| - return payload.length == 0 ? null : payload; |
| - } |
| - |
| - @Override |
| - public int nextDoc() throws IOException { |
| - if (++docIt >= upto) return DocIdSetIterator.NO_MORE_DOCS; |
| - postingInput.seek(offsets[docIt]); |
| - currFreq = postingInput.readVInt(); |
| - // reset variables used in nextPosition |
| - pos = 0; |
| - endOffset = 0; |
| - return docs[docIt]; |
| - } |
| - |
| - @Override |
| - public int nextPosition() throws IOException { |
| - final int token = postingInput.readVInt(); |
| - pos += token >>> 1; |
| - if (storeOffsets) { |
| - startOffset = endOffset + postingInput.readVInt(); |
| - endOffset = startOffset + postingInput.readVInt(); |
| - } |
| - if ((token & 1) != 0) { |
| - payload.offset = 0; |
| - payload.length = postingInput.readVInt(); |
| - if (payload.length > payload.bytes.length) { |
| - payload.bytes = new byte[ArrayUtil.oversize(payload.length, 1)]; |
| - } |
| - postingInput.readBytes(payload.bytes, 0, payload.length); |
| - } else { |
| - payload.length = 0; |
| - } |
| - return pos; |
| - } |
| - |
| - @Override |
| - public int startOffset() throws IOException { |
| - return startOffset; |
| - } |
| - |
| - /** Returns the wrapped {@link PostingsEnum}. */ |
| - PostingsEnum getWrapped() { |
| - return in; |
| - } |
| - } |
| - |
| - /** Return a sorted view of <code>reader</code> according to the order |
| - * defined by <code>sort</code>. If the reader is already sorted, this |
| - * method might return the reader as-is. */ |
| - public static LeafReader wrap(LeafReader reader, Sort sort) throws IOException { |
| - return wrap(reader, new Sorter(sort).sort(reader)); |
| - } |
| - |
| - /** Expert: same as {@link #wrap(org.apache.lucene.index.LeafReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */ |
| - static LeafReader wrap(LeafReader reader, Sorter.DocMap docMap) { |
| - if (docMap == null) { |
| - // the reader is already sorted |
| - return reader; |
| - } |
| - if (reader.maxDoc() != docMap.size()) { |
| - throw new IllegalArgumentException("reader.maxDoc() should be equal to docMap.size(), got" + reader.maxDoc() + " != " + docMap.size()); |
| - } |
| - assert Sorter.isConsistent(docMap); |
| - return new SortingLeafReader(reader, docMap); |
| - } |
| - |
| - final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods |
| - |
| - private SortingLeafReader(final LeafReader in, final Sorter.DocMap docMap) { |
| - super(in); |
| - this.docMap = docMap; |
| - } |
| - |
| - @Override |
| - public void document(final int docID, final StoredFieldVisitor visitor) throws IOException { |
| - in.document(docMap.newToOld(docID), visitor); |
| - } |
| - |
| - @Override |
| - public Fields fields() throws IOException { |
| - return new SortingFields(in.fields(), in.getFieldInfos(), docMap); |
| - } |
| - |
| - @Override |
| - public BinaryDocValues getBinaryDocValues(String field) throws IOException { |
| - BinaryDocValues oldDocValues = in.getBinaryDocValues(field); |
| - if (oldDocValues == null) { |
| - return null; |
| - } else { |
| - return new SortingBinaryDocValues(oldDocValues, docMap); |
| - } |
| - } |
| - |
| - @Override |
| - public Bits getLiveDocs() { |
| - final Bits inLiveDocs = in.getLiveDocs(); |
| - if (inLiveDocs == null) { |
| - return null; |
| - } else { |
| - return new SortingBits(inLiveDocs, docMap); |
| - } |
| - } |
| - |
| - @Override |
| - public PointValues getPointValues() { |
| - final PointValues inPointValues = in.getPointValues(); |
| - if (inPointValues == null) { |
| - return null; |
| - } else { |
| - // TODO: this is untested! |
| - return new SortingPointValues(inPointValues, docMap); |
| - } |
| - } |
| - |
| - @Override |
| - public NumericDocValues getNormValues(String field) throws IOException { |
| - final NumericDocValues norm = in.getNormValues(field); |
| - if (norm == null) { |
| - return null; |
| - } else { |
| - return new SortingNumericDocValues(norm, docMap); |
| - } |
| - } |
| - |
| - @Override |
| - public NumericDocValues getNumericDocValues(String field) throws IOException { |
| - final NumericDocValues oldDocValues = in.getNumericDocValues(field); |
| - if (oldDocValues == null) return null; |
| - return new SortingNumericDocValues(oldDocValues, docMap); |
| - } |
| - |
| - @Override |
| - public SortedNumericDocValues getSortedNumericDocValues(String field) |
| - throws IOException { |
| - final SortedNumericDocValues oldDocValues = in.getSortedNumericDocValues(field); |
| - if (oldDocValues == null) { |
| - return null; |
| - } else { |
| - return new SortingSortedNumericDocValues(oldDocValues, docMap); |
| - } |
| - } |
| - |
| - @Override |
| - public SortedDocValues getSortedDocValues(String field) throws IOException { |
| - SortedDocValues sortedDV = in.getSortedDocValues(field); |
| - if (sortedDV == null) { |
| - return null; |
| - } else { |
| - return new SortingSortedDocValues(sortedDV, docMap); |
| - } |
| - } |
| - |
| - @Override |
| - public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { |
| - SortedSetDocValues sortedSetDV = in.getSortedSetDocValues(field); |
| - if (sortedSetDV == null) { |
| - return null; |
| - } else { |
| - return new SortingSortedSetDocValues(sortedSetDV, docMap); |
| - } |
| - } |
| - |
| - @Override |
| - public Bits getDocsWithField(String field) throws IOException { |
| - Bits bits = in.getDocsWithField(field); |
| - if (bits == null || bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) { |
| - return bits; |
| - } else { |
| - return new SortingBits(bits, docMap); |
| - } |
| - } |
| - |
| - @Override |
| - public Fields getTermVectors(final int docID) throws IOException { |
| - return in.getTermVectors(docMap.newToOld(docID)); |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - return "SortingLeafReader(" + in + ")"; |
| - } |
| -} |
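| Expanding the (corrected) javadoc recipe above into a more complete sketch of the removed offline-sorting workflow; `dir` and `sortedDir` are assumed Directory instances and the sort field is illustrative: |
| |
| import org.apache.lucene.analysis.core.WhitespaceAnalyzer; |
| import org.apache.lucene.index.*; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| |
| Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG)); // assumed field |
| try (DirectoryReader reader = DirectoryReader.open(dir); |
|      IndexWriter writer = new IndexWriter(sortedDir, new IndexWriterConfig(new WhitespaceAnalyzer()))) { |
|   // Sorted *view* of the whole input; nothing is rewritten until addIndexes runs. |
|   LeafReader sortingReader = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort); |
|   writer.addIndexes(SlowCodecReaderWrapper.wrap(sortingReader)); |
| } |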
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java indexsort/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java |
| --- trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java 2016-02-16 11:18:34.753021816 -0500 |
| +++ indexsort/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,264 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Collections; |
| -import java.util.List; |
| -import java.util.Map; |
| - |
| -import org.apache.lucene.index.LeafReader; |
| -import org.apache.lucene.index.IndexWriter; |
| -import org.apache.lucene.index.MergePolicy; |
| -import org.apache.lucene.index.MergeState; |
| -import org.apache.lucene.index.MergeTrigger; |
| -import org.apache.lucene.index.MultiReader; |
| -import org.apache.lucene.index.SegmentCommitInfo; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.SegmentInfos; |
| -import org.apache.lucene.index.SegmentReader; |
| -import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.InfoStream; |
| -import org.apache.lucene.util.packed.PackedInts; |
| -import org.apache.lucene.util.packed.PackedLongValues; |
| - |
| -/** A {@link MergePolicy} that reorders documents according to a {@link Sort} |
| - * before merging them. As a consequence, all segments resulting from a merge |
| - * will be sorted while segments resulting from a flush will be in the order |
| - * in which documents have been added. |
| - * <p><b>NOTE</b>: Never use this policy if you rely on |
| - * {@link IndexWriter#addDocuments(Iterable) IndexWriter.addDocuments} |
| - * to have sequentially-assigned doc IDs; this policy will scatter doc IDs. |
| - * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s |
| - * so that the order of segments is predictable. For example, using |
| - * {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make |
| - * the order of documents in a segment depend on the number of times the segment |
| - * has been merged. |
| - * @lucene.experimental */ |
| -public final class SortingMergePolicy extends MergePolicyWrapper { |
| - |
| - /** |
| - * Put in the {@link SegmentInfo#getDiagnostics() diagnostics} to denote that |
| - * this segment is sorted. |
| - */ |
| - public static final String SORTER_ID_PROP = "sorter"; |
| - |
| - class SortingOneMerge extends OneMerge { |
| - |
| - List<CodecReader> unsortedReaders; |
| - Sorter.DocMap docMap; |
| - LeafReader sortedView; |
| - final InfoStream infoStream; |
| - |
| - SortingOneMerge(List<SegmentCommitInfo> segments, InfoStream infoStream) { |
| - super(segments); |
| - this.infoStream = infoStream; |
| - } |
| - |
| - @Override |
| - public List<CodecReader> getMergeReaders() throws IOException { |
| - if (unsortedReaders == null) { |
| - unsortedReaders = super.getMergeReaders(); |
| - if (infoStream.isEnabled("SMP")) { |
| - infoStream.message("SMP", "sorting " + unsortedReaders); |
| - for (LeafReader leaf : unsortedReaders) { |
| - String sortDescription = getSortDescription(leaf); |
| - if (sortDescription == null) { |
| - sortDescription = "not sorted"; |
| - } |
| - infoStream.message("SMP", "seg=" + leaf + " " + sortDescription); |
| - } |
| - } |
| - // wrap readers to be optimal for merge |
| - List<LeafReader> wrapped = new ArrayList<>(unsortedReaders.size()); |
| - for (LeafReader leaf : unsortedReaders) { |
| - if (leaf instanceof SegmentReader) { |
| - leaf = new MergeReaderWrapper((SegmentReader)leaf); |
| - } |
| - wrapped.add(leaf); |
| - } |
| - final LeafReader atomicView; |
| - if (wrapped.size() == 1) { |
| - atomicView = wrapped.get(0); |
| - } else { |
| - final CompositeReader multiReader = new MultiReader(wrapped.toArray(new LeafReader[wrapped.size()])); |
| - atomicView = new SlowCompositeReaderWrapper(multiReader, true); |
| - } |
| - docMap = sorter.sort(atomicView); |
| - sortedView = SortingLeafReader.wrap(atomicView, docMap); |
| - } |
| - // a null doc map means that the readers are already sorted |
| - if (docMap == null) { |
| - if (infoStream.isEnabled("SMP")) { |
| - infoStream.message("SMP", "readers already sorted, omitting sort"); |
| - } |
| - return unsortedReaders; |
| - } else { |
| - if (infoStream.isEnabled("SMP")) { |
| - infoStream.message("SMP", "sorting readers by " + sort); |
| - } |
| - return Collections.singletonList(SlowCodecReaderWrapper.wrap(sortedView)); |
| - } |
| - } |
| - |
| - @Override |
| - public void setMergeInfo(SegmentCommitInfo info) { |
| - Map<String,String> diagnostics = info.info.getDiagnostics(); |
| - diagnostics.put(SORTER_ID_PROP, sorter.getID()); |
| - super.setMergeInfo(info); |
| - } |
| - |
| - private PackedLongValues getDeletes(List<CodecReader> readers) { |
| - PackedLongValues.Builder deletes = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| - int deleteCount = 0; |
| - for (LeafReader reader : readers) { |
| - final int maxDoc = reader.maxDoc(); |
| - final Bits liveDocs = reader.getLiveDocs(); |
| - for (int i = 0; i < maxDoc; ++i) { |
| - if (liveDocs != null && !liveDocs.get(i)) { |
| - ++deleteCount; |
| - } else { |
| - deletes.add(deleteCount); |
| - } |
| - } |
| - } |
| - return deletes.build(); |
| - } |
| - |
| - @Override |
| - public MergePolicy.DocMap getDocMap(final MergeState mergeState) { |
| - if (unsortedReaders == null) { |
| - throw new IllegalStateException(); |
| - } |
| - if (docMap == null) { |
| - return super.getDocMap(mergeState); |
| - } |
| - assert mergeState.docMaps.length == 1; // we returned a singleton reader |
| - final PackedLongValues deletes = getDeletes(unsortedReaders); |
| - return new MergePolicy.DocMap() { |
| - @Override |
| - public int map(int old) { |
| - final int oldWithDeletes = old + (int) deletes.get(old); |
| - final int newWithDeletes = docMap.oldToNew(oldWithDeletes); |
| - return mergeState.docMaps[0].get(newWithDeletes); |
| - } |
| - }; |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - return "SortingMergePolicy.SortingOneMerge(segments=" + segString() + " sort=" + sort + ")"; |
| - } |
| - } |
| - |
| - class SortingMergeSpecification extends MergeSpecification { |
| - final InfoStream infoStream; |
| - |
| - SortingMergeSpecification(InfoStream infoStream) { |
| - this.infoStream = infoStream; |
| - } |
| - |
| - @Override |
| - public void add(OneMerge merge) { |
| - super.add(new SortingOneMerge(merge.segments, infoStream)); |
| - } |
| - |
| - @Override |
| - public String segString(Directory dir) { |
| - return "SortingMergeSpec(" + super.segString(dir) + ", sorter=" + sorter + ")"; |
| - } |
| - |
| - } |
| - |
| - /** Returns {@code true} if the given {@code reader} is sorted by the |
| - * {@code sort} given. Typically the given {@code sort} would be the |
| - * {@link SortingMergePolicy#getSort()} order of a {@link SortingMergePolicy}. */ |
| - public static boolean isSorted(LeafReader reader, Sort sort) { |
| - String description = getSortDescription(reader); |
| - if (description != null && description.equals(sort.toString())) { |
| - return true; |
| - } |
| - return false; |
| - } |
| - |
| - private static String getSortDescription(LeafReader reader) { |
| - if (reader instanceof SegmentReader) { |
| - final SegmentReader segReader = (SegmentReader) reader; |
| - final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics(); |
| - if (diagnostics != null) { |
| - return diagnostics.get(SORTER_ID_PROP); |
| - } |
| - } else if (reader instanceof FilterLeafReader) { |
| - return getSortDescription(FilterLeafReader.unwrap(reader)); |
| - } |
| - return null; |
| - } |
| - |
| - private MergeSpecification sortedMergeSpecification(MergeSpecification specification, InfoStream infoStream) { |
| - if (specification == null) { |
| - return null; |
| - } |
| - MergeSpecification sortingSpec = new SortingMergeSpecification(infoStream); |
| - for (OneMerge merge : specification.merges) { |
| - sortingSpec.add(merge); |
| - } |
| - return sortingSpec; |
| - } |
| - |
| - final Sorter sorter; |
| - final Sort sort; |
| - |
| - /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */ |
| - public SortingMergePolicy(MergePolicy in, Sort sort) { |
| - super(in); |
| - this.sorter = new Sorter(sort); |
| - this.sort = sort; |
| - } |
| - |
| - /** Return the {@link Sort} order that is used to sort segments when merging. */ |
| - public Sort getSort() { |
| - return sort; |
| - } |
| - |
| - @Override |
| - public MergeSpecification findMerges(MergeTrigger mergeTrigger, |
| - SegmentInfos segmentInfos, IndexWriter writer) throws IOException { |
| - return sortedMergeSpecification(in.findMerges(mergeTrigger, segmentInfos, writer), writer.infoStream); |
| - } |
| - |
| - @Override |
| - public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, |
| - int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) |
| - throws IOException { |
| - return sortedMergeSpecification(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer), writer.infoStream); |
| - } |
| - |
| - @Override |
| - public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer) |
| - throws IOException { |
| - return sortedMergeSpecification(in.findForcedDeletesMerges(segmentInfos, writer), writer.infoStream); |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - return "SortingMergePolicy(" + in + ", sorter=" + sorter + ")"; |
| - } |
| -} |
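| To make the removal concrete, a hedged before/after sketch; the "after" half assumes the IndexWriterConfig.setIndexSort setter this patch appears to introduce, and `analyzer` plus the sort field are illustrative: |
| |
| Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG)); // assumed field |
| |
| // Before this patch: only *merged* segments come out sorted. |
| IndexWriterConfig before = new IndexWriterConfig(analyzer); |
| before.setMergePolicy(new SortingMergePolicy(new TieredMergePolicy(), sort)); |
| |
| // After (assumed replacement): every segment, flushed or merged, is written |
| // in sort order, declared once on the config. |
| IndexWriterConfig after = new IndexWriterConfig(analyzer); |
| after.setIndexSort(sort); |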
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java indexsort/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java |
| --- trunk/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java 2016-02-16 11:18:34.753021816 -0500 |
| +++ indexsort/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,224 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.search; |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.ReaderUtil; |
| -import org.apache.lucene.index.SortingMergePolicy; |
| -import org.apache.lucene.util.BitSet; |
| - |
| -/** |
| - * Helper class to sort readers that contain blocks of documents. |
| - * <p> |
| - * Note that this class is intended to be used with {@link SortingMergePolicy}, |
| - * and for other purposes has some limitations: |
| - * <ul> |
| - * <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter} |
| - * <li>Filling sort field values is not yet supported. |
| - * </ul> |
| - * @lucene.experimental |
| - */ |
| -// TODO: can/should we clean this thing up (e.g. return a proper sort value) |
| -// and move to the join/ module? |
| -public class BlockJoinComparatorSource extends FieldComparatorSource { |
| - final Query parentsFilter; |
| - final Sort parentSort; |
| - final Sort childSort; |
| - |
| - /** |
| - * Create a new BlockJoinComparatorSource, sorting only blocks of documents |
| - * with {@code parentSort} and not reordering children within a block. |
| - * |
| - * @param parentsFilter Filter identifying parent documents |
| - * @param parentSort Sort for parent documents |
| - */ |
| - public BlockJoinComparatorSource(Query parentsFilter, Sort parentSort) { |
| - this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC)); |
| - } |
| - |
| - /** |
| - * Create a new BlockJoinComparatorSource, specifying the sort order for both |
| - * blocks of documents and children within a block. |
| - * |
| - * @param parentsFilter Filter identifying parent documents |
| - * @param parentSort Sort for parent documents |
| - * @param childSort Sort for child documents in the same block |
| - */ |
| - public BlockJoinComparatorSource(Query parentsFilter, Sort parentSort, Sort childSort) { |
| - this.parentsFilter = parentsFilter; |
| - this.parentSort = parentSort; |
| - this.childSort = childSort; |
| - } |
| - |
| - @Override |
| - @SuppressWarnings({"unchecked", "rawtypes"}) |
| - public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { |
| - // we keep parallel slots: the parent ids and the child ids |
| - final int parentSlots[] = new int[numHits]; |
| - final int childSlots[] = new int[numHits]; |
| - |
| - SortField parentFields[] = parentSort.getSort(); |
| - final int parentReverseMul[] = new int[parentFields.length]; |
| - final FieldComparator<?> parentComparators[] = new FieldComparator[parentFields.length]; |
| - for (int i = 0; i < parentFields.length; i++) { |
| - parentReverseMul[i] = parentFields[i].getReverse() ? -1 : 1; |
| - parentComparators[i] = parentFields[i].getComparator(1, i); |
| - } |
| - |
| - SortField childFields[] = childSort.getSort(); |
| - final int childReverseMul[] = new int[childFields.length]; |
| - final FieldComparator<?> childComparators[] = new FieldComparator[childFields.length]; |
| - for (int i = 0; i < childFields.length; i++) { |
| - childReverseMul[i] = childFields[i].getReverse() ? -1 : 1; |
| - childComparators[i] = childFields[i].getComparator(1, i); |
| - } |
| - |
| - // NOTE: we could return the parent ID as the value, but really our sort "value" is more complex... |
| - // So we throw UOE for now. At the moment you really should only use this at indexing time. |
| - return new FieldComparator<Integer>() { |
| - int bottomParent; |
| - int bottomChild; |
| - BitSet parentBits; |
| - LeafFieldComparator[] parentLeafComparators; |
| - LeafFieldComparator[] childLeafComparators; |
| - |
| - @Override |
| - public int compare(int slot1, int slot2) { |
| - try { |
| - return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]); |
| - } catch (IOException e) { |
| - throw new RuntimeException(e); |
| - } |
| - } |
| - |
| - @Override |
| - public void setTopValue(Integer value) { |
| - // we don't have enough information (the docID is needed) |
| - throw new UnsupportedOperationException("this comparator cannot be used with deep paging"); |
| - } |
| - |
| - @Override |
| - public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { |
| - if (parentBits != null) { |
| - throw new IllegalStateException("This comparator can only be used on a single segment"); |
| - } |
| - IndexSearcher searcher = new IndexSearcher(ReaderUtil.getTopLevelContext(context)); |
| - searcher.setQueryCache(null); |
| - final Weight weight = searcher.createNormalizedWeight(parentsFilter, false); |
| - final Scorer parents = weight.scorer(context); |
| - if (parents == null) { |
| - throw new IllegalStateException("LeafReader " + context.reader() + " contains no parents!"); |
| - } |
| - parentBits = BitSet.of(parents.iterator(), context.reader().maxDoc()); |
| - parentLeafComparators = new LeafFieldComparator[parentComparators.length]; |
| - for (int i = 0; i < parentComparators.length; i++) { |
| - parentLeafComparators[i] = parentComparators[i].getLeafComparator(context); |
| - } |
| - childLeafComparators = new LeafFieldComparator[childComparators.length]; |
| - for (int i = 0; i < childComparators.length; i++) { |
| - childLeafComparators[i] = childComparators[i].getLeafComparator(context); |
| - } |
| - |
| - return new LeafFieldComparator() { |
| - |
| - @Override |
| - public int compareBottom(int doc) throws IOException { |
| - return compare(bottomChild, bottomParent, doc, parent(doc)); |
| - } |
| - |
| - @Override |
| - public int compareTop(int doc) throws IOException { |
| - // we don't have enough information (the docID is needed) |
| - throw new UnsupportedOperationException("this comparator cannot be used with deep paging"); |
| - } |
| - |
| - @Override |
| - public void copy(int slot, int doc) throws IOException { |
| - childSlots[slot] = doc; |
| - parentSlots[slot] = parent(doc); |
| - } |
| - |
| - @Override |
| - public void setBottom(int slot) { |
| - bottomParent = parentSlots[slot]; |
| - bottomChild = childSlots[slot]; |
| - } |
| - |
| - @Override |
| - public void setScorer(Scorer scorer) { |
| - for (LeafFieldComparator comp : parentLeafComparators) { |
| - comp.setScorer(scorer); |
| - } |
| - for (LeafFieldComparator comp : childLeafComparators) { |
| - comp.setScorer(scorer); |
| - } |
| - } |
| - |
| - }; |
| - } |
| - |
| - @Override |
| - public Integer value(int slot) { |
| - // really our sort "value" is more complex... |
| - throw new UnsupportedOperationException("filling sort field values is not yet supported"); |
| - } |
| - |
| - int parent(int doc) { |
| - return parentBits.nextSetBit(doc); |
| - } |
| - |
| - int compare(int docID1, int parent1, int docID2, int parent2) throws IOException { |
| - if (parent1 == parent2) { // both are in the same block |
| - if (docID1 == parent1 || docID2 == parent2) { |
| - // keep parents at the end of blocks |
| - return docID1 - docID2; |
| - } else { |
| - return compare(docID1, docID2, childLeafComparators, childReverseMul); |
| - } |
| - } else { |
| - int cmp = compare(parent1, parent2, parentLeafComparators, parentReverseMul); |
| - if (cmp == 0) { |
| - return parent1 - parent2; |
| - } else { |
| - return cmp; |
| - } |
| - } |
| - } |
| - |
| - int compare(int docID1, int docID2, LeafFieldComparator comparators[], int reverseMul[]) throws IOException { |
| - for (int i = 0; i < comparators.length; i++) { |
| - // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co, |
| - // the segments are always the same here... |
| - comparators[i].copy(0, docID1); |
| - comparators[i].setBottom(0); |
| - int comp = reverseMul[i] * comparators[i].compareBottom(docID2); |
| - if (comp != 0) { |
| - return comp; |
| - } |
| - } |
| - return 0; // no need for a docid tiebreak |
| - } |
| - }; |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")"; |
| - } |
| -} |
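| |
| The compare(...) methods deleted above encode a simple total order: within one block the parent, always the last docID of its block, sorts after its children and the children follow the child sort, while distinct blocks are ordered by the parent sort with docID as the tiebreak. Below is a minimal self-contained sketch of that rule, using hypothetical {docID, parentDocID, childValue, parentValue} rows in place of the comparator's per-slot state: |
| |
| import java.util.Arrays; |
| import java.util.Comparator; |
| |
| public class BlockJoinOrderSketch { |
| // one row per document: {docID, parentDocID, childValue, parentValue} |
| static final Comparator<int[]> BLOCK_ORDER = (a, b) -> { |
| if (a[1] == b[1]) { // both docs are in the same block |
| if (a[0] == a[1] || b[0] == b[1]) { |
| return Integer.compare(a[0], b[0]); // keep the parent at the end of its block |
| } |
| return Integer.compare(a[2], b[2]); // order children by the child sort |
| } |
| int cmp = Integer.compare(a[3], b[3]); // order blocks by the parent sort |
| return cmp != 0 ? cmp : Integer.compare(a[1], b[1]); // docID tiebreak |
| }; |
| |
| public static void main(String[] args) { |
| int[][] docs = { {0, 2, 5, 7}, {1, 2, 3, 7}, {2, 2, 0, 7}, {3, 4, 9, 1}, {4, 4, 0, 1} }; |
| Arrays.sort(docs, BLOCK_ORDER); |
| for (int[] d : docs) System.out.println(Arrays.toString(d)); |
| // the block with parentValue=1 sorts first, and each parent stays last in its block |
| } |
| } |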
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java indexsort/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java |
| --- trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java 2016-02-16 11:18:34.753021816 -0500 |
| +++ indexsort/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,146 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.search; |
| - |
| -import java.io.IOException; |
| -import java.util.Arrays; |
| -import java.util.concurrent.atomic.AtomicBoolean; |
| - |
| -import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.IndexWriter; |
| -import org.apache.lucene.index.SortingMergePolicy; |
| -import org.apache.lucene.search.LeafCollector; |
| -import org.apache.lucene.search.CollectionTerminatedException; |
| -import org.apache.lucene.search.Collector; |
| -import org.apache.lucene.search.FilterLeafCollector; |
| -import org.apache.lucene.search.FilterCollector; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.search.TopDocsCollector; |
| -import org.apache.lucene.search.TotalHitCountCollector; |
| - |
| -/** |
| - * A {@link Collector} that early terminates collection of documents on a |
| - * per-segment basis, if the segment was sorted according to the given |
| - * {@link Sort}. |
| - * |
| - * <p> |
| - * <b>NOTE:</b> the {@code Collector} detects segments sorted according to a |
| - * {@link SortingMergePolicy}'s {@link Sort} and so it's best used in conjunction |
| - * with a {@link SortingMergePolicy}. Also, it collects up to a specified |
| - * {@code numDocsToCollect} from each segment, and therefore is mostly suitable |
| - * for use in conjunction with collectors such as {@link TopDocsCollector}, and |
| - * not e.g. {@link TotalHitCountCollector}. |
| - * <p> |
| - * <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same |
| - * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs} |
| - * will be correct. However, the total {@link TopDocsCollector#getTotalHits() |
| - * hit count} will be underestimated, since not all matching documents will have |
| - * been collected. |
| - * <p> |
| - * <b>NOTE</b>: This {@code Collector} uses {@link Sort#toString()} to detect |
| - * whether a segment was sorted with the same {@code Sort}. This has |
| - * two implications: |
| - * <ul> |
| - * <li>if a custom comparator is not implemented correctly and returns |
| - * different identifiers for equivalent instances, this collector will not |
| - * detect sorted segments,</li> |
| - * <li>if you suddenly change the {@link IndexWriter}'s |
| - * {@code SortingMergePolicy} to sort according to another criterion and if both |
| - * the old and the new {@code Sort}s have the same identifier, this |
| - * {@code Collector} will incorrectly detect sorted segments.</li> |
| - * </ul> |
| - * |
| - * @lucene.experimental |
| - */ |
| -public class EarlyTerminatingSortingCollector extends FilterCollector { |
| - |
| - /** Returns whether collection can be early-terminated if it sorts with the |
| - * provided {@link Sort} and if segments are merged with the provided |
| - * {@link Sort}. */ |
| - public static boolean canEarlyTerminate(Sort searchSort, Sort mergePolicySort) { |
| - final SortField[] fields1 = searchSort.getSort(); |
| - final SortField[] fields2 = mergePolicySort.getSort(); |
| - // early termination is possible if fields1 is a prefix of fields2 |
| - if (fields1.length > fields2.length) { |
| - return false; |
| - } |
| - return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length)); |
| - } |
| - |
| - /** Sort used to sort the search results */ |
| - protected final Sort sort; |
| - /** Number of documents to collect in each segment */ |
| - protected final int numDocsToCollect; |
| - private final Sort mergePolicySort; |
| - private final AtomicBoolean terminatedEarly = new AtomicBoolean(false); |
| - |
| - /** |
| - * Create a new {@link EarlyTerminatingSortingCollector} instance. |
| - * |
| - * @param in |
| - * the collector to wrap |
| - * @param sort |
| - * the sort you are sorting the search results on |
| - * @param numDocsToCollect |
| - * the number of documents to collect on each segment. When wrapping |
| - * a {@link TopDocsCollector}, this number should be the number of |
| - * hits. |
| - * @param mergePolicySort |
| - * the sort your {@link SortingMergePolicy} uses |
| - * @throws IllegalStateException if the sort order doesn't allow for early |
| - * termination with the given merge policy. |
| - */ |
| - public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, Sort mergePolicySort) { |
| - super(in); |
| - if (numDocsToCollect <= 0) { |
| - throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect); |
| - } |
| - if (canEarlyTerminate(sort, mergePolicySort) == false) { |
| - throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicySort); |
| - } |
| - this.sort = sort; |
| - this.numDocsToCollect = numDocsToCollect; |
| - this.mergePolicySort = mergePolicySort; |
| - } |
| - |
| - @Override |
| - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { |
| - if (SortingMergePolicy.isSorted(context.reader(), mergePolicySort)) { |
| - // segment is sorted, can early-terminate |
| - return new FilterLeafCollector(super.getLeafCollector(context)) { |
| - private int numCollected; |
| - |
| - @Override |
| - public void collect(int doc) throws IOException { |
| - super.collect(doc); |
| - if (++numCollected >= numDocsToCollect) { |
| - terminatedEarly.set(true); |
| - throw new CollectionTerminatedException(); |
| - } |
| - } |
| - |
| - }; |
| - } else { |
| - return super.getLeafCollector(context); |
| - } |
| - } |
| - |
| - public boolean terminatedEarly() { |
| - return terminatedEarly.get(); |
| - } |
| - |
| -} |
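| |
| Before its removal the collector above was used roughly as in the sketch below, mirroring the call sites in the TestEarlyTerminatingSortingCollector test that this patch also deletes; searcher, query, numHits and mergePolicySort are assumed inputs, and the code only compiles against the pre-patch API: |
| |
| import java.io.IOException; |
| import org.apache.lucene.search.EarlyTerminatingSortingCollector; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.TopFieldCollector; |
| |
| public class EarlyTerminationSketch { |
| // segments must have been merged by a SortingMergePolicy configured with mergePolicySort |
| static TopDocs searchEarlyTerminated(IndexSearcher searcher, Query query, Sort sort, |
| int numHits, Sort mergePolicySort) throws IOException { |
| TopFieldCollector hits = TopFieldCollector.create(sort, numHits, true, false, false); |
| EarlyTerminatingSortingCollector early = |
| new EarlyTerminatingSortingCollector(hits, sort, numHits, mergePolicySort); |
| searcher.search(query, early); |
| // topDocs() is correct, but getTotalHits() may undercount once early.terminatedEarly() |
| return hits.topDocs(); |
| } |
| } |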
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java indexsort/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java |
| --- trunk/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java 2016-02-16 11:18:34.753021816 -0500 |
| +++ indexsort/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,89 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.util.ArrayList; |
| -import java.util.Collections; |
| -import java.util.List; |
| - |
| -import org.apache.lucene.index.DirectoryReader; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.IndexWriter; |
| -import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.search.SortField; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.TestUtil; |
| -import org.junit.BeforeClass; |
| - |
| -public class IndexSortingTest extends SorterTestBase { |
| - |
| - private static final Sort[] SORT = new Sort[] { |
| - new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)), |
| - new Sort(new SortField(null, SortField.Type.DOC, true)) |
| - }; |
| - |
| - @BeforeClass |
| - public static void beforeClassSorterUtilTest() throws Exception { |
| - // NOTE: index was created by super's @BeforeClass |
| - |
| - // only read the values of the undeleted documents, since after addIndexes, |
| - // the deleted ones will be dropped from the index. |
| - Bits liveDocs = unsortedReader.getLiveDocs(); |
| - List<Integer> values = new ArrayList<>(); |
| - for (int i = 0; i < unsortedReader.maxDoc(); i++) { |
| - if (liveDocs == null || liveDocs.get(i)) { |
| - values.add(Integer.valueOf(unsortedReader.document(i).get(ID_FIELD))); |
| - } |
| - } |
| - int idx = random().nextInt(SORT.length); |
| - Sort sorter = SORT[idx]; |
| - if (idx == 1) { // reverse doc sort |
| - Collections.reverse(values); |
| - } else { |
| - Collections.sort(values); |
| - if (random().nextBoolean()) { |
| - sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending |
| - Collections.reverse(values); |
| - } |
| - } |
| - sortedValues = values.toArray(new Integer[values.size()]); |
| - if (VERBOSE) { |
| - System.out.println("sortedValues: " + sortedValues); |
| - System.out.println("Sorter: " + sorter); |
| - } |
| - |
| - Directory target = newDirectory(); |
| - IndexWriter writer = new IndexWriter(target, newIndexWriterConfig(null)); |
| - LeafReader reader = SortingLeafReader.wrap(unsortedReader, sorter); |
| - writer.addIndexes(SlowCodecReaderWrapper.wrap(reader)); |
| - writer.close(); |
| - // NOTE: also closes unsortedReader |
| - reader.close(); |
| - dir.close(); |
| - |
| - // CheckIndex the target directory |
| - dir = target; |
| - TestUtil.checkIndex(dir); |
| - |
| - // set reader for tests |
| - sortedReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); |
| - assertFalse("index should not have deletions", sortedReader.hasDeletions()); |
| - } |
| - |
| -} |
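| |
| The deleted test exercised the pre-patch offline sorting recipe: wrap an unsorted LeafReader with SortingLeafReader and bulk-copy it into a fresh index through addIndexes. A minimal sketch of just that recipe follows; the null analyzer mirrors the deleted test, since no tokenized fields are written here: |
| |
| import java.io.IOException; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.SlowCodecReaderWrapper; |
| import org.apache.lucene.index.SortingLeafReader; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.store.Directory; |
| |
| public class OfflineSortSketch { |
| // copies the contents of unsorted into target, rewritten in sort order |
| static void sortIndex(LeafReader unsorted, Sort sort, Directory target) throws IOException { |
| LeafReader sorted = SortingLeafReader.wrap(unsorted, sort); |
| try (IndexWriter writer = new IndexWriter(target, new IndexWriterConfig(null))) { |
| writer.addIndexes(SlowCodecReaderWrapper.wrap(sorted)); |
| } |
| } |
| } |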
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java indexsort/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java |
| --- trunk/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java 2016-03-08 17:22:26.836938630 -0500 |
| +++ indexsort/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,405 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Arrays; |
| -import java.util.Collections; |
| -import java.util.List; |
| -import java.util.Random; |
| - |
| -import org.apache.lucene.analysis.MockAnalyzer; |
| -import org.apache.lucene.analysis.TokenStream; |
| -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| -import org.apache.lucene.document.BinaryDocValuesField; |
| -import org.apache.lucene.document.BinaryPoint; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field.Store; |
| -import org.apache.lucene.document.Field; |
| -import org.apache.lucene.document.FieldType; |
| -import org.apache.lucene.document.NumericDocValuesField; |
| -import org.apache.lucene.document.SortedDocValuesField; |
| -import org.apache.lucene.document.SortedNumericDocValuesField; |
| -import org.apache.lucene.document.SortedSetDocValuesField; |
| -import org.apache.lucene.document.StringField; |
| -import org.apache.lucene.document.TextField; |
| -import org.apache.lucene.index.PointValues.IntersectVisitor; |
| -import org.apache.lucene.index.PointValues.Relation; |
| -import org.apache.lucene.index.SortingLeafReader.SortingDocsEnum; |
| -import org.apache.lucene.index.TermsEnum.SeekStatus; |
| -import org.apache.lucene.search.CollectionStatistics; |
| -import org.apache.lucene.search.DocIdSetIterator; |
| -import org.apache.lucene.search.TermStatistics; |
| -import org.apache.lucene.search.similarities.Similarity; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.FixedBitSet; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.apache.lucene.util.NumericUtils; |
| -import org.apache.lucene.util.TestUtil; |
| -import org.junit.AfterClass; |
| -import org.junit.BeforeClass; |
| - |
| -public abstract class SorterTestBase extends LuceneTestCase { |
| - |
| - static final class NormsSimilarity extends Similarity { |
| - |
| - private final Similarity in; |
| - |
| - public NormsSimilarity(Similarity in) { |
| - this.in = in; |
| - } |
| - |
| - @Override |
| - public long computeNorm(FieldInvertState state) { |
| - if (state.getName().equals(NORMS_FIELD)) { |
| - return Float.floatToIntBits(state.getBoost()); |
| - } else { |
| - return in.computeNorm(state); |
| - } |
| - } |
| - |
| - @Override |
| - public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) { |
| - return in.computeWeight(collectionStats, termStats); |
| - } |
| - |
| - @Override |
| - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { |
| - return in.simScorer(weight, context); |
| - } |
| - |
| - } |
| - |
| - static final class PositionsTokenStream extends TokenStream { |
| - |
| - private final CharTermAttribute term; |
| - private final PayloadAttribute payload; |
| - private final OffsetAttribute offset; |
| - |
| - private int pos, off; |
| - |
| - public PositionsTokenStream() { |
| - term = addAttribute(CharTermAttribute.class); |
| - payload = addAttribute(PayloadAttribute.class); |
| - offset = addAttribute(OffsetAttribute.class); |
| - } |
| - |
| - @Override |
| - public boolean incrementToken() throws IOException { |
| - if (pos == 0) { |
| - return false; |
| - } |
| - |
| - clearAttributes(); |
| - term.append(DOC_POSITIONS_TERM); |
| - payload.setPayload(new BytesRef(Integer.toString(pos))); |
| - offset.setOffset(off, off); |
| - --pos; |
| - ++off; |
| - return true; |
| - } |
| - |
| - void setId(int id) { |
| - pos = id / 10 + 1; |
| - off = 0; |
| - } |
| - } |
| - |
| - protected static final String ID_FIELD = "id"; |
| - protected static final String DOCS_ENUM_FIELD = "docs"; |
| - protected static final String DOCS_ENUM_TERM = "$all$"; |
| - protected static final String DOC_POSITIONS_FIELD = "positions"; |
| - protected static final String DOC_POSITIONS_TERM = "$all$"; |
| - protected static final String NUMERIC_DV_FIELD = "numeric"; |
| - protected static final String SORTED_NUMERIC_DV_FIELD = "sorted_numeric"; |
| - protected static final String NORMS_FIELD = "norm"; |
| - protected static final String BINARY_DV_FIELD = "binary"; |
| - protected static final String SORTED_DV_FIELD = "sorted"; |
| - protected static final String SORTED_SET_DV_FIELD = "sorted_set"; |
| - protected static final String TERM_VECTORS_FIELD = "term_vectors"; |
| - protected static final String DIMENSIONAL_FIELD = "numeric1d"; |
| - |
| - private static final FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED); |
| - static { |
| - TERM_VECTORS_TYPE.setStoreTermVectors(true); |
| - TERM_VECTORS_TYPE.freeze(); |
| - } |
| - |
| - private static final FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED); |
| - static { |
| - POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); |
| - POSITIONS_TYPE.freeze(); |
| - } |
| - |
| - protected static Directory dir; |
| - protected static LeafReader unsortedReader; |
| - protected static LeafReader sortedReader; |
| - protected static Integer[] sortedValues; |
| - |
| - private static Document doc(final int id, PositionsTokenStream positions) { |
| - final Document doc = new Document(); |
| - doc.add(new StringField(ID_FIELD, Integer.toString(id), Store.YES)); |
| - doc.add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Store.NO)); |
| - positions.setId(id); |
| - doc.add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE)); |
| - doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id)); |
| - TextField norms = new TextField(NORMS_FIELD, Integer.toString(id), Store.NO); |
| - norms.setBoost(Float.intBitsToFloat(id)); |
| - doc.add(norms); |
| - doc.add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(Integer.toString(id)))); |
| - doc.add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(Integer.toString(id)))); |
| - doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id)))); |
| - doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1)))); |
| - doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id)); |
| - doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id + 1)); |
| - doc.add(new Field(TERM_VECTORS_FIELD, Integer.toString(id), TERM_VECTORS_TYPE)); |
| - byte[] bytes = new byte[4]; |
| - NumericUtils.intToSortableBytes(id, bytes, 0); |
| - // TODO: index time sorting doesn't yet support points |
| - //doc.add(new BinaryPoint(DIMENSIONAL_FIELD, bytes)); |
| - return doc; |
| - } |
| - |
| - /** Creates an unsorted index; subclasses then sort this index and open sortedReader. */ |
| - private static void createIndex(Directory dir, int numDocs, Random random) throws IOException { |
| - List<Integer> ids = new ArrayList<>(); |
| - for (int i = 0; i < numDocs; i++) { |
| - ids.add(Integer.valueOf(i * 10)); |
| - } |
| - // shuffle them for indexing |
| - Collections.shuffle(ids, random); |
| - if (VERBOSE) { |
| - System.out.println("Shuffled IDs for indexing: " + Arrays.toString(ids.toArray())); |
| - } |
| - |
| - PositionsTokenStream positions = new PositionsTokenStream(); |
| - IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random)); |
| - conf.setMaxBufferedDocs(4); // create some segments |
| - conf.setSimilarity(new NormsSimilarity(conf.getSimilarity())); // for testing norms field |
| - RandomIndexWriter writer = new RandomIndexWriter(random, dir, conf); |
| - writer.setDoRandomForceMerge(false); |
| - for (int id : ids) { |
| - writer.addDocument(doc(id, positions)); |
| - } |
| - // delete some documents |
| - writer.commit(); |
| - for (Integer id : ids) { |
| - if (random.nextDouble() < 0.2) { |
| - if (VERBOSE) { |
| - System.out.println("delete doc_id " + id); |
| - } |
| - writer.deleteDocuments(new Term(ID_FIELD, id.toString())); |
| - } |
| - } |
| - writer.close(); |
| - } |
| - |
| - @BeforeClass |
| - public static void beforeClassSorterTestBase() throws Exception { |
| - dir = newDirectory(); |
| - int numDocs = atLeast(20); |
| - createIndex(dir, numDocs, random()); |
| - |
| - unsortedReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); |
| - } |
| - |
| - @AfterClass |
| - public static void afterClassSorterTestBase() throws Exception { |
| - unsortedReader.close(); |
| - sortedReader.close(); |
| - dir.close(); |
| - unsortedReader = sortedReader = null; |
| - dir = null; |
| - } |
| - |
| - public void testBinaryDocValuesField() throws Exception { |
| - BinaryDocValues dv = sortedReader.getBinaryDocValues(BINARY_DV_FIELD); |
| - for (int i = 0; i < sortedReader.maxDoc(); i++) { |
| - final BytesRef bytes = dv.get(i); |
| - assertEquals("incorrect binary DocValues for doc " + i, sortedValues[i].toString(), bytes.utf8ToString()); |
| - } |
| - } |
| - |
| - public void testDocsAndPositionsEnum() throws Exception { |
| - TermsEnum termsEnum = sortedReader.terms(DOC_POSITIONS_FIELD).iterator(); |
| - assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM))); |
| - PostingsEnum sortedPositions = termsEnum.postings(null, PostingsEnum.ALL); |
| - int doc; |
| - |
| - // test nextDoc() |
| - while ((doc = sortedPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { |
| - int freq = sortedPositions.freq(); |
| - assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq); |
| - for (int i = 0; i < freq; i++) { |
| - assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition()); |
| - assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset()); |
| - assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset()); |
| - assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString())); |
| - } |
| - } |
| - |
| - // test advance() |
| - final PostingsEnum reuse = sortedPositions; |
| - sortedPositions = termsEnum.postings(reuse, PostingsEnum.ALL); |
| - if (sortedPositions instanceof SortingDocsEnum) { |
| - assertTrue(((SortingDocsEnum) sortedPositions).reused(reuse)); // make sure reuse worked |
| - } |
| - doc = 0; |
| - while ((doc = sortedPositions.advance(doc + TestUtil.nextInt(random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS) { |
| - int freq = sortedPositions.freq(); |
| - assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq); |
| - for (int i = 0; i < freq; i++) { |
| - assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition()); |
| - assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset()); |
| - assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset()); |
| - assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString())); |
| - } |
| - } |
| - } |
| - |
| - Bits randomLiveDocs(int maxDoc) { |
| - if (rarely()) { |
| - if (random().nextBoolean()) { |
| - return null; |
| - } else { |
| - return new Bits.MatchNoBits(maxDoc); |
| - } |
| - } |
| - final FixedBitSet bits = new FixedBitSet(maxDoc); |
| - final int bitsSet = TestUtil.nextInt(random(), 1, maxDoc - 1); |
| - for (int i = 0; i < bitsSet; ++i) { |
| - while (true) { |
| - final int index = random().nextInt(maxDoc); |
| - if (!bits.get(index)) { |
| - bits.set(index); |
| - break; |
| - } |
| - } |
| - } |
| - return bits; |
| - } |
| - |
| - public void testDocsEnum() throws Exception { |
| - TermsEnum termsEnum = sortedReader.terms(DOCS_ENUM_FIELD).iterator(); |
| - assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOCS_ENUM_TERM))); |
| - PostingsEnum docs = termsEnum.postings(null); |
| - |
| - int doc; |
| - while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { |
| - assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(sortedReader.document(doc).get(ID_FIELD))); |
| - } |
| - |
| - PostingsEnum reuse = docs; |
| - docs = termsEnum.postings(reuse); |
| - if (docs instanceof SortingDocsEnum) { |
| - assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked |
| - } |
| - doc = -1; |
| - while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) { |
| - assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(sortedReader.document(doc).get(ID_FIELD))); |
| - } |
| - } |
| - |
| - public void testNormValues() throws Exception { |
| - NumericDocValues dv = sortedReader.getNormValues(NORMS_FIELD); |
| - int maxDoc = sortedReader.maxDoc(); |
| - for (int i = 0; i < maxDoc; i++) { |
| - assertEquals("incorrect norm value for doc " + i, sortedValues[i].intValue(), dv.get(i)); |
| - } |
| - } |
| - |
| - public void testNumericDocValuesField() throws Exception { |
| - NumericDocValues dv = sortedReader.getNumericDocValues(NUMERIC_DV_FIELD); |
| - int maxDoc = sortedReader.maxDoc(); |
| - for (int i = 0; i < maxDoc; i++) { |
| - assertEquals("incorrect numeric DocValues for doc " + i, sortedValues[i].intValue(), dv.get(i)); |
| - } |
| - } |
| - |
| - public void testSortedDocValuesField() throws Exception { |
| - SortedDocValues dv = sortedReader.getSortedDocValues(SORTED_DV_FIELD); |
| - int maxDoc = sortedReader.maxDoc(); |
| - for (int i = 0; i < maxDoc; i++) { |
| - final BytesRef bytes = dv.get(i); |
| - assertEquals("incorrect sorted DocValues for doc " + i, sortedValues[i].toString(), bytes.utf8ToString()); |
| - } |
| - } |
| - |
| - public void testSortedSetDocValuesField() throws Exception { |
| - SortedSetDocValues dv = sortedReader.getSortedSetDocValues(SORTED_SET_DV_FIELD); |
| - int maxDoc = sortedReader.maxDoc(); |
| - for (int i = 0; i < maxDoc; i++) { |
| - dv.setDocument(i); |
| - BytesRef bytes = dv.lookupOrd(dv.nextOrd()); |
| - int value = sortedValues[i].intValue(); |
| - assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value).toString(), bytes.utf8ToString()); |
| - bytes = dv.lookupOrd(dv.nextOrd()); |
| - assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value + 1).toString(), bytes.utf8ToString()); |
| - assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd()); |
| - } |
| - } |
| - |
| - public void testSortedNumericDocValuesField() throws Exception { |
| - SortedNumericDocValues dv = sortedReader.getSortedNumericDocValues(SORTED_NUMERIC_DV_FIELD); |
| - int maxDoc = sortedReader.maxDoc(); |
| - for (int i = 0; i < maxDoc; i++) { |
| - dv.setDocument(i); |
| - assertEquals(2, dv.count()); |
| - int value = sortedValues[i].intValue(); |
| - assertEquals("incorrect sorted-numeric DocValues for doc " + i, value, dv.valueAt(0)); |
| - assertEquals("incorrect sorted-numeric DocValues for doc " + i, value + 1, dv.valueAt(1)); |
| - } |
| - } |
| - |
| - public void testTermVectors() throws Exception { |
| - int maxDoc = sortedReader.maxDoc(); |
| - for (int i = 0; i < maxDoc; i++) { |
| - Terms terms = sortedReader.getTermVector(i, TERM_VECTORS_FIELD); |
| - assertNotNull("term vectors not found for doc " + i + " field [" + TERM_VECTORS_FIELD + "]", terms); |
| - assertEquals("incorrect term vector for doc " + i, sortedValues[i].toString(), terms.iterator().next().utf8ToString()); |
| - } |
| - } |
| - |
| - // TODO: index sorting doesn't yet support points |
| - /* |
| - public void testPoints() throws Exception { |
| - PointValues values = sortedReader.getPointValues(); |
| - values.intersect(DIMENSIONAL_FIELD, |
| - new IntersectVisitor() { |
| - @Override |
| - public void visit(int docID) { |
| - throw new IllegalStateException(); |
| - } |
| - |
| - @Override |
| - public void visit(int docID, byte[] packedValues) { |
| - assertEquals(sortedValues[docID].intValue(), NumericUtils.bytesToInt(packedValues, 0)); |
| - } |
| - |
| - @Override |
| - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| - return Relation.CELL_CROSSES_QUERY; |
| - } |
| - }); |
| - } |
| - */ |
| -} |
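| |
| A note on the position assertions above: they follow directly from PositionsTokenStream.setId, which emits id / 10 + 1 tokens for a document, writes the current countdown value into each payload, and uses identical start and end offsets. A tiny self-contained sketch of the expected shape for one hypothetical doc id: |
| |
| public class ExpectedPositionsSketch { |
| public static void main(String[] args) { |
| int id = 42; // any doc id of the kind SorterTestBase indexes |
| int freq = id / 10 + 1; // number of tokens PositionsTokenStream emits after setId(id) |
| for (int i = 0; i < freq; i++) { |
| int payload = freq - i; // the stream decrements pos, so payloads count down |
| System.out.println("pos=" + i + " start=" + i + " end=" + i + " payload=" + payload); |
| } |
| } |
| } |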
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java indexsort/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java |
| --- trunk/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java 2016-03-02 04:32:40.451807337 -0500 |
| +++ indexsort/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,73 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.util.Arrays; |
| - |
| -import org.apache.lucene.index.NumericDocValues; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.search.SortField; |
| -import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.TestUtil; |
| -import org.junit.BeforeClass; |
| - |
| -public class SortingLeafReaderTest extends SorterTestBase { |
| - |
| - @BeforeClass |
| - public static void beforeClassSortingLeafReaderTest() throws Exception { |
| - // NOTE: index was created by super's @BeforeClass |
| - |
| - // sort the index by id (as integer, in NUMERIC_DV_FIELD) |
| - Sort sort = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.INT)); |
| - final Sorter.DocMap docMap = new Sorter(sort).sort(unsortedReader); |
| - |
| - // use the docMap from Sorter.sort to compute the expected sorted values |
| - NumericDocValues dv = unsortedReader.getNumericDocValues(NUMERIC_DV_FIELD); |
| - sortedValues = new Integer[unsortedReader.maxDoc()]; |
| - for (int i = 0; i < unsortedReader.maxDoc(); ++i) { |
| - sortedValues[docMap.oldToNew(i)] = (int)dv.get(i); |
| - } |
| - if (VERBOSE) { |
| - System.out.println("docMap: " + docMap); |
| - System.out.println("sortedValues: " + Arrays.toString(sortedValues)); |
| - } |
| - |
| - // wrap the reader so it is viewed in sorted order (by id, as integer, in NUMERIC_DV_FIELD) |
| - sortedReader = SortingLeafReader.wrap(unsortedReader, sort); |
| - |
| - if (VERBOSE) { |
| - System.out.print("mapped-deleted-docs: "); |
| - Bits mappedLiveDocs = sortedReader.getLiveDocs(); |
| - for (int i = 0; i < mappedLiveDocs.length(); i++) { |
| - if (!mappedLiveDocs.get(i)) { |
| - System.out.print(i + " "); |
| - } |
| - } |
| - System.out.println(); |
| - } |
| - |
| - TestUtil.checkReader(sortedReader); |
| - } |
| - |
| - public void testBadSort() throws Exception { |
| - IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { |
| - SortingLeafReader.wrap(sortedReader, Sort.RELEVANCE); |
| - }); |
| - assertEquals("Cannot sort an index with a Sort that refers to the relevance score", expected.getMessage()); |
| - } |
| - |
| -} |
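| |
| The sortedValues bookkeeping above rests on one invariant: Sorter.DocMap.oldToNew is a permutation of doc IDs, so storing each old doc's value at slot oldToNew(old) yields the values in the sorted reader's doc order. A self-contained illustration with a hypothetical four-doc permutation: |
| |
| import java.util.Arrays; |
| |
| public class DocMapSketch { |
| public static void main(String[] args) { |
| int[] values = {30, 0, 20, 10}; // value indexed on each old doc id |
| int[] oldToNew = {3, 0, 2, 1}; // hypothetical sort permutation |
| Integer[] sortedValues = new Integer[values.length]; |
| for (int old = 0; old < values.length; old++) { |
| sortedValues[oldToNew[old]] = values[old]; |
| } |
| System.out.println(Arrays.toString(sortedValues)); // prints [0, 10, 20, 30] |
| } |
| } |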
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java indexsort/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java |
| --- trunk/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java 2016-02-16 11:18:34.753021816 -0500 |
| +++ indexsort/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,128 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.List; |
| - |
| -import org.apache.lucene.analysis.MockAnalyzer; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field.Store; |
| -import org.apache.lucene.document.NumericDocValuesField; |
| -import org.apache.lucene.document.StringField; |
| -import org.apache.lucene.search.BlockJoinComparatorSource; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.Query; |
| -import org.apache.lucene.search.Scorer; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.search.SortField; |
| -import org.apache.lucene.search.TermQuery; |
| -import org.apache.lucene.search.Weight; |
| -import org.apache.lucene.util.ArrayUtil; |
| -import org.apache.lucene.util.BitSet; |
| -import org.apache.lucene.util.LuceneTestCase; |
| - |
| -public class TestBlockJoinSorter extends LuceneTestCase { |
| - |
| - public void test() throws IOException { |
| - final int numParents = atLeast(200); |
| - IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); |
| - cfg.setMergePolicy(newLogMergePolicy()); |
| - final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), cfg); |
| - final Document parentDoc = new Document(); |
| - final NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L); |
| - parentDoc.add(parentVal); |
| - final StringField parent = new StringField("parent", "true", Store.YES); |
| - parentDoc.add(parent); |
| - for (int i = 0; i < numParents; ++i) { |
| - List<Document> documents = new ArrayList<>(); |
| - final int numChildren = random().nextInt(10); |
| - for (int j = 0; j < numChildren; ++j) { |
| - final Document childDoc = new Document(); |
| - childDoc.add(new NumericDocValuesField("child_val", random().nextInt(5))); |
| - documents.add(childDoc); |
| - } |
| - parentVal.setLongValue(random().nextInt(50)); |
| - documents.add(parentDoc); |
| - writer.addDocuments(documents); |
| - } |
| - writer.forceMerge(1); |
| - IndexReader indexReader = writer.getReader(); |
| - writer.close(); |
| - |
| - IndexSearcher searcher = newSearcher(indexReader); |
| - indexReader = searcher.getIndexReader(); // newSearcher may have wrapped it |
| - assertEquals(1, indexReader.leaves().size()); |
| - final LeafReader reader = indexReader.leaves().get(0).reader(); |
| - final Query parentsFilter = new TermQuery(new Term("parent", "true")); |
| - |
| - final Weight weight = searcher.createNormalizedWeight(parentsFilter, false); |
| - final Scorer parents = weight.scorer(indexReader.leaves().get(0)); |
| - final BitSet parentBits = BitSet.of(parents.iterator(), reader.maxDoc()); |
| - final NumericDocValues parentValues = reader.getNumericDocValues("parent_val"); |
| - final NumericDocValues childValues = reader.getNumericDocValues("child_val"); |
| - |
| - final Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG)); |
| - final Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG)); |
| - |
| - final Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort))); |
| - final Sorter sorter = new Sorter(sort); |
| - final Sorter.DocMap docMap = sorter.sort(reader); |
| - assertEquals(reader.maxDoc(), docMap.size()); |
| - |
| - int[] children = new int[1]; |
| - int numChildren = 0; |
| - int previousParent = -1; |
| - for (int i = 0; i < docMap.size(); ++i) { |
| - final int oldID = docMap.newToOld(i); |
| - if (parentBits.get(oldID)) { |
| - // check that we have the right children |
| - for (int j = 0; j < numChildren; ++j) { |
| - assertEquals(oldID, parentBits.nextSetBit(children[j])); |
| - } |
| - // check that children are sorted |
| - for (int j = 1; j < numChildren; ++j) { |
| - final int doc1 = children[j-1]; |
| - final int doc2 = children[j]; |
| - if (childValues.get(doc1) == childValues.get(doc2)) { |
| - assertTrue(doc1 < doc2); // sort is stable |
| - } else { |
| - assertTrue(childValues.get(doc1) < childValues.get(doc2)); |
| - } |
| - } |
| - // check that parents are sorted |
| - if (previousParent != -1) { |
| - if (parentValues.get(previousParent) == parentValues.get(oldID)) { |
| - assertTrue(previousParent < oldID); |
| - } else { |
| - assertTrue(parentValues.get(previousParent) < parentValues.get(oldID)); |
| - } |
| - } |
| - // reset |
| - previousParent = oldID; |
| - numChildren = 0; |
| - } else { |
| - children = ArrayUtil.grow(children, numChildren+1); |
| - children[numChildren++] = oldID; |
| - } |
| - } |
| - indexReader.close(); |
| - writer.w.getDirectory().close(); |
| - } |
| - |
| -} |
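| |
| The deleted test relies on the block layout that IndexWriter.addDocuments guarantees: the children of a block are written first and the parent becomes the last, highest docID of the block, which is exactly why parentBits.nextSetBit(childDoc) finds the owning parent. A sketch of indexing one such block, with field names taken from the test: |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.List; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.document.NumericDocValuesField; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.index.IndexWriter; |
| |
| public class BlockIndexingSketch { |
| // adds the children followed by their parent as one atomic block |
| static void addBlock(IndexWriter writer, long[] childVals, long parentVal) throws IOException { |
| List<Document> block = new ArrayList<>(); |
| for (long v : childVals) { |
| Document child = new Document(); |
| child.add(new NumericDocValuesField("child_val", v)); |
| block.add(child); |
| } |
| Document parent = new Document(); |
| parent.add(new StringField("parent", "true", Store.YES)); |
| parent.add(new NumericDocValuesField("parent_val", parentVal)); |
| block.add(parent); // the parent must be the last document of the block |
| writer.addDocuments(block); |
| } |
| } |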
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java indexsort/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java |
| --- trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java 2016-03-02 04:32:40.451807337 -0500 |
| +++ indexsort/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,201 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.index; |
| - |
| -import java.io.IOException; |
| -import java.lang.reflect.Method; |
| -import java.lang.reflect.Modifier; |
| -import java.util.ArrayList; |
| -import java.util.HashSet; |
| -import java.util.List; |
| -import java.util.Random; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.analysis.MockAnalyzer; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field.Store; |
| -import org.apache.lucene.document.NumericDocValuesField; |
| -import org.apache.lucene.document.StringField; |
| -import org.apache.lucene.index.LeafReader; |
| -import org.apache.lucene.index.DirectoryReader; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.IndexWriterConfig; |
| -import org.apache.lucene.index.LogMergePolicy; |
| -import org.apache.lucene.index.MergePolicy; |
| -import org.apache.lucene.index.NumericDocValues; |
| -import org.apache.lucene.index.RandomIndexWriter; |
| -import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.TieredMergePolicy; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.search.SortField; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.apache.lucene.util.TestUtil; |
| - |
| -import com.carrotsearch.randomizedtesting.generators.RandomPicks; |
| - |
| -public class TestSortingMergePolicy extends BaseMergePolicyTestCase { |
| - |
| - private List<String> terms; |
| - private Directory dir1, dir2; |
| - private Sort sort; |
| - private boolean reversedSort; |
| - private IndexReader reader; |
| - private IndexReader sortedReader; |
| - |
| - @Override |
| - public void setUp() throws Exception { |
| - super.setUp(); |
| - final Boolean reverse = (random().nextBoolean() ? null : Boolean.valueOf(random().nextBoolean())); |
| - final SortField sort_field = (reverse == null |
| - ? new SortField("ndv", SortField.Type.LONG) |
| - : new SortField("ndv", SortField.Type.LONG, reverse.booleanValue())); |
| - sort = new Sort(sort_field); |
| - reversedSort = (null != reverse && reverse.booleanValue()); |
| - createRandomIndexes(); |
| - } |
| - |
| - private Document randomDocument() { |
| - final Document doc = new Document(); |
| - doc.add(new NumericDocValuesField("ndv", random().nextLong())); |
| - doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES)); |
| - return doc; |
| - } |
| - |
| - public MergePolicy mergePolicy() { |
| - return newSortingMergePolicy(sort); |
| - } |
| - |
| - public static SortingMergePolicy newSortingMergePolicy(Sort sort) { |
| - // usually create a MP with a low merge factor so that many merges happen |
| - MergePolicy mp; |
| - int thingToDo = random().nextInt(3); |
| - if (thingToDo == 0) { |
| - TieredMergePolicy tmp = newTieredMergePolicy(random()); |
| - final int numSegs = TestUtil.nextInt(random(), 3, 5); |
| - tmp.setSegmentsPerTier(numSegs); |
| - tmp.setMaxMergeAtOnce(TestUtil.nextInt(random(), 2, numSegs)); |
| - mp = tmp; |
| - } else if (thingToDo == 1) { |
| - LogMergePolicy lmp = newLogMergePolicy(random()); |
| - lmp.setMergeFactor(TestUtil.nextInt(random(), 3, 5)); |
| - mp = lmp; |
| - } else { |
| - // just a regular random one from LuceneTestCase (whatever flavor it picks) |
| - mp = newMergePolicy(); |
| - } |
| - // wrap it with a sorting mp |
| - if (VERBOSE) { |
| - System.out.println("TEST: return SortingMergePolicy(mp=" + mp + " sort=" + sort + ")"); |
| - } |
| - return new SortingMergePolicy(mp, sort); |
| - } |
| - |
| - private void createRandomIndexes() throws IOException { |
| - dir1 = newDirectory(); |
| - dir2 = newDirectory(); |
| - final int numDocs = atLeast(150); |
| - final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5); |
| - Set<String> randomTerms = new HashSet<>(); |
| - while (randomTerms.size() < numTerms) { |
| - randomTerms.add(TestUtil.randomSimpleString(random())); |
| - } |
| - terms = new ArrayList<>(randomTerms); |
| - final long seed = random().nextLong(); |
| - final IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(new Random(seed))); |
| - final IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(new Random(seed))); |
| - iwc2.setMergePolicy(mergePolicy()); |
| - final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1); |
| - final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2); |
| - for (int i = 0; i < numDocs; ++i) { |
| - if (random().nextInt(5) == 0 && i != numDocs - 1) { |
| - final String term = RandomPicks.randomFrom(random(), terms); |
| - iw1.deleteDocuments(new Term("s", term)); |
| - iw2.deleteDocuments(new Term("s", term)); |
| - } |
| - final Document doc = randomDocument(); |
| - iw1.addDocument(doc); |
| - iw2.addDocument(doc); |
| - if (random().nextInt(8) == 0) { |
| - iw1.commit(); |
| - iw2.commit(); |
| - } |
| - } |
| - // Make sure we have something to merge |
| - iw1.commit(); |
| - iw2.commit(); |
| - final Document doc = randomDocument(); |
| - // NOTE: don't use RIW.addDocument directly, since it sometimes commits |
| - // which may trigger a merge, in which case forceMerge may not do anything. |
| - // With field updates this is a problem, since the updates can go into the |
| - // single segment in the index, and therefore the index won't be sorted. |
| - // This hurts the assumption of the test later on, that the index is sorted |
| - // by SortingMP. |
| - iw1.w.addDocument(doc); |
| - iw2.w.addDocument(doc); |
| - |
| - // update NDV of docs belonging to one term (covers many documents) |
| - final long value = random().nextLong(); |
| - final String term = RandomPicks.randomFrom(random(), terms); |
| - iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value); |
| - iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value); |
| - |
| - iw1.forceMerge(1); |
| - iw2.forceMerge(1); |
| - iw1.close(); |
| - iw2.close(); |
| - reader = DirectoryReader.open(dir1); |
| - sortedReader = DirectoryReader.open(dir2); |
| - } |
| - |
| - @Override |
| - public void tearDown() throws Exception { |
| - reader.close(); |
| - sortedReader.close(); |
| - dir1.close(); |
| - dir2.close(); |
| - super.tearDown(); |
| - } |
| - |
| - private static void assertSorted(LeafReader reader, boolean reverse) throws IOException { |
| - final NumericDocValues ndv = reader.getNumericDocValues("ndv"); |
| - for (int i = 1; i < reader.maxDoc(); ++i) { |
| - final int lhs = (!reverse ? i-1 : i); |
| - final int rhs = (!reverse ? i : i-1); |
| - assertTrue("ndv(" + (i-1) + ")=" + ndv.get(i-1) + ",ndv(" + i + ")=" + ndv.get(i)+",reverse="+reverse, ndv.get(lhs) <= ndv.get(rhs)); |
| - } |
| - } |
| - |
| - public void testSortingMP() throws IOException { |
| - final LeafReader sortedReader1 = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort); |
| - final LeafReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader); |
| - |
| - assertSorted(sortedReader1, reversedSort); |
| - assertSorted(sortedReader2, reversedSort); |
| - |
| - assertReaderEquals("", sortedReader1, sortedReader2); |
| - } |
| - |
| - public void testBadSort() throws Exception { |
| - IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { |
| - new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE); |
| - }); |
| - assertEquals("Cannot sort an index with a Sort that refers to the relevance score", expected.getMessage()); |
| - } |
| - |
| -} |
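| |
| For reference, the merge-time sorting this test verifies is configured by wrapping any base MergePolicy, as newSortingMergePolicy above does. A minimal non-test sketch, where TieredMergePolicy is an arbitrary choice of base and the null analyzer is only for brevity: |
| |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.SortingMergePolicy; |
| import org.apache.lucene.index.TieredMergePolicy; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| |
| public class SortingMergePolicyConfigSketch { |
| static IndexWriterConfig sortedConfig() { |
| Sort sort = new Sort(new SortField("ndv", SortField.Type.LONG)); |
| IndexWriterConfig iwc = new IndexWriterConfig(null); |
| iwc.setMergePolicy(new SortingMergePolicy(new TieredMergePolicy(), sort)); |
| return iwc; |
| } |
| } |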
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java indexsort/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java |
| --- trunk/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java 2016-02-16 11:18:34.753021816 -0500 |
| +++ indexsort/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java 2016-05-10 05:44:23.752471119 -0400 |
| @@ -32,9 +32,9 @@ |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| +import org.apache.lucene.index.MultiDocValues; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.RandomIndexWriter; |
| -import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.BooleanClause.Occur; |
| @@ -367,8 +367,7 @@ |
| reader = writer.getReader(); |
| writer.close(); |
| searcher = newSearcher(reader); |
| - LeafReader ar = SlowCompositeReaderWrapper.wrap(reader); |
| - artistDocValues = ar.getSortedDocValues("artist"); |
| + artistDocValues = MultiDocValues.getSortedValues(reader, "artist"); |
| |
| // All searches sort by song popularity |
| final Similarity base = searcher.getSimilarity(true); |
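| |
| The two-line swap above is the whole migration for this test: rather than flattening the composite reader with the SlowCompositeReaderWrapper this patch drops from the test, MultiDocValues hands back a merged doc-values view over all segments. A minimal sketch of the replacement call, with the "artist" field name taken from the test: |
| |
| import java.io.IOException; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.MultiDocValues; |
| import org.apache.lucene.index.SortedDocValues; |
| |
| public class MultiDocValuesSketch { |
| static SortedDocValues artistValues(IndexReader reader) throws IOException { |
| return MultiDocValues.getSortedValues(reader, "artist"); // merged view over all leaves |
| } |
| } |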
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java indexsort/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java |
| --- trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java 2016-02-16 11:18:34.753021816 -0500 |
| +++ indexsort/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java 1969-12-31 19:00:00.000000000 -0500 |
| @@ -1,305 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -package org.apache.lucene.search; |
| - |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.HashMap; |
| -import java.util.HashSet; |
| -import java.util.List; |
| -import java.util.Random; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.analysis.MockAnalyzer; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field.Store; |
| -import org.apache.lucene.document.NumericDocValuesField; |
| -import org.apache.lucene.document.StringField; |
| -import org.apache.lucene.index.DirectoryReader; |
| -import org.apache.lucene.index.ExitableDirectoryReader; |
| -import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.IndexWriterConfig; |
| -import org.apache.lucene.index.QueryTimeout; |
| -import org.apache.lucene.index.RandomIndexWriter; |
| -import org.apache.lucene.index.SerialMergeScheduler; |
| -import org.apache.lucene.index.SortingMergePolicy; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.TestSortingMergePolicy; |
| -import org.apache.lucene.search.LeafCollector; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.MatchAllDocsQuery; |
| -import org.apache.lucene.search.Query; |
| -import org.apache.lucene.search.ScoreDoc; |
| -import org.apache.lucene.search.Sort; |
| -import org.apache.lucene.search.SortField; |
| -import org.apache.lucene.search.TermQuery; |
| -import org.apache.lucene.search.TopFieldCollector; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.uninverting.UninvertingReader; |
| -import org.apache.lucene.uninverting.UninvertingReader.Type; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.apache.lucene.util.TestUtil; |
| - |
| -import com.carrotsearch.randomizedtesting.generators.RandomPicks; |
| - |
| -public class TestEarlyTerminatingSortingCollector extends LuceneTestCase { |
| - |
| - private int numDocs; |
| - private List<String> terms; |
| - private Directory dir; |
| - private Sort sort; |
| - private RandomIndexWriter iw; |
| - private IndexReader reader; |
| - private SortingMergePolicy mergePolicy; |
| - private final int forceMergeMaxSegmentCount = 5; |
| - |
| - @Override |
| - public void setUp() throws Exception { |
| - super.setUp(); |
| - sort = new Sort(new SortField("ndv1", SortField.Type.LONG)); |
| - } |
| - |
| - private Document randomDocument() { |
| - final Document doc = new Document(); |
| - doc.add(new NumericDocValuesField("ndv1", random().nextInt(10))); |
| - doc.add(new NumericDocValuesField("ndv2", random().nextInt(10))); |
| - doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES)); |
| - return doc; |
| - } |
| - |
| - private void createRandomIndex(boolean singleSortedSegment) throws IOException { |
| - dir = newDirectory(); |
| - numDocs = atLeast(150); |
| - final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5); |
| - Set<String> randomTerms = new HashSet<>(); |
| - while (randomTerms.size() < numTerms) { |
| - randomTerms.add(TestUtil.randomSimpleString(random())); |
| - } |
| - terms = new ArrayList<>(randomTerms); |
| - final long seed = random().nextLong(); |
| - final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed))); |
| - iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests |
| - mergePolicy = TestSortingMergePolicy.newSortingMergePolicy(sort); |
| - iwc.setMergePolicy(mergePolicy); |
| - iw = new RandomIndexWriter(new Random(seed), dir, iwc); |
| - iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP |
| - for (int i = 0; i < numDocs; ++i) { |
| - final Document doc = randomDocument(); |
| - iw.addDocument(doc); |
| - if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) { |
| - iw.commit(); |
| - } |
| - if (random().nextInt(15) == 0) { |
| - final String term = RandomPicks.randomFrom(random(), terms); |
| - iw.deleteDocuments(new Term("s", term)); |
| - } |
| - } |
| - if (singleSortedSegment) { |
| - // because of deletions, there might still be a single flush segment in |
| - // the index, although we want a sorted segment, so it needs to be merged |
| - iw.getReader().close(); // refresh |
| - iw.addDocument(new Document()); |
| - iw.commit(); |
| - iw.addDocument(new Document()); |
| - iw.forceMerge(1); |
| - } |
| - else if (random().nextBoolean()) { |
| - iw.forceMerge(forceMergeMaxSegmentCount); |
| - } |
| - reader = iw.getReader(); |
| - } |
| - |
| - private void closeIndex() throws IOException { |
| - reader.close(); |
| - iw.close(); |
| - dir.close(); |
| - } |
| - |
| - public void testEarlyTermination() throws IOException { |
| - final int iters = atLeast(8); |
| - for (int i = 0; i < iters; ++i) { |
| - createRandomIndex(false); |
| - for (int j = 0; j < iters; ++j) { |
| - final IndexSearcher searcher = newSearcher(reader); |
| - final int numHits = TestUtil.nextInt(random(), 1, numDocs); |
| - final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG, false)); |
| - final boolean fillFields = random().nextBoolean(); |
| - final boolean trackDocScores = random().nextBoolean(); |
| - final boolean trackMaxScore = random().nextBoolean(); |
| - final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore); |
| - final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore); |
| - |
| - final Query query; |
| - if (random().nextBoolean()) { |
| - query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms))); |
| - } else { |
| - query = new MatchAllDocsQuery(); |
| - } |
| - searcher.search(query, collector1); |
| - searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy.getSort())); |
| - assertTrue(collector1.getTotalHits() >= collector2.getTotalHits()); |
| - assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs); |
| - } |
| - closeIndex(); |
| - } |
| - } |
| - |
| - public void testCanEarlyTerminate() { |
| - assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| - new Sort(new SortField("a", SortField.Type.LONG)), |
| - new Sort(new SortField("a", SortField.Type.LONG)))); |
| - |
| - assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| - new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), |
| - new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); |
| - |
| - assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| - new Sort(new SortField("a", SortField.Type.LONG)), |
| - new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); |
| - |
| - assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| - new Sort(new SortField("a", SortField.Type.LONG, true)), |
| - new Sort(new SortField("a", SortField.Type.LONG, false)))); |
| - |
| - assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| - new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), |
| - new Sort(new SortField("a", SortField.Type.LONG)))); |
| - |
| - assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| - new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), |
| - new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING)))); |
| - |
| - assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate( |
| - new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), |
| - new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); |
| - } |
| - |
| - public void testEarlyTerminationDifferentSorter() throws IOException { |
| - createRandomIndex(false); |
| - final int iters = atLeast(3); |
| - for (int i = 0; i < iters; ++i) { |
| - final IndexSearcher searcher = newSearcher(reader); |
| - // test that the collector works correctly when the index was sorted by a |
| - // different sorter than the one specified in the ctor. |
| - final int numHits = TestUtil.nextInt(random(), 1, numDocs); |
| - final Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false)); |
| - final boolean fillFields = random().nextBoolean(); |
| - final boolean trackDocScores = random().nextBoolean(); |
| - final boolean trackMaxScore = random().nextBoolean(); |
| - final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore); |
| - final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore); |
| - |
| - final Query query; |
| - if (random().nextBoolean()) { |
| - query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms))); |
| - } else { |
| - query = new MatchAllDocsQuery(); |
| - } |
| - searcher.search(query, collector1); |
| - Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG)); |
| - |
| - searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, different) { |
| - @Override |
| - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { |
| - final LeafCollector ret = super.getLeafCollector(context); |
| - assertTrue("segment should not be recognized as sorted as different sorter was used", ret.getClass() == in.getLeafCollector(context).getClass()); |
| - return ret; |
| - } |
| - }); |
| - assertTrue(collector1.getTotalHits() >= collector2.getTotalHits()); |
| - assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs); |
| - } |
| - closeIndex(); |
| - } |
| - |
| - private static void assertTopDocsEquals(ScoreDoc[] scoreDocs1, ScoreDoc[] scoreDocs2) { |
| - assertEquals(scoreDocs1.length, scoreDocs2.length); |
| - for (int i = 0; i < scoreDocs1.length; ++i) { |
| - final ScoreDoc scoreDoc1 = scoreDocs1[i]; |
| - final ScoreDoc scoreDoc2 = scoreDocs2[i]; |
| - assertEquals(scoreDoc1.doc, scoreDoc2.doc); |
| - assertEquals(scoreDoc1.score, scoreDoc2.score, 0.001f); |
| - } |
| - } |
| - |
| - private class TestTerminatedEarlySimpleCollector extends SimpleCollector { |
| - private boolean collectedSomething; |
| - public boolean collectedSomething() { |
| - return collectedSomething; |
| - } |
| - @Override |
| - public void collect(int doc) throws IOException { |
| - collectedSomething = true; |
| - } |
| - @Override |
| - public boolean needsScores() { |
| - return false; |
| - } |
| - } |
| - |
| - private class TestEarlyTerminatingSortingcollectorQueryTimeout implements QueryTimeout { |
| - final private boolean shouldExit; |
| - public TestEarlyTerminatingSortingcollectorQueryTimeout(boolean shouldExit) { |
| - this.shouldExit = shouldExit; |
| - } |
| - public boolean shouldExit() { |
| - return shouldExit; |
| - } |
| - } |
| - |
| - private IndexSearcher newSearcherForTestTerminatedEarly(IndexReader r) throws IOException { |
| - switch(random().nextInt(2)) { |
| - case 0: |
| - return new IndexSearcher(r); |
| - case 1: |
| - assertTrue(r+" is not a DirectoryReader", (r instanceof DirectoryReader)); |
| - final DirectoryReader directoryReader = ExitableDirectoryReader.wrap( |
| - UninvertingReader.wrap((DirectoryReader) r, new HashMap<String,Type>()), |
| - new TestEarlyTerminatingSortingcollectorQueryTimeout(false)); |
| - return new IndexSearcher(directoryReader); |
| - } |
| - fail("newSearcherForTestTerminatedEarly("+r+") fell through switch"); |
| - return null; |
| - } |
| - |
| - public void testTerminatedEarly() throws IOException { |
| - final int iters = atLeast(8); |
| - for (int i = 0; i < iters; ++i) { |
| - createRandomIndex(true); |
| - |
| - final IndexSearcher searcher = newSearcherForTestTerminatedEarly(reader); // future TODO: use newSearcher(reader); |
| - final Query query = new MatchAllDocsQuery(); // search for everything/anything |
| - |
| - final TestTerminatedEarlySimpleCollector collector1 = new TestTerminatedEarlySimpleCollector(); |
| - searcher.search(query, collector1); |
| - |
| - final TestTerminatedEarlySimpleCollector collector2 = new TestTerminatedEarlySimpleCollector(); |
| - final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1, mergePolicy.getSort()); |
| - searcher.search(query, etsCollector); |
| - |
| - assertTrue("collector1="+collector1.collectedSomething()+" vs. collector2="+collector2.collectedSomething(), collector1.collectedSomething() == collector2.collectedSomething()); |
| - |
| - if (collector1.collectedSomething()) { |
| - // we collected something and since we modestly asked for just one document we should have terminated early |
| - assertTrue("should have terminated early (searcher.reader="+searcher.reader+")", etsCollector.terminatedEarly()); |
| - } |
| - closeIndex(); |
| - } |
| - } |
| - |
| -} |
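
For reference, the canEarlyTerminate assertions above encode a prefix rule: early termination is legal only when the search sort is a prefix of the sort the segments were written with, every SortField agreeing on field, type, and direction. Below is a minimal sketch consistent with those assertions; it is an illustration only, not Lucene's actual implementation, and the helper name is made up.

    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;

    final class EarlyTerminationRule {
      // Sketch only: a prefix check matching the assertions above; the real
      // EarlyTerminatingSortingCollector.canEarlyTerminate may differ in detail.
      static boolean canEarlyTerminateSketch(Sort searchSort, Sort indexSort) {
        final SortField[] search = searchSort.getSort();
        final SortField[] index = indexSort.getSort();
        if (search.length > index.length) {
          return false; // the search sort needs more tie-breaks than the index provides
        }
        for (int i = 0; i < search.length; i++) {
          if (search[i].equals(index[i]) == false) {
            return false; // field, type, or reverse flag differs
          }
        }
        return true;
      }
    }
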
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java indexsort/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java |
| --- trunk/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java 2016-05-03 07:31:51.560971608 -0400 |
| +++ indexsort/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -247,7 +247,7 @@ |
| |
| private IndexWriterConfig getIndexWriterConfig() { |
| IndexWriterConfig iwc = newIndexWriterConfig(); |
| - iwc.setCodec(Codec.forName("Lucene60")); |
| + iwc.setCodec(Codec.forName("Lucene62")); |
| return iwc; |
| } |
| } |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java indexsort/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java |
| --- trunk/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java 2016-05-03 07:31:51.564971608 -0400 |
| +++ indexsort/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -85,14 +85,14 @@ |
| public class TestGeo3DPoint extends LuceneTestCase { |
| |
| private static Codec getCodec() { |
| - if (Codec.getDefault().getName().equals("Lucene60")) { |
| + if (Codec.getDefault().getName().equals("Lucene62")) { |
| int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); |
| double maxMBSortInHeap = 3.0 + (3*random().nextDouble()); |
| if (VERBOSE) { |
| System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap); |
| } |
| |
| - return new FilterCodec("Lucene60", Codec.getDefault()) { |
| + return new FilterCodec("Lucene62", Codec.getDefault()) { |
| @Override |
| public PointsFormat pointsFormat() { |
| return new PointsFormat() { |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java indexsort/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java |
| --- trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java 2016-02-16 11:18:34.833021818 -0500 |
| +++ indexsort/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -56,7 +56,6 @@ |
| import org.apache.lucene.index.ReaderUtil; |
| import org.apache.lucene.index.SegmentReader; |
| import org.apache.lucene.index.SortedSetDocValues; |
| -import org.apache.lucene.index.SortingMergePolicy; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.BooleanClause.Occur; |
| import org.apache.lucene.search.BooleanClause; |
| @@ -232,7 +231,7 @@ |
| // This way all merged segments will be sorted at |
| // merge time, allowing for per-segment early termination |
| // when those segments are searched: |
| - iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT)); |
| + iwc.setIndexSort(SORT); |
| |
| return iwc; |
| } |
| @@ -586,10 +585,9 @@ |
| |
| // We sorted postings by weight during indexing, so we |
| // only retrieve the first num hits now: |
| - final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) writer.getConfig().getMergePolicy(); |
| - Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, sortingMergePolicy.getSort()); |
| - IndexSearcher searcher = searcherMgr.acquire(); |
| + Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num); |
| List<LookupResult> results = null; |
| + IndexSearcher searcher = searcherMgr.acquire(); |
| try { |
| //System.out.println("got searcher=" + searcher); |
| searcher.search(finalQuery, c2); |
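
The two hunks above are the heart of the API change: the index-time sort is now declared once through IndexWriterConfig.setIndexSort instead of wrapping the merge policy in a SortingMergePolicy, and EarlyTerminatingSortingCollector no longer takes the merge-time sort as a separate argument. A minimal sketch of the new wiring, assuming only the setIndexSort API shown above (the analyzer choice and the "weight" field are illustrative):

    import java.io.IOException;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;

    final class IndexSortWiring {
      // Sketch: declare the sort once on the config; flushed and merged
      // segments are then written in this order, so a collector over a
      // matching search sort can stop early within each segment.
      static IndexWriter openSortedWriter(Directory dir) throws IOException {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setIndexSort(new Sort(new SortField("weight", SortField.Type.LONG, true)));
        return new IndexWriter(dir, iwc);
      }
    }
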
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java indexsort/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java |
| --- trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java 2016-03-08 17:22:26.848938631 -0500 |
| +++ indexsort/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -66,7 +66,7 @@ |
| assertEquals("a penny saved is a penny earned", results.get(0).key); |
| assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey); |
| assertEquals(10, results.get(0).value); |
| - assertEquals(new BytesRef("foobaz"), results.get(0).payload); |
| + assertEquals("foobaz", results.get(0).payload.utf8ToString()); |
| |
| assertEquals("lend me your ear", results.get(1).key); |
| assertEquals("lend me your <b>ear</b>", results.get(1).highlightKey); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java indexsort/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java |
| --- trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java 2016-03-08 17:22:26.848938631 -0500 |
| +++ indexsort/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -32,7 +32,7 @@ |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.PostingsFormat; |
| -import org.apache.lucene.codecs.lucene60.Lucene60Codec; |
| +import org.apache.lucene.codecs.lucene62.Lucene62Codec; |
| import org.apache.lucene.document.IntPoint; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| @@ -646,7 +646,7 @@ |
| static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) { |
| IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer); |
| iwc.setMergePolicy(newLogMergePolicy()); |
| - Codec filterCodec = new Lucene60Codec() { |
| + Codec filterCodec = new Lucene62Codec() { |
| PostingsFormat postingsFormat = new Completion50PostingsFormat(); |
| |
| @Override |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java 2016-05-03 07:31:51.564971608 -0400 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -1242,7 +1242,7 @@ |
| // Else seeds may not reproduce: |
| iwc.setMergeScheduler(new SerialMergeScheduler()); |
| int pointsInLeaf = 2 + random().nextInt(4); |
| - iwc.setCodec(new FilterCodec("Lucene60", TestUtil.getDefaultCodec()) { |
| + iwc.setCodec(new FilterCodec("Lucene62", TestUtil.getDefaultCodec()) { |
| @Override |
| public PointsFormat pointsFormat() { |
| return new PointsFormat() { |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java 2016-03-02 04:32:40.483807337 -0500 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -627,7 +627,7 @@ |
| |
| /** Returns a new fake segment */ |
| protected static SegmentInfo newSegmentInfo(Directory dir, String name) { |
| - return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| } |
| |
| /** Creates a file of the specified size with random data. */ |
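
The trailing null above is the pattern repeated through the rest of the test framework: SegmentInfo grows a final Sort parameter recording the segment's index sort (null for unsorted segments), which the codec's SegmentInfoFormat persists. A sketch of the widened constructor under that assumption; the "id" sort field here is a placeholder:

    import java.util.Collections;
    import java.util.HashMap;
    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.StringHelper;
    import org.apache.lucene.util.Version;

    final class SortedSegmentInfos {
      // Sketch: same call as before the patch, plus the new trailing argument.
      static SegmentInfo newSortedSegmentInfo(Directory dir, String name) {
        return new SegmentInfo(dir, Version.LATEST, name, 10000, false,
            Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(),
            new HashMap<>(),
            // the segment's index sort; pass null for an unsorted segment
            new Sort(new SortField("id", SortField.Type.LONG)));
      }
    }
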
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java 2016-03-02 04:32:40.483807337 -0500 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -347,7 +347,7 @@ |
| |
| /** Returns a new fake segment */ |
| protected static SegmentInfo newSegmentInfo(Directory dir, String name) { |
| - return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| } |
| |
| @Override |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java 2016-03-13 05:38:07.395183845 -0400 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -303,7 +303,7 @@ |
| Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors")); |
| Codec codec = getCodec(); |
| |
| - SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field"); |
| FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(), |
| proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(), |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java 2016-03-02 04:32:40.483807337 -0500 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -26,7 +26,8 @@ |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.StoredField; |
| -import org.apache.lucene.document.TextField; |
| +import org.apache.lucene.search.Sort; |
| +import org.apache.lucene.search.SortField; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.MockDirectoryWrapper; |
| @@ -52,7 +53,7 @@ |
| Codec codec = getCodec(); |
| byte id[] = StringHelper.randomId(); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - Collections.<String,String>emptyMap(), id, new HashMap<>()); |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); |
| @@ -66,7 +67,7 @@ |
| Codec codec = getCodec(); |
| byte id[] = StringHelper.randomId(); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - Collections.<String,String>emptyMap(), id, new HashMap<>()); |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), null); |
| Set<String> originalFiles = Collections.singleton("_123.a"); |
| info.setFiles(originalFiles); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| @@ -95,7 +96,7 @@ |
| diagnostics.put("key1", "value1"); |
| diagnostics.put("key2", "value2"); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - diagnostics, id, new HashMap<>()); |
| + diagnostics, id, new HashMap<>(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); |
| @@ -118,7 +119,7 @@ |
| attributes.put("key1", "value1"); |
| attributes.put("key2", "value2"); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - Collections.emptyMap(), id, attributes); |
| + Collections.emptyMap(), id, attributes, null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); |
| @@ -138,7 +139,7 @@ |
| Directory dir = newDirectory(); |
| byte id[] = StringHelper.randomId(); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - Collections.<String,String>emptyMap(), id, new HashMap<>()); |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); |
| @@ -153,7 +154,7 @@ |
| Directory dir = newDirectory(); |
| byte id[] = StringHelper.randomId(); |
| SegmentInfo info = new SegmentInfo(dir, v, "_123", 1, false, codec, |
| - Collections.<String,String>emptyMap(), id, new HashMap<>()); |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); |
| @@ -161,7 +162,57 @@ |
| dir.close(); |
| } |
| } |
| - |
| + |
| + protected boolean supportsIndexSort() { |
| + return true; |
| + } |
| + |
| + /** Test sort */ |
| + public void testSort() throws IOException { |
| + assumeTrue("test requires a codec that can read/write index sort", supportsIndexSort()); |
| + |
| + final int iters = atLeast(5); |
| + for (int i = 0; i < iters; ++i) { |
| + Sort sort; |
| + if (i == 0) { |
| + sort = null; |
| + } else { |
| + final int numSortFields = TestUtil.nextInt(random(), 1, 3); |
| + SortField[] sortFields = new SortField[numSortFields]; |
| + for (int j = 0; j < numSortFields; ++j) { |
| + sortFields[j] = new SortField( |
| + TestUtil.randomSimpleString(random()), |
| + random().nextBoolean() ? SortField.Type.LONG : SortField.Type.STRING, |
| + random().nextBoolean()); |
| + if (random().nextBoolean()) { |
| + switch (sortFields[j].getType()) { |
| + case LONG: |
| + sortFields[j].setMissingValue(random().nextLong()); |
| + break; |
| + case STRING: |
| + sortFields[j].setMissingValue(random().nextBoolean() ? SortField.STRING_FIRST : SortField.STRING_LAST); |
| + break; |
| + default: |
| + fail(); |
| + } |
| + } |
| + } |
| + sort = new Sort(sortFields); |
| + } |
| + |
| + Directory dir = newDirectory(); |
| + Codec codec = getCodec(); |
| + byte id[] = StringHelper.randomId(); |
| + SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), sort); |
| + info.setFiles(Collections.<String>emptySet()); |
| + codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| + SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); |
| + assertEquals(sort, info2.getIndexSort()); |
| + dir.close(); |
| + } |
| + } |
| + |
| /** |
| * Test segment infos write that hits exception immediately on open. |
| * make sure we get our exception back, no file handle leaks, etc. |
| @@ -183,7 +234,7 @@ |
| Codec codec = getCodec(); |
| byte id[] = StringHelper.randomId(); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - Collections.<String,String>emptyMap(), id, new HashMap<>()); |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| |
| fail.setDoFail(); |
| @@ -216,7 +267,7 @@ |
| Codec codec = getCodec(); |
| byte id[] = StringHelper.randomId(); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - Collections.<String,String>emptyMap(), id, new HashMap<>()); |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| |
| fail.setDoFail(); |
| @@ -249,7 +300,7 @@ |
| Codec codec = getCodec(); |
| byte id[] = StringHelper.randomId(); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - Collections.<String,String>emptyMap(), id, new HashMap<>()); |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| |
| @@ -283,7 +334,7 @@ |
| Codec codec = getCodec(); |
| byte id[] = StringHelper.randomId(); |
| SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, |
| - Collections.<String,String>emptyMap(), id, new HashMap<>()); |
| + Collections.<String,String>emptyMap(), id, new HashMap<>(), null); |
| info.setFiles(Collections.<String>emptySet()); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| |
| @@ -332,7 +383,7 @@ |
| TestUtil.randomUnicodeString(random())); |
| } |
| |
| - SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes); |
| + SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes, null); |
| info.setFiles(files); |
| codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); |
| SegmentInfo info2 = codec.segmentInfoFormat().read(dir, name, id, IOContext.DEFAULT); |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java 2016-03-08 17:22:26.848938631 -0500 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -138,7 +138,6 @@ |
| |
| static class MockRandomOneMerge extends OneMerge { |
| final Random r; |
| - ArrayList<CodecReader> readers; |
| |
| MockRandomOneMerge(List<SegmentCommitInfo> segments, long seed) { |
| super(segments); |
| @@ -146,34 +145,31 @@ |
| } |
| |
| @Override |
| - public List<CodecReader> getMergeReaders() throws IOException { |
| - if (readers == null) { |
| - readers = new ArrayList<CodecReader>(super.getMergeReaders()); |
| - for (int i = 0; i < readers.size(); i++) { |
| - // wrap it (e.g. prevent bulk merge etc) |
| - // TODO: cut this over to FilterCodecReader api, we can explicitly |
| - // enable/disable bulk merge for portions of the index we want. |
| - int thingToDo = r.nextInt(7); |
| - if (thingToDo == 0) { |
| - // simple no-op FilterReader |
| - if (LuceneTestCase.VERBOSE) { |
| - System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + readers.get(i)); |
| - } |
| - readers.set(i, SlowCodecReaderWrapper.wrap(new FilterLeafReader(readers.get(i)) {})); |
| - } else if (thingToDo == 1) { |
| - // renumber fields |
| - // NOTE: currently this only "blocks" bulk merges just by |
| - // being a FilterReader. But it might find bugs elsewhere, |
| - // and maybe the situation can be improved in the future. |
| - if (LuceneTestCase.VERBOSE) { |
| - System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + readers.get(i)); |
| - } |
| - readers.set(i, SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(readers.get(i), r))); |
| - } |
| - // otherwise, reader is unchanged |
| + public CodecReader wrapForMerge(CodecReader reader) throws IOException { |
| + |
| + // wrap it (e.g. prevent bulk merge etc) |
| + // TODO: cut this over to FilterCodecReader api, we can explicitly |
| + // enable/disable bulk merge for portions of the index we want. |
| + int thingToDo = r.nextInt(7); |
| + if (thingToDo == 0) { |
| + // simple no-op FilterReader |
| + if (LuceneTestCase.VERBOSE) { |
| + System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + reader); |
| + } |
| + return SlowCodecReaderWrapper.wrap(new FilterLeafReader(reader) {}); |
| + } else if (thingToDo == 1) { |
| + // renumber fields |
| + // NOTE: currently this only "blocks" bulk merges just by |
| + // being a FilterReader. But it might find bugs elsewhere, |
| + // and maybe the situation can be improved in the future. |
| + if (LuceneTestCase.VERBOSE) { |
| + System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + reader); |
| } |
| + return SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(reader, r)); |
| + } else { |
| + // otherwise, reader is unchanged |
| + return reader; |
| } |
| - return readers; |
| } |
| } |
| } |
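
The MockRandomMergePolicy rewrite above tracks a MergePolicy API change: OneMerge no longer hands out a mutable reader list through getMergeReaders(); each incoming reader instead flows through a wrapForMerge(CodecReader) hook. A minimal sketch of a custom merge using the new hook, mirroring the no-op FilterLeafReader branch above (the class name is made up):

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.index.CodecReader;
    import org.apache.lucene.index.FilterLeafReader;
    import org.apache.lucene.index.MergePolicy.OneMerge;
    import org.apache.lucene.index.SegmentCommitInfo;
    import org.apache.lucene.index.SlowCodecReaderWrapper;

    // Sketch: a OneMerge that defeats bulk-copy optimizations by wrapping
    // every reader in a no-op filter before it is merged.
    class NoBulkCopyOneMerge extends OneMerge {
      NoBulkCopyOneMerge(List<SegmentCommitInfo> segments) {
        super(segments);
      }

      @Override
      public CodecReader wrapForMerge(CodecReader reader) throws IOException {
        return SlowCodecReaderWrapper.wrap(new FilterLeafReader(reader) {});
      }
    }
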
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java 2016-03-02 04:32:40.483807337 -0500 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -611,7 +611,7 @@ |
| // maxAllowed = the "highest" we can index, but we will still |
| // randomly index at lower IndexOption |
| public FieldsProducer buildIndex(Codec codec, Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException { |
| - SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); |
| + SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| |
| int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed); |
| if (LuceneTestCase.VERBOSE) { |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java indexsort/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java 2016-03-13 05:38:07.399183845 -0400 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -283,6 +283,11 @@ |
| |
| @Override |
| protected void doClose() throws IOException {} |
| + |
| + @Override |
| + public Sort getIndexSort() { |
| + return null; |
| + } |
| }; |
| } |
| |
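
The override above reflects a new accessor on LeafReader: getIndexSort() reports the sort a segment was written with, or null when the segment is unsorted, so search-time code can decide per segment whether early termination is safe. A small sketch under that assumption (the method name printSegmentSorts is illustrative):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.Sort;

    final class SegmentSortInspector {
      // Sketch: walk the leaves and report each segment's index-time sort.
      static void printSegmentSorts(IndexReader reader) throws IOException {
        for (LeafReaderContext ctx : reader.leaves()) {
          Sort segmentSort = ctx.reader().getIndexSort(); // null => unsorted
          System.out.println("segment " + ctx.ord + " sort=" + segmentSort);
        }
      }
    }
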
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java indexsort/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java 2016-02-16 11:18:34.853021818 -0500 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -32,7 +32,7 @@ |
| import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec; |
| import org.apache.lucene.codecs.compressing.CompressingCodec; |
| import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; |
| -import org.apache.lucene.codecs.lucene60.Lucene60Codec; |
| +import org.apache.lucene.codecs.lucene62.Lucene62Codec; |
| import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; |
| import org.apache.lucene.codecs.simpletext.SimpleTextCodec; |
| import org.apache.lucene.index.RandomCodec; |
| @@ -181,8 +181,8 @@ |
| codec = new AssertingCodec(); |
| } else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) { |
| codec = CompressingCodec.randomInstance(random); |
| - } else if ("Lucene60".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene60"))) { |
| - codec = new Lucene60Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values())); |
| + } else if ("Lucene62".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene62"))) { |
| + codec = new Lucene62Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values())); |
| } else if (!"random".equals(TEST_CODEC)) { |
| codec = Codec.forName(TEST_CODEC); |
| } else if ("random".equals(TEST_POSTINGSFORMAT)) { |
| diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java indexsort/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java |
| --- trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java 2016-03-10 16:23:24.703676109 -0500 |
| +++ indexsort/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java 2016-05-10 05:44:23.756471119 -0400 |
| @@ -54,7 +54,7 @@ |
| import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat; |
| import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; |
| import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat; |
| -import org.apache.lucene.codecs.lucene60.Lucene60Codec; |
| +import org.apache.lucene.codecs.lucene62.Lucene62Codec; |
| import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; |
| import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; |
| import org.apache.lucene.document.BinaryDocValuesField; |
| @@ -911,7 +911,7 @@ |
| * This may be different than {@link Codec#getDefault()} because that is randomized. |
| */ |
| public static Codec getDefaultCodec() { |
| - return new Lucene60Codec(); |
| + return new Lucene62Codec(); |
| } |
| |
| /** |