diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
--- trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java 2016-03-08 17:22:26.824938630 -0500
+++ indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java 2016-05-10 05:44:23.740471118 -0400
@@ -108,7 +108,7 @@
}
@Override
- public final SegmentInfoFormat segmentInfoFormat() {
+ public SegmentInfoFormat segmentInfoFormat() {
return segmentInfosFormat;
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java
--- trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java 2016-05-10 05:44:23.740471118 -0400
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene50;
+
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter; // javadocs
+import org.apache.lucene.index.SegmentInfo; // javadocs
+import org.apache.lucene.index.SegmentInfos; // javadocs
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataOutput; // javadocs
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Version;
+
+/**
+ * Lucene 5.0 Segment info format.
+ * @deprecated Only for reading old 5.0-6.0 segments
+ */
+@Deprecated
+public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
+
+ /** Sole constructor. */
+ public Lucene50SegmentInfoFormat() {
+ }
+
+ @Override
+ public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
+ try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
+ Throwable priorE = null;
+ SegmentInfo si = null;
+ try {
+ int format = CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
+ Lucene50SegmentInfoFormat.VERSION_START,
+ Lucene50SegmentInfoFormat.VERSION_CURRENT,
+ segmentID, "");
+ final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
+
+ final int docCount = input.readInt();
+ if (docCount < 0) {
+ throw new CorruptIndexException("invalid docCount: " + docCount, input);
+ }
+ final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+
+ final Map<String,String> diagnostics;
+ final Set<String> files;
+ final Map<String,String> attributes;
+
+ if (format >= VERSION_SAFE_MAPS) {
+ diagnostics = input.readMapOfStrings();
+ files = input.readSetOfStrings();
+ attributes = input.readMapOfStrings();
+ } else {
+ diagnostics = Collections.unmodifiableMap(input.readStringStringMap());
+ files = Collections.unmodifiableSet(input.readStringSet());
+ attributes = Collections.unmodifiableMap(input.readStringStringMap());
+ }
+
+ si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null);
+ si.setFiles(files);
+ } catch (Throwable exception) {
+ priorE = exception;
+ } finally {
+ CodecUtil.checkFooter(input, priorE);
+ }
+ return si;
+ }
+ }
+
+ @Override
+ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+ throw new UnsupportedOperationException("this codec can only be used for reading");
+ }
+
+ /** File extension used to store {@link SegmentInfo}. */
+ public final static String SI_EXTENSION = "si";
+ static final String CODEC_NAME = "Lucene50SegmentInfo";
+ static final int VERSION_START = 0;
+ static final int VERSION_SAFE_MAPS = 1;
+ static final int VERSION_CURRENT = VERSION_SAFE_MAPS;
+}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java
--- trunk/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java 2016-05-10 05:44:23.740471118 -0400
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene60;
+
+import java.util.Objects;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PointsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
+import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+
+/**
+ * Implements the Lucene 6.0 index format, with configurable per-field postings
+ * and docvalues formats.
+ * <p>
+ * If you want to reuse functionality of this codec in another codec, extend
+ * {@link FilterCodec}.
+ *
+ * @see org.apache.lucene.codecs.lucene60 package documentation for file format details.
+ *
+ * @lucene.experimental
+ */
+public class Lucene60Codec extends Codec {
+ private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
+ private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat();
+ private final SegmentInfoFormat segmentInfosFormat = new Lucene50SegmentInfoFormat();
+ private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
+ private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
+
+ private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return Lucene60Codec.this.getPostingsFormatForField(field);
+ }
+ };
+
+ private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
+ @Override
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return Lucene60Codec.this.getDocValuesFormatForField(field);
+ }
+ };
+
+ private final StoredFieldsFormat storedFieldsFormat;
+
+ /**
+ * Instantiates a new codec.
+ */
+ public Lucene60Codec() {
+ this(Mode.BEST_SPEED);
+ }
+
+ /**
+ * Instantiates a new codec, specifying the stored fields compression
+ * mode to use.
+ * @param mode stored fields compression mode to use for newly
+ * flushed/merged segments.
+ */
+ public Lucene60Codec(Mode mode) {
+ super("Lucene60");
+ this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode));
+ }
+
+ @Override
+ public final StoredFieldsFormat storedFieldsFormat() {
+ return storedFieldsFormat;
+ }
+
+ @Override
+ public final TermVectorsFormat termVectorsFormat() {
+ return vectorsFormat;
+ }
+
+ @Override
+ public final PostingsFormat postingsFormat() {
+ return postingsFormat;
+ }
+
+ @Override
+ public final FieldInfosFormat fieldInfosFormat() {
+ return fieldInfosFormat;
+ }
+
+ @Override
+ public final SegmentInfoFormat segmentInfoFormat() {
+ return segmentInfosFormat;
+ }
+
+ @Override
+ public final LiveDocsFormat liveDocsFormat() {
+ return liveDocsFormat;
+ }
+
+ @Override
+ public final CompoundFormat compoundFormat() {
+ return compoundFormat;
+ }
+
+ @Override
+ public final PointsFormat pointsFormat() {
+ return new Lucene60PointsFormat();
+ }
+
+ /** Returns the postings format that should be used for writing
+ * new segments of <code>field</code>.
+ *
+ * The default implementation always returns "Lucene50".
+ * <p>
+ * <b>WARNING:</b> if you subclass, you are responsible for index
+ * backwards compatibility: future versions of Lucene are only
+ * guaranteed to be able to read the default implementation.
+ */
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return defaultFormat;
+ }
+
+ /** Returns the docvalues format that should be used for writing
+ * new segments of <code>field</code>.
+ *
+ * The default implementation always returns "Lucene54".
+ * <p>
+ * <b>WARNING:</b> if you subclass, you are responsible for index
+ * backwards compatibility: future versions of Lucene are only
+ * guaranteed to be able to read the default implementation.
+ */
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return defaultDVFormat;
+ }
+
+ @Override
+ public final DocValuesFormat docValuesFormat() {
+ return docValuesFormat;
+ }
+
+ private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50");
+ private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene54");
+
+ private final NormsFormat normsFormat = new Lucene53NormsFormat();
+
+ @Override
+ public final NormsFormat normsFormat() {
+ return normsFormat;
+ }
+}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec indexsort/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
--- trunk/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec 2016-01-24 13:09:49.836989951 -0500
+++ indexsort/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec 2016-05-10 05:44:23.740471118 -0400
@@ -16,3 +16,4 @@
org.apache.lucene.codecs.lucene50.Lucene50Codec
org.apache.lucene.codecs.lucene53.Lucene53Codec
org.apache.lucene.codecs.lucene54.Lucene54Codec
+org.apache.lucene.codecs.lucene60.Lucene60Codec
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java indexsort/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java
--- trunk/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java 2016-02-16 11:18:34.633021814 -0500
+++ indexsort/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java 2016-05-10 05:44:23.740471118 -0400
@@ -18,6 +18,7 @@
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
/**
* Codec for testing 5.0 index format
@@ -26,9 +27,15 @@
@Deprecated
final class Lucene50RWCodec extends Lucene50Codec {
private final NormsFormat normsFormat = new Lucene50RWNormsFormat();
+ private final SegmentInfoFormat segmentInfoFormat = new Lucene50RWSegmentInfoFormat();
@Override
public NormsFormat normsFormat() {
return normsFormat;
}
+
+ @Override
+ public SegmentInfoFormat segmentInfoFormat() {
+ return segmentInfoFormat;
+ }
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java indexsort/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java
--- trunk/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java 2016-05-10 05:44:23.740471118 -0400
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene50;
+
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter; // javadocs
+import org.apache.lucene.index.SegmentInfo; // javadocs
+import org.apache.lucene.index.SegmentInfos; // javadocs
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataOutput; // javadocs
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Version;
+
+/**
+ * Read-write version of 5.0 SegmentInfoFormat for testing
+ * @deprecated for test purposes only
+ */
+@Deprecated
+public class Lucene50RWSegmentInfoFormat extends Lucene50SegmentInfoFormat {
+
+ /** Sole constructor. */
+ public Lucene50RWSegmentInfoFormat() {
+ }
+
+ @Override
+ public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
+ try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
+ Throwable priorE = null;
+ SegmentInfo si = null;
+ try {
+ int format = CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
+ Lucene50SegmentInfoFormat.VERSION_START,
+ Lucene50SegmentInfoFormat.VERSION_CURRENT,
+ segmentID, "");
+ final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
+
+ final int docCount = input.readInt();
+ if (docCount < 0) {
+ throw new CorruptIndexException("invalid docCount: " + docCount, input);
+ }
+ final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+
+ final Map<String,String> diagnostics;
+ final Set<String> files;
+ final Map<String,String> attributes;
+
+ if (format >= VERSION_SAFE_MAPS) {
+ diagnostics = input.readMapOfStrings();
+ files = input.readSetOfStrings();
+ attributes = input.readMapOfStrings();
+ } else {
+ diagnostics = Collections.unmodifiableMap(input.readStringStringMap());
+ files = Collections.unmodifiableSet(input.readStringSet());
+ attributes = Collections.unmodifiableMap(input.readStringStringMap());
+ }
+
+ si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null);
+ si.setFiles(files);
+ } catch (Throwable exception) {
+ priorE = exception;
+ } finally {
+ CodecUtil.checkFooter(input, priorE);
+ }
+ return si;
+ }
+ }
+
+ @Override
+ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
+
+ assert si.getIndexSort() == null;
+
+ try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
+ // Only add the file once we've successfully created it, else IFD assert can trip:
+ si.addFile(fileName);
+ CodecUtil.writeIndexHeader(output,
+ Lucene50SegmentInfoFormat.CODEC_NAME,
+ Lucene50SegmentInfoFormat.VERSION_CURRENT,
+ si.getId(),
+ "");
+ Version version = si.getVersion();
+ if (version.major < 5) {
+ throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si);
+ }
+ // Write the Lucene version that created this segment, since 3.1
+ output.writeInt(version.major);
+ output.writeInt(version.minor);
+ output.writeInt(version.bugfix);
+ assert version.prerelease == 0;
+ output.writeInt(si.maxDoc());
+
+ output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
+ output.writeMapOfStrings(si.getDiagnostics());
+ Set<String> files = si.files();
+ for (String file : files) {
+ if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
+ throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
+ }
+ }
+ output.writeSetOfStrings(files);
+ output.writeMapOfStrings(si.getAttributes());
+
+ CodecUtil.writeFooter(output);
+ }
+ }
+
+ /** File extension used to store {@link SegmentInfo}. */
+ public final static String SI_EXTENSION = "si";
+ static final String CODEC_NAME = "Lucene50SegmentInfo";
+ static final int VERSION_START = 0;
+ static final int VERSION_SAFE_MAPS = 1;
+ static final int VERSION_CURRENT = VERSION_SAFE_MAPS;
+}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java indexsort/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
--- trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java 2016-02-16 11:18:34.637021814 -0500
+++ indexsort/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java 2016-05-10 05:44:23.740471118 -0400
@@ -29,7 +29,7 @@
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
@@ -139,7 +139,7 @@
if (defaultCodec == null && postingsFormat != null) {
try {
final PostingsFormat postingsFormatChosen = PostingsFormat.forName(postingsFormat);
- iwConf.setCodec(new Lucene60Codec() {
+ iwConf.setCodec(new Lucene62Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postingsFormatChosen;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/classification/build.xml indexsort/lucene/classification/build.xml
--- trunk/lucene/classification/build.xml 2016-04-24 06:00:46.361895938 -0400
+++ indexsort/lucene/classification/build.xml 2016-05-10 05:44:23.740471118 -0400
@@ -28,7 +28,6 @@
<path refid="base.classpath"/>
<pathelement path="${queries.jar}"/>
<pathelement path="${grouping.jar}"/>
- <pathelement path="${misc.jar}"/>
</path>
<path id="test.classpath">
@@ -37,17 +36,16 @@
<path refid="test.base.classpath"/>
</path>
- <target name="compile-core" depends="jar-misc,jar-grouping,jar-queries,jar-analyzers-common,common.compile-core" />
+ <target name="compile-core" depends="jar-grouping,jar-queries,jar-analyzers-common,common.compile-core" />
<target name="jar-core" depends="common.jar-core" />
- <target name="javadocs" depends="javadocs-misc,javadocs-grouping,javadocs-misc,compile-core,check-javadocs-uptodate"
+ <target name="javadocs" depends="javadocs-grouping,compile-core,check-javadocs-uptodate"
unless="javadocs-uptodate-${name}">
<invoke-module-javadoc>
<links>
<link href="../queries"/>
<link href="../grouping"/>
- <link href="../misc"/>
</links>
</invoke-module-javadoc>
</target>
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java indexsort/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
--- trunk/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java 2016-04-24 06:00:46.361895938 -0400
+++ indexsort/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java 2016-05-10 05:44:23.740471118 -0400
@@ -29,6 +29,7 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@@ -38,7 +39,6 @@
import org.apache.lucene.search.grouping.GroupingSearch;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.uninverting.UninvertingReader;
/**
* Utility class for creating training / test / cross validation indexes from the original index.
@@ -68,7 +68,7 @@
* @param crossValidationIndex a {@link Directory} used to write the cross validation index
* @param analyzer {@link Analyzer} used to create the new docs
* @param termVectors {@code true} if term vectors should be kept
- * @param classFieldName names of the field used as the label for classification
+ * @param classFieldName name of the field used as the label for classification; this must be indexed with sorted doc values
* @param fieldNames names of fields that need to be put in the new indexes or <code>null</code> if all should be used
* @throws IOException if any writing operation fails on any of the indexes
*/
@@ -80,30 +80,23 @@
IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));
- // try to get the exact no. of existing classes
- Terms terms = originalIndex.terms(classFieldName);
- long noOfClasses = -1;
- if (terms != null) {
- noOfClasses = terms.size();
-
- }
- if (noOfClasses == -1) {
- noOfClasses = 10000; // fallback
+ // get the exact no. of existing classes
+ SortedDocValues classValues = originalIndex.getSortedDocValues(classFieldName);
+ if (classValues == null) {
+ throw new IllegalStateException("the classFieldName \"" + classFieldName + "\" must index sorted doc values");
}
- HashMap<String, UninvertingReader.Type> mapping = new HashMap<>();
- mapping.put(classFieldName, UninvertingReader.Type.SORTED);
- UninvertingReader uninvertingReader = new UninvertingReader(originalIndex, mapping);
+ int noOfClasses = classValues.getValueCount();
try {
- IndexSearcher indexSearcher = new IndexSearcher(uninvertingReader);
+ IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
GroupingSearch gs = new GroupingSearch(classFieldName);
gs.setGroupSort(Sort.INDEXORDER);
gs.setSortWithinGroup(Sort.INDEXORDER);
gs.setAllGroups(true);
gs.setGroupDocsLimit(originalIndex.maxDoc());
- TopGroups<Object> topGroups = gs.search(indexSearcher, new MatchAllDocsQuery(), 0, (int) noOfClasses);
+ TopGroups<Object> topGroups = gs.search(indexSearcher, new MatchAllDocsQuery(), 0, noOfClasses);
// set the type to be indexed, stored, with term vectors
FieldType ft = new FieldType(TextField.TYPE_STORED);
@@ -156,7 +149,7 @@
testWriter.close();
cvWriter.close();
trainingWriter.close();
- uninvertingReader.close();
+ originalIndex.close();
}
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java indexsort/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java
--- trunk/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java 2016-04-24 06:00:46.361895938 -0400
+++ indexsort/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java 2016-05-10 05:44:23.740471118 -0400
@@ -70,7 +70,9 @@
doc = new Document();
doc.add(new Field(idFieldName, "id" + Integer.toString(i), ft));
doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft));
- doc.add(new Field(classFieldName, Integer.toString(rnd.nextInt(10)), ft));
+ String className = Integer.toString(rnd.nextInt(10));
+ doc.add(new Field(classFieldName, className, ft));
+ doc.add(new SortedDocValuesField(classFieldName, new BytesRef(className)));
indexWriter.addDocument(doc);
}
@@ -89,13 +91,11 @@
super.tearDown();
}
-
@Test
public void testSplitOnAllFields() throws Exception {
assertSplit(originalIndex, 0.1, 0.1);
}
-
@Test
public void testSplitOnSomeFields() throws Exception {
assertSplit(originalIndex, 0.2, 0.35, idFieldName, textFieldName);
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
--- trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java 2016-02-16 11:18:34.649021815 -0500
+++ indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java 2016-05-10 05:44:23.740471118 -0400
@@ -36,6 +36,7 @@
private IndexOutput out;
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final SegmentWriteState writeState;
+ final String segment;
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
@@ -49,6 +50,7 @@
public SimpleTextFieldsWriter(SegmentWriteState writeState) throws IOException {
final String fileName = SimpleTextPostingsFormat.getPostingsFileName(writeState.segmentInfo.name, writeState.segmentSuffix);
+ segment = writeState.segmentInfo.name;
out = writeState.directory.createOutput(fileName, writeState.context);
this.writeState = writeState;
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
--- trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java 2016-02-16 11:18:34.649021815 -0500
+++ indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java 2016-05-10 05:44:23.740471118 -0400
@@ -31,6 +31,8 @@
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -59,6 +61,11 @@
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
final static BytesRef SI_ID = new BytesRef(" id ");
+ final static BytesRef SI_SORT = new BytesRef(" sort ");
+ final static BytesRef SI_SORT_FIELD = new BytesRef(" field ");
+ final static BytesRef SI_SORT_TYPE = new BytesRef(" type ");
+ final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse ");
+ final static BytesRef SI_SORT_MISSING = new BytesRef(" missing ");
public static final String SI_EXTENSION = "si";
@@ -137,10 +144,119 @@
+ ", got: " + StringHelper.idToString(id), input);
}
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT);
+ final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch));
+ SortField[] sortField = new SortField[numSortFields];
+ for (int i = 0; i < numSortFields; ++i) {
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT_FIELD);
+ final String field = readString(SI_SORT_FIELD.length, scratch);
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE);
+ final String typeAsString = readString(SI_SORT_TYPE.length, scratch);
+
+ final SortField.Type type;
+ switch (typeAsString) {
+ case "string":
+ type = SortField.Type.STRING;
+ break;
+ case "long":
+ type = SortField.Type.LONG;
+ break;
+ case "int":
+ type = SortField.Type.INT;
+ break;
+ case "double":
+ type = SortField.Type.DOUBLE;
+ break;
+ case "float":
+ type = SortField.Type.FLOAT;
+ break;
+ default:
+ throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
+ }
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT_REVERSE);
+ final boolean reverse = Boolean.parseBoolean(readString(SI_SORT_REVERSE.length, scratch));
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT_MISSING);
+ final String missingLastAsString = readString(SI_SORT_MISSING.length, scratch);
+ final Object missingValue;
+ switch (type) {
+ case STRING:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ case "first":
+ missingValue = SortField.STRING_FIRST;
+ break;
+ case "last":
+ missingValue = SortField.STRING_LAST;
+ break;
+ default:
+ throw new CorruptIndexException("unable to parse missing string: " + typeAsString, input);
+ }
+ break;
+ case LONG:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Long.parseLong(missingLastAsString);
+ break;
+ }
+ break;
+ case INT:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Integer.parseInt(missingLastAsString);
+ break;
+ }
+ break;
+ case DOUBLE:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Double.parseDouble(missingLastAsString);
+ break;
+ }
+ break;
+ case FLOAT:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Float.parseFloat(missingLastAsString);
+ break;
+ }
+ break;
+ default:
+ throw new AssertionError();
+ }
+ sortField[i] = new SortField(field, type, reverse);
+ if (missingValue != null) {
+ sortField[i].setMissingValue(missingValue);
+ }
+ }
+ Sort indexSort = sortField.length == 0 ? null : new Sort(sortField);
+
SimpleTextUtil.checkFooter(input);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
- isCompoundFile, null, Collections.unmodifiableMap(diagnostics), id, Collections.unmodifiableMap(attributes));
+ isCompoundFile, null, Collections.unmodifiableMap(diagnostics),
+ id, Collections.unmodifiableMap(attributes), indexSort);
info.setFiles(files);
return info;
}
@@ -223,6 +339,62 @@
SimpleTextUtil.write(output, new BytesRef(si.getId()));
SimpleTextUtil.writeNewline(output);
+ Sort indexSort = si.getIndexSort();
+ SimpleTextUtil.write(output, SI_SORT);
+ final int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
+ SimpleTextUtil.write(output, Integer.toString(numSortFields), scratch);
+ SimpleTextUtil.writeNewline(output);
+ for (int i = 0; i < numSortFields; ++i) {
+ final SortField sortField = indexSort.getSort()[i];
+
+ SimpleTextUtil.write(output, SI_SORT_FIELD);
+ SimpleTextUtil.write(output, sortField.getField(), scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ SimpleTextUtil.write(output, SI_SORT_TYPE);
+ final String sortType;
+ switch (sortField.getType()) {
+ case STRING:
+ sortType = "string";
+ break;
+ case LONG:
+ sortType = "long";
+ break;
+ case INT:
+ sortType = "int";
+ break;
+ case DOUBLE:
+ sortType = "double";
+ break;
+ case FLOAT:
+ sortType = "float";
+ break;
+ default:
+ throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
+ }
+ SimpleTextUtil.write(output, sortType, scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ SimpleTextUtil.write(output, SI_SORT_REVERSE);
+ SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ SimpleTextUtil.write(output, SI_SORT_MISSING);
+ final Object missingValue = sortField.getMissingValue();
+ final String missing;
+ if (missingValue == null) {
+ missing = "null";
+ } else if (missingValue == SortField.STRING_FIRST) {
+ missing = "first";
+ } else if (missingValue == SortField.STRING_LAST) {
+ missing = "last";
+ } else {
+ missing = missingValue.toString();
+ }
+ SimpleTextUtil.write(output, missing, scratch);
+ SimpleTextUtil.writeNewline(output);
+ }
+
SimpleTextUtil.writeChecksum(output, scratch);
}
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
--- trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java 2016-02-16 11:18:34.649021815 -0500
+++ indexsort/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java 2016-05-10 05:44:23.740471118 -0400
@@ -143,7 +143,6 @@
} else {
write(TYPE_STRING);
newLine();
-
write(VALUE);
write(field.stringValue());
newLine();
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/Codec.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/Codec.java 2016-03-08 17:22:26.828938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/Codec.java 2016-05-10 05:44:23.740471118 -0400
@@ -57,7 +57,7 @@
}
// TODO: should we use this, or maybe a system property is better?
- static Codec defaultCodec = LOADER.lookup("Lucene60");
+ static Codec defaultCodec = LOADER.lookup("Lucene62");
}
private final String name;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java 2016-02-16 11:18:34.657021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java 2016-05-10 05:44:23.744471118 -0400
@@ -486,6 +486,12 @@
@Override
public int merge(MergeState mergeState) throws IOException {
+ if (mergeState.segmentInfo.getIndexSort() != null) {
+ // TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
+ // being copied over...?
+ return super.merge(mergeState);
+ }
+
int docCount = 0;
int numReaders = mergeState.maxDocs.length;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java 2016-02-16 11:18:34.657021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java 2016-05-10 05:44:23.744471118 -0400
@@ -730,6 +730,11 @@
@Override
public int merge(MergeState mergeState) throws IOException {
+ if (mergeState.segmentInfo.getIndexSort() != null) {
+ // TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
+ // being copied over...?
+ return super.merge(mergeState);
+ }
int docCount = 0;
int numReaders = mergeState.maxDocs.length;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java 2016-02-16 11:18:34.653021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java 2016-05-10 05:44:23.744471118 -0400
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
@@ -25,12 +24,13 @@
import java.util.NoSuchElementException;
import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocIDMerger;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
-import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.SortedDocValues;
@@ -44,6 +44,8 @@
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Abstract API that consumes numeric, binary and
* sorted docvalues. Concrete implementations of this
@@ -240,6 +242,32 @@
}
}
}
+
+ /** Tracks state of one numeric sub-reader that we are merging */
+ private static class NumericDocValuesSub extends DocIDMerger.Sub {
+
+ private final NumericDocValues values;
+ private final Bits docsWithField;
+ private int docID = -1;
+ private final int maxDoc;
+
+ public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values, Bits docsWithField, int maxDoc) {
+ super(docMap);
+ this.values = values;
+ this.docsWithField = docsWithField;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
/**
* Merges the numeric docvalues from <code>toMerge</code>.
@@ -248,20 +276,23 @@
* an Iterable that merges and filters deleted documents on the fly.
*/
public void mergeNumericField(final FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge, final List<Bits> docsWithField) throws IOException {
-
addNumericField(fieldInfo,
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<NumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<Number>() {
- int readerUpto = -1;
- int docIDUpto;
long nextValue;
boolean nextHasValue;
- int currentMaxDoc;
- NumericDocValues currentValues;
- Bits currentLiveDocs;
- Bits currentDocsWithField;
boolean nextIsSet;
@Override
@@ -276,7 +307,7 @@
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -285,43 +316,46 @@
}
private boolean setNext() {
- while (true) {
- if (readerUpto == toMerge.size()) {
- return false;
- }
-
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < toMerge.size()) {
- currentValues = toMerge.get(readerUpto);
- currentDocsWithField = docsWithField.get(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- nextValue = currentValues.get(docIDUpto);
- if (nextValue == 0 && currentDocsWithField.get(docIDUpto) == false) {
- nextHasValue = false;
- } else {
- nextHasValue = true;
- }
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ NumericDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return false;
}
+ nextIsSet = true;
+ nextValue = sub.values.get(sub.docID);
+ nextHasValue = nextValue != 0 || sub.docsWithField.get(sub.docID);
+ return true;
}
};
}
});
}
+ /** Tracks state of one binary sub-reader that we are merging */
+ private static class BinaryDocValuesSub extends DocIDMerger.Sub {
+
+ private final BinaryDocValues values;
+ private final Bits docsWithField;
+ private int docID = -1;
+ private final int maxDoc;
+
+ public BinaryDocValuesSub(MergeState.DocMap docMap, BinaryDocValues values, Bits docsWithField, int maxDoc) {
+ super(docMap);
+ this.values = values;
+ this.docsWithField = docsWithField;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
+
/**
* Merges the binary docvalues from <code>toMerge</code>.
* <p>
@@ -329,20 +363,23 @@
* an Iterable that merges and filters deleted documents on the fly.
*/
public void mergeBinaryField(FieldInfo fieldInfo, final MergeState mergeState, final List<BinaryDocValues> toMerge, final List<Bits> docsWithField) throws IOException {
-
addBinaryField(fieldInfo,
new Iterable<BytesRef>() {
@Override
public Iterator<BytesRef> iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<BinaryDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<BytesRef>() {
- int readerUpto = -1;
- int docIDUpto;
BytesRef nextValue;
BytesRef nextPointer; // points to null if missing, or nextValue
- int currentMaxDoc;
- BinaryDocValues currentValues;
- Bits currentLiveDocs;
- Bits currentDocsWithField;
boolean nextIsSet;
@Override
@@ -357,7 +394,7 @@
@Override
public BytesRef next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -367,42 +404,49 @@
private boolean setNext() {
while (true) {
- if (readerUpto == toMerge.size()) {
- return false;
- }
-
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < toMerge.size()) {
- currentValues = toMerge.get(readerUpto);
- currentDocsWithField = docsWithField.get(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- if (currentDocsWithField.get(docIDUpto)) {
- nextValue = currentValues.get(docIDUpto);
- nextPointer = nextValue;
- } else {
- nextPointer = null;
- }
- docIDUpto++;
- return true;
+ BinaryDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return false;
+ }
+ nextIsSet = true;
+ if (sub.docsWithField.get(sub.docID)) {
+ nextPointer = nextValue = sub.values.get(sub.docID);
+ } else {
+ nextPointer = null;
+ }
+ return true;
}
-
- docIDUpto++;
}
- }
};
}
});
}
+ /** Tracks state of one sorted numeric sub-reader that we are merging */
+ private static class SortedNumericDocValuesSub extends DocIDMerger.Sub {
+
+ private final SortedNumericDocValues values;
+ private int docID = -1;
+ private final int maxDoc;
+
+ public SortedNumericDocValuesSub(MergeState.DocMap docMap, SortedNumericDocValues values, int maxDoc) {
+ super(docMap);
+ this.values = values;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ values.setDocument(docID);
+ return docID;
+ }
+ }
+ }
+
/**
* Merges the sorted docvalues from <code>toMerge</code>.
* <p>
@@ -410,21 +454,24 @@
* iterables that filter deleted documents.
*/
public void mergeSortedNumericField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedNumericDocValues> toMerge) throws IOException {
- final int numReaders = toMerge.size();
- final SortedNumericDocValues dvs[] = toMerge.toArray(new SortedNumericDocValues[numReaders]);
- // step 3: add field
addSortedNumericField(fieldInfo,
// doc -> value count
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedNumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<Number>() {
- int readerUpto = -1;
- int docIDUpto;
int nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
boolean nextIsSet;
@Override
@@ -439,7 +486,7 @@
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -449,30 +496,13 @@
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
+ SortedNumericDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
return false;
}
-
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- SortedNumericDocValues dv = dvs[readerUpto];
- dv.setDocument(docIDUpto);
- nextValue = dv.count();
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ nextIsSet = true;
+ nextValue = sub.values.count();
+ return true;
}
}
};
@@ -482,15 +512,21 @@
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedNumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<Number>() {
- int readerUpto = -1;
- int docIDUpto;
long nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
boolean nextIsSet;
int valueUpto;
int valueLength;
+ SortedNumericDocValuesSub current;
@Override
public boolean hasNext() {
@@ -504,7 +540,7 @@
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -514,38 +550,21 @@
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
- return false;
- }
if (valueUpto < valueLength) {
- nextValue = dvs[readerUpto].valueAt(valueUpto);
+ nextValue = current.values.valueAt(valueUpto);
valueUpto++;
nextIsSet = true;
return true;
}
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- assert docIDUpto < currentMaxDoc;
- SortedNumericDocValues dv = dvs[readerUpto];
- dv.setDocument(docIDUpto);
- valueUpto = 0;
- valueLength = dv.count();
- docIDUpto++;
- continue;
+ current = docIDMerger.next();
+ if (current == null) {
+ return false;
}
-
- docIDUpto++;
+ valueUpto = 0;
+ valueLength = current.values.count();
+ continue;
}
}
};
@@ -554,6 +573,32 @@
);
}
+ /** Tracks state of one sorted sub-reader that we are merging */
+ private static class SortedDocValuesSub extends DocIDMerger.Sub {
+
+ private final SortedDocValues values;
+ private int docID = -1;
+ private final int maxDoc;
+ private final LongValues map;
+
+ public SortedDocValuesSub(MergeState.DocMap docMap, SortedDocValues values, int maxDoc, LongValues map) {
+ super(docMap);
+ this.values = values;
+ this.maxDoc = maxDoc;
+ this.map = map;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
+
/**
* Merges the sorted docvalues from <code>toMerge</code>.
* <p>
@@ -608,7 +653,7 @@
@Override
public BytesRef next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
int segmentNumber = map.getFirstSegmentNumber(currentOrd);
@@ -629,13 +674,17 @@
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new SortedDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
+ }
+
+ final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<Number>() {
- int readerUpto = -1;
- int docIDUpto;
int nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
- LongValues currentMap;
boolean nextIsSet;
@Override
@@ -650,7 +699,7 @@
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -661,30 +710,15 @@
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
+ SortedDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
return false;
}
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentMap = map.getGlobalOrds(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- int segOrd = dvs[readerUpto].getOrd(docIDUpto);
- nextValue = segOrd == -1 ? -1 : (int) currentMap.get(segOrd);
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ nextIsSet = true;
+ int segOrd = sub.values.getOrd(sub.docID);
+ nextValue = segOrd == -1 ? -1 : (int) sub.map.get(segOrd);
+ return true;
}
}
};
@@ -693,6 +727,37 @@
);
}
+ /** Tracks state of one sorted set sub-reader that we are merging */
+ private static class SortedSetDocValuesSub extends DocIDMerger.Sub {
+
+ private final SortedSetDocValues values;
+ int docID = -1;
+ private final int maxDoc;
+ private final LongValues map;
+
+ public SortedSetDocValuesSub(MergeState.DocMap docMap, SortedSetDocValues values, int maxDoc, LongValues map) {
+ super(docMap);
+ this.values = values;
+ this.maxDoc = maxDoc;
+ this.map = map;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "SortedSetDocValuesSub(docID=" + docID + " mappedDocID=" + mappedDocID + " values=" + values + ")";
+ }
+ }
+
/**
* Merges the sortedset docvalues from <code>toMerge</code>.
* <p>
@@ -700,14 +765,12 @@
* an Iterable that merges ordinals and values and filters deleted documents .
*/
public void mergeSortedSetField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge) throws IOException {
- final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]);
- final int numReaders = mergeState.maxDocs.length;
// step 1: iterate thru each sub and mark terms still in use
- TermsEnum liveTerms[] = new TermsEnum[dvs.length];
+ TermsEnum liveTerms[] = new TermsEnum[toMerge.size()];
long[] weights = new long[liveTerms.length];
for (int sub = 0; sub < liveTerms.length; sub++) {
- SortedSetDocValues dv = dvs[sub];
+ SortedSetDocValues dv = toMerge.get(sub);
Bits liveDocs = mergeState.liveDocs[sub];
int maxDoc = mergeState.maxDocs[sub];
if (liveDocs == null) {
@@ -748,12 +811,12 @@
@Override
public BytesRef next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
int segmentNumber = map.getFirstSegmentNumber(currentOrd);
long segmentOrd = map.getFirstSegmentOrd(currentOrd);
- final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd);
+ final BytesRef term = toMerge.get(segmentNumber).lookupOrd(segmentOrd);
currentOrd++;
return term;
}
@@ -769,12 +832,18 @@
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedSetDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
+ }
+
+ final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<Number>() {
- int readerUpto = -1;
- int docIDUpto;
int nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
boolean nextIsSet;
@Override
@@ -789,7 +858,7 @@
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -800,33 +869,18 @@
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
+ SortedSetDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
return false;
}
-
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- SortedSetDocValues dv = dvs[readerUpto];
- dv.setDocument(docIDUpto);
- nextValue = 0;
- while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
- nextValue++;
- }
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ sub.values.setDocument(sub.docID);
+ nextValue = 0;
+ while (sub.values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
+ nextValue++;
+ }
+ //System.out.println(" doc " + sub + " -> ord count = " + nextValue);
+ nextIsSet = true;
+ return true;
}
}
};
@@ -836,13 +890,18 @@
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedSetDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
+ }
+
+ final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<Number>() {
- int readerUpto = -1;
- int docIDUpto;
long nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
- LongValues currentMap;
boolean nextIsSet;
long ords[] = new long[8];
int ordUpto;
@@ -860,7 +919,7 @@
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -871,10 +930,6 @@
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
- return false;
- }
-
if (ordUpto < ordLength) {
nextValue = ords[ordUpto];
ordUpto++;
@@ -882,35 +937,22 @@
return true;
}
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentMap = map.getGlobalOrds(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
+ SortedSetDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return false;
}
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- assert docIDUpto < currentMaxDoc;
- SortedSetDocValues dv = dvs[readerUpto];
- dv.setDocument(docIDUpto);
- ordUpto = ordLength = 0;
- long ord;
- while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
- if (ordLength == ords.length) {
- ords = ArrayUtil.grow(ords, ordLength+1);
- }
- ords[ordLength] = currentMap.get(ord);
- ordLength++;
+ sub.values.setDocument(sub.docID);
+
+ ordUpto = ordLength = 0;
+ long ord;
+ while ((ord = sub.values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ if (ordLength == ords.length) {
+ ords = ArrayUtil.grow(ords, ordLength+1);
}
- docIDUpto++;
- continue;
+ ords[ordLength] = sub.map.get(ord);
+ ordLength++;
}
-
- docIDUpto++;
+ continue;
}
}
};
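
The hunks above replace the per-reader readerUpto/docIDUpto/liveDocs bookkeeping with the new DocIDMerger: each SortedSetDocValuesSub advances itself via nextDoc(), and docIDMerger.next() hands back the next live sub (with mappedDocID already remapped) or null once every sub is exhausted. A minimal, self-contained sketch of that iteration contract, with illustrative names only -- this is not the real DocIDMerger, which also interleaves subs according to the index sort and skips deleted documents via the MergeState doc maps:

    import java.util.Arrays;
    import java.util.List;

    class SketchSub {
      final int maxDoc;   // number of documents in this sub-reader
      int docID = -1;     // current doc, -1 before iteration starts
      SketchSub(int maxDoc) { this.maxDoc = maxDoc; }
      int nextDoc() { docID++; return docID == maxDoc ? Integer.MAX_VALUE : docID; }
    }

    class SketchMerger {
      private final List<SketchSub> subs;
      private int upto;
      SketchMerger(List<SketchSub> subs) { this.subs = subs; }
      /** Returns the sub positioned on its next doc, or null when all subs are exhausted. */
      SketchSub next() {
        while (upto < subs.size()) {
          SketchSub sub = subs.get(upto);
          if (sub.nextDoc() != Integer.MAX_VALUE) {
            return sub;   // caller reads sub.docID and its per-sub state, as setNext() does above
          }
          upto++;         // this sub is done; move on to the next one
        }
        return null;
      }
      public static void main(String[] args) {
        SketchMerger merger = new SketchMerger(Arrays.asList(new SketchSub(2), new SketchSub(3)));
        for (SketchSub sub = merger.next(); sub != null; sub = merger.next()) {
          System.out.println("docID=" + sub.docID + " from sub=" + sub);
        }
      }
    }
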
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java 2016-02-16 11:18:34.657021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,166 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.lucene50;
-
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.SegmentInfoFormat;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexWriter; // javadocs
-import org.apache.lucene.index.SegmentInfo; // javadocs
-import org.apache.lucene.index.SegmentInfos; // javadocs
-import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.DataOutput; // javadocs
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.Version;
-
-/**
- * Lucene 5.0 Segment info format.
- * <p>
- * Files:
- * <ul>
- * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, Footer
- * </ul>
- * Data types:
- * <ul>
- * <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}</li>
- * <li>SegSize --&gt; {@link DataOutput#writeInt Int32}</li>
- * <li>SegVersion --&gt; {@link DataOutput#writeString String}</li>
- * <li>Files --&gt; {@link DataOutput#writeSetOfStrings Set&lt;String&gt;}</li>
- * <li>Diagnostics,Attributes --&gt; {@link DataOutput#writeMapOfStrings Map&lt;String,String&gt;}</li>
- * <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
- * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * Field Descriptions:
- * <ul>
- * <li>SegVersion is the code version that created the segment.</li>
- * <li>SegSize is the number of documents contained in the segment index.</li>
- * <li>IsCompoundFile records whether the segment is written as a compound file or
- * not. If this is -1, the segment is not a compound file. If it is 1, the segment
- * is a compound file.</li>
- * <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
- * for each segment it creates. It includes metadata like the current Lucene
- * version, OS, Java version, why the segment was created (merge, flush,
- * addIndexes), etc.</li>
- * <li>Files is a list of files referred to by this segment.</li>
- * </ul>
- *
- * @see SegmentInfos
- * @lucene.experimental
- */
-public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
-
- /** Sole constructor. */
- public Lucene50SegmentInfoFormat() {
- }
-
- @Override
- public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
- final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
- try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
- Throwable priorE = null;
- SegmentInfo si = null;
- try {
- int format = CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
- Lucene50SegmentInfoFormat.VERSION_START,
- Lucene50SegmentInfoFormat.VERSION_CURRENT,
- segmentID, "");
- final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
-
- final int docCount = input.readInt();
- if (docCount < 0) {
- throw new CorruptIndexException("invalid docCount: " + docCount, input);
- }
- final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
-
- final Map<String,String> diagnostics;
- final Set<String> files;
- final Map<String,String> attributes;
-
- if (format >= VERSION_SAFE_MAPS) {
- diagnostics = input.readMapOfStrings();
- files = input.readSetOfStrings();
- attributes = input.readMapOfStrings();
- } else {
- diagnostics = Collections.unmodifiableMap(input.readStringStringMap());
- files = Collections.unmodifiableSet(input.readStringSet());
- attributes = Collections.unmodifiableMap(input.readStringStringMap());
- }
-
- si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes);
- si.setFiles(files);
- } catch (Throwable exception) {
- priorE = exception;
- } finally {
- CodecUtil.checkFooter(input, priorE);
- }
- return si;
- }
- }
-
- @Override
- public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
- final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
-
- try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
- // Only add the file once we've successfully created it, else IFD assert can trip:
- si.addFile(fileName);
- CodecUtil.writeIndexHeader(output,
- Lucene50SegmentInfoFormat.CODEC_NAME,
- Lucene50SegmentInfoFormat.VERSION_CURRENT,
- si.getId(),
- "");
- Version version = si.getVersion();
- if (version.major < 5) {
- throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si);
- }
- // Write the Lucene version that created this segment, since 3.1
- output.writeInt(version.major);
- output.writeInt(version.minor);
- output.writeInt(version.bugfix);
- assert version.prerelease == 0;
- output.writeInt(si.maxDoc());
-
- output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
- output.writeMapOfStrings(si.getDiagnostics());
- Set<String> files = si.files();
- for (String file : files) {
- if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
- throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
- }
- }
- output.writeSetOfStrings(files);
- output.writeMapOfStrings(si.getAttributes());
- CodecUtil.writeFooter(output);
- }
- }
-
- /** File extension used to store {@link SegmentInfo}. */
- public final static String SI_EXTENSION = "si";
- static final String CODEC_NAME = "Lucene50SegmentInfo";
- static final int VERSION_START = 0;
- static final int VERSION_SAFE_MAPS = 1;
- static final int VERSION_CURRENT = VERSION_SAFE_MAPS;
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene54/package-info.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene54/package-info.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene54/package-info.java 2016-01-24 13:09:49.884989952 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene54/package-info.java 2016-05-10 05:44:23.744471118 -0400
@@ -135,7 +135,7 @@
* <p>Each segment index maintains the following:</p>
* <ul>
* <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment info}.
+ * {@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment info}.
* This contains metadata about a segment, such as the number of documents,
* what files it uses,
* </li>
@@ -235,7 +235,7 @@
* file.</td>
* </tr>
* <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment Info}</td>
+ * <td>{@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment Info}</td>
* <td>.si</td>
* <td>Stores metadata about a segment</td>
* </tr>
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java 2016-03-08 17:22:26.828938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,176 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.lucene60;
-
-
-import java.util.Objects;
-
-import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.CompoundFormat;
-import org.apache.lucene.codecs.DocValuesFormat;
-import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FilterCodec;
-import org.apache.lucene.codecs.LiveDocsFormat;
-import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.PointsFormat;
-import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.SegmentInfoFormat;
-import org.apache.lucene.codecs.StoredFieldsFormat;
-import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
-import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
-import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat;
-import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
-import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
-
-/**
- * Implements the Lucene 6.0 index format, with configurable per-field postings
- * and docvalues formats.
- * <p>
- * If you want to reuse functionality of this codec in another codec, extend
- * {@link FilterCodec}.
- *
- * @see org.apache.lucene.codecs.lucene60 package documentation for file format details.
- *
- * @lucene.experimental
- */
-public class Lucene60Codec extends Codec {
- private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
- private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat();
- private final SegmentInfoFormat segmentInfosFormat = new Lucene50SegmentInfoFormat();
- private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
- private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
-
- private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
- @Override
- public PostingsFormat getPostingsFormatForField(String field) {
- return Lucene60Codec.this.getPostingsFormatForField(field);
- }
- };
-
- private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
- @Override
- public DocValuesFormat getDocValuesFormatForField(String field) {
- return Lucene60Codec.this.getDocValuesFormatForField(field);
- }
- };
-
- private final StoredFieldsFormat storedFieldsFormat;
-
- /**
- * Instantiates a new codec.
- */
- public Lucene60Codec() {
- this(Mode.BEST_SPEED);
- }
-
- /**
- * Instantiates a new codec, specifying the stored fields compression
- * mode to use.
- * @param mode stored fields compression mode to use for newly
- * flushed/merged segments.
- */
- public Lucene60Codec(Mode mode) {
- super("Lucene60");
- this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode));
- }
-
- @Override
- public final StoredFieldsFormat storedFieldsFormat() {
- return storedFieldsFormat;
- }
-
- @Override
- public final TermVectorsFormat termVectorsFormat() {
- return vectorsFormat;
- }
-
- @Override
- public final PostingsFormat postingsFormat() {
- return postingsFormat;
- }
-
- @Override
- public final FieldInfosFormat fieldInfosFormat() {
- return fieldInfosFormat;
- }
-
- @Override
- public final SegmentInfoFormat segmentInfoFormat() {
- return segmentInfosFormat;
- }
-
- @Override
- public final LiveDocsFormat liveDocsFormat() {
- return liveDocsFormat;
- }
-
- @Override
- public final CompoundFormat compoundFormat() {
- return compoundFormat;
- }
-
- @Override
- public final PointsFormat pointsFormat() {
- return new Lucene60PointsFormat();
- }
-
- /** Returns the postings format that should be used for writing
- * new segments of <code>field</code>.
- *
- * The default implementation always returns "Lucene50".
- * <p>
- * <b>WARNING:</b> if you subclass, you are responsible for index
- * backwards compatibility: future version of Lucene are only
- * guaranteed to be able to read the default implementation.
- */
- public PostingsFormat getPostingsFormatForField(String field) {
- return defaultFormat;
- }
-
- /** Returns the docvalues format that should be used for writing
- * new segments of <code>field</code>.
- *
- * The default implementation always returns "Lucene50".
- * <p>
- * <b>WARNING:</b> if you subclass, you are responsible for index
- * backwards compatibility: future version of Lucene are only
- * guaranteed to be able to read the default implementation.
- */
- public DocValuesFormat getDocValuesFormatForField(String field) {
- return defaultDVFormat;
- }
-
- @Override
- public final DocValuesFormat docValuesFormat() {
- return docValuesFormat;
- }
-
- private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50");
- private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene54");
-
- private final NormsFormat normsFormat = new Lucene53NormsFormat();
-
- @Override
- public final NormsFormat normsFormat() {
- return normsFormat;
- }
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java 2016-04-24 06:00:46.365895938 -0400
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java 2016-05-10 05:44:23.744471118 -0400
@@ -123,6 +123,13 @@
@Override
public void merge(MergeState mergeState) throws IOException {
+ if (mergeState.segmentInfo.getIndexSort() != null) {
+ // TODO: can we gain back some optimizations even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
+ // being copied over...?
+ super.merge(mergeState);
+ return;
+ }
+
for(PointsReader reader : mergeState.pointsReaders) {
if (reader instanceof Lucene60PointsReader == false) {
// We can only bulk merge when all to-be-merged segments use our format:
@@ -171,7 +178,6 @@
singleValuePerDoc)) {
List<BKDReader> bkdReaders = new ArrayList<>();
List<MergeState.DocMap> docMaps = new ArrayList<>();
- List<Integer> docIDBases = new ArrayList<>();
for(int i=0;i<mergeState.pointsReaders.length;i++) {
PointsReader reader = mergeState.pointsReaders[i];
@@ -191,7 +197,6 @@
if (readerFieldInfo != null) {
BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
if (bkdReader != null) {
- docIDBases.add(mergeState.docBase[i]);
bkdReaders.add(bkdReader);
docMaps.add(mergeState.docMaps[i]);
}
@@ -199,7 +204,7 @@
}
}
- long fp = writer.merge(dataOut, docMaps, bkdReaders, docIDBases);
+ long fp = writer.merge(dataOut, docMaps, bkdReaders);
if (fp != -1) {
indexFPs.put(fieldInfo.name, fp);
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java 2016-03-08 17:22:26.828938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java 2016-05-10 05:44:23.744471118 -0400
@@ -16,400 +16,7 @@
*/
/**
- * Lucene 6.0 file format.
- *
- * <h1>Apache Lucene - Index File Formats</h1>
- * <div>
- * <ul>
- * <li><a href="#Introduction">Introduction</a></li>
- * <li><a href="#Definitions">Definitions</a>
- * <ul>
- * <li><a href="#Inverted_Indexing">Inverted Indexing</a></li>
- * <li><a href="#Types_of_Fields">Types of Fields</a></li>
- * <li><a href="#Segments">Segments</a></li>
- * <li><a href="#Document_Numbers">Document Numbers</a></li>
- * </ul>
- * </li>
- * <li><a href="#Overview">Index Structure Overview</a></li>
- * <li><a href="#File_Naming">File Naming</a></li>
- * <li><a href="#file-names">Summary of File Extensions</a>
- * <ul>
- * <li><a href="#Lock_File">Lock File</a></li>
- * <li><a href="#History">History</a></li>
- * <li><a href="#Limitations">Limitations</a></li>
- * </ul>
- * </li>
- * </ul>
- * </div>
- * <a name="Introduction"></a>
- * <h2>Introduction</h2>
- * <div>
- * <p>This document defines the index file formats used in this version of Lucene.
- * If you are using a different version of Lucene, please consult the copy of
- * <code>docs/</code> that was distributed with
- * the version you are using.</p>
- * <p>Apache Lucene is written in Java, but several efforts are underway to write
- * <a href="http://wiki.apache.org/lucene-java/LuceneImplementations">versions of
- * Lucene in other programming languages</a>. If these versions are to remain
- * compatible with Apache Lucene, then a language-independent definition of the
- * Lucene index format is required. This document thus attempts to provide a
- * complete and independent definition of the Apache Lucene file formats.</p>
- * <p>As Lucene evolves, this document should evolve. Versions of Lucene in
- * different programming languages should endeavor to agree on file formats, and
- * generate new versions of this document.</p>
- * </div>
- * <a name="Definitions"></a>
- * <h2>Definitions</h2>
- * <div>
- * <p>The fundamental concepts in Lucene are index, document, field and term.</p>
- * <p>An index contains a sequence of documents.</p>
- * <ul>
- * <li>A document is a sequence of fields.</li>
- * <li>A field is a named sequence of terms.</li>
- * <li>A term is a sequence of bytes.</li>
- * </ul>
- * <p>The same sequence of bytes in two different fields is considered a different
- * term. Thus terms are represented as a pair: the string naming the field, and the
- * bytes within the field.</p>
- * <a name="Inverted_Indexing"></a>
- * <h3>Inverted Indexing</h3>
- * <p>The index stores statistics about terms in order to make term-based search
- * more efficient. Lucene's index falls into the family of indexes known as an
- * <i>inverted index.</i> This is because it can list, for a term, the documents
- * that contain it. This is the inverse of the natural relationship, in which
- * documents list terms.</p>
- * <a name="Types_of_Fields"></a>
- * <h3>Types of Fields</h3>
- * <p>In Lucene, fields may be <i>stored</i>, in which case their text is stored
- * in the index literally, in a non-inverted manner. Fields that are inverted are
- * called <i>indexed</i>. A field may be both stored and indexed.</p>
- * <p>The text of a field may be <i>tokenized</i> into terms to be indexed, or the
- * text of a field may be used literally as a term to be indexed. Most fields are
- * tokenized, but sometimes it is useful for certain identifier fields to be
- * indexed literally.</p>
- * <p>See the {@link org.apache.lucene.document.Field Field}
- * java docs for more information on Fields.</p>
- * <a name="Segments"></a>
- * <h3>Segments</h3>
- * <p>Lucene indexes may be composed of multiple sub-indexes, or <i>segments</i>.
- * Each segment is a fully independent index, which could be searched separately.
- * Indexes evolve by:</p>
- * <ol>
- * <li>Creating new segments for newly added documents.</li>
- * <li>Merging existing segments.</li>
- * </ol>
- * <p>Searches may involve multiple segments and/or multiple indexes, each index
- * potentially composed of a set of segments.</p>
- * <a name="Document_Numbers"></a>
- * <h3>Document Numbers</h3>
- * <p>Internally, Lucene refers to documents by an integer <i>document number</i>.
- * The first document added to an index is numbered zero, and each subsequent
- * document added gets a number one greater than the previous.</p>
- * <p>Note that a document's number may change, so caution should be taken when
- * storing these numbers outside of Lucene. In particular, numbers may change in
- * the following situations:</p>
- * <ul>
- * <li>
- * <p>The numbers stored in each segment are unique only within the segment, and
- * must be converted before they can be used in a larger context. The standard
- * technique is to allocate each segment a range of values, based on the range of
- * numbers used in that segment. To convert a document number from a segment to an
- * external value, the segment's <i>base</i> document number is added. To convert
- * an external value back to a segment-specific value, the segment is identified
- * by the range that the external value is in, and the segment's base value is
- * subtracted. For example two five document segments might be combined, so that
- * the first segment has a base value of zero, and the second of five. Document
- * three from the second segment would have an external value of eight.</p>
- * </li>
- * <li>
- * <p>When documents are deleted, gaps are created in the numbering. These are
- * eventually removed as the index evolves through merging. Deleted documents are
- * dropped when segments are merged. A freshly-merged segment thus has no gaps in
- * its numbering.</p>
- * </li>
- * </ul>
- * </div>
- * <a name="Overview"></a>
- * <h2>Index Structure Overview</h2>
- * <div>
- * <p>Each segment index maintains the following:</p>
- * <ul>
- * <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment info}.
- * This contains metadata about a segment, such as the number of documents,
- * what files it uses,
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50FieldInfosFormat Field names}.
- * This contains the set of field names used in the index.
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Stored Field values}.
- * This contains, for each document, a list of attribute-value pairs, where the attributes
- * are field names. These are used to store auxiliary information about the document, such as
- * its title, url, or an identifier to access a database. The set of stored fields are what is
- * returned for each hit when searching. This is keyed by document number.
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term dictionary}.
- * A dictionary containing all of the terms used in all of the
- * indexed fields of all of the documents. The dictionary also contains the number
- * of documents which contain the term, and pointers to the term's frequency and
- * proximity data.
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Frequency data}.
- * For each term in the dictionary, the numbers of all the
- * documents that contain that term, and the frequency of the term in that
- * document, unless frequencies are omitted (IndexOptions.DOCS_ONLY)
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Proximity data}.
- * For each term in the dictionary, the positions that the
- * term occurs in each document. Note that this will not exist if all fields in
- * all documents omit position data.
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Normalization factors}.
- * For each field in each document, a value is stored
- * that is multiplied into the score for hits on that field.
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vectors}.
- * For each field in each document, the term vector (sometimes
- * called document vector) may be stored. A term vector consists of term text and
- * term frequency. To add Term Vectors to your index see the
- * {@link org.apache.lucene.document.Field Field} constructors
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-document values}.
- * Like stored values, these are also keyed by document
- * number, but are generally intended to be loaded into main memory for fast
- * access. Whereas stored values are generally intended for summary results from
- * searches, per-document values are useful for things like scoring factors.
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}.
- * An optional file indicating which documents are live.
- * </li>
- * <li>
- * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}.
- * Optional pair of files, recording dimensionally indexed fields, to enable fast
- * numeric range filtering and large numeric values like BigInteger and BigDecimal (1D)
- * and geographic shape intersection (2D, 3D).
- * </li>
- * </ul>
- * <p>Details on each of these are provided in their linked pages.</p>
- * </div>
- * <a name="File_Naming"></a>
- * <h2>File Naming</h2>
- * <div>
- * <p>All files belonging to a segment have the same name with varying extensions.
- * The extensions correspond to the different file formats described below. When
- * using the Compound File format (default in 1.4 and greater) these files (except
- * for the Segment info file, the Lock file, and Deleted documents file) are collapsed
- * into a single .cfs file (see below for details)</p>
- * <p>Typically, all segments in an index are stored in a single directory,
- * although this is not required.</p>
- * <p>As of version 2.1 (lock-less commits), file names are never re-used.
- * That is, when any file is saved
- * to the Directory it is given a never before used filename. This is achieved
- * using a simple generations approach. For example, the first segments file is
- * segments_1, then segments_2, etc. The generation is a sequential long integer
- * represented in alpha-numeric (base 36) form.</p>
- * </div>
- * <a name="file-names"></a>
- * <h2>Summary of File Extensions</h2>
- * <div>
- * <p>The following table summarizes the names and extensions of the files in
- * Lucene:</p>
- * <table cellspacing="1" cellpadding="4" summary="lucene filenames by extension">
- * <tr>
- * <th>Name</th>
- * <th>Extension</th>
- * <th>Brief Description</th>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.index.SegmentInfos Segments File}</td>
- * <td>segments_N</td>
- * <td>Stores information about a commit point</td>
- * </tr>
- * <tr>
- * <td><a href="#Lock_File">Lock File</a></td>
- * <td>write.lock</td>
- * <td>The Write lock prevents multiple IndexWriters from writing to the same
- * file.</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment Info}</td>
- * <td>.si</td>
- * <td>Stores metadata about a segment</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat Compound File}</td>
- * <td>.cfs, .cfe</td>
- * <td>An optional "virtual" file consisting of all the other index files for
- * systems that frequently run out of file handles.</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50FieldInfosFormat Fields}</td>
- * <td>.fnm</td>
- * <td>Stores information about the fields</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Index}</td>
- * <td>.fdx</td>
- * <td>Contains pointers to field data</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Data}</td>
- * <td>.fdt</td>
- * <td>The stored fields for documents</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Dictionary}</td>
- * <td>.tim</td>
- * <td>The term dictionary, stores term info</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Index}</td>
- * <td>.tip</td>
- * <td>The index into the Term Dictionary</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Frequencies}</td>
- * <td>.doc</td>
- * <td>Contains the list of docs which contain each term along with frequency</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Positions}</td>
- * <td>.pos</td>
- * <td>Stores position information about where a term occurs in the index</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Payloads}</td>
- * <td>.pay</td>
- * <td>Stores additional per-position metadata information such as character offsets and user payloads</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Norms}</td>
- * <td>.nvd, .nvm</td>
- * <td>Encodes length and boost factors for docs and fields</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-Document Values}</td>
- * <td>.dvd, .dvm</td>
- * <td>Encodes additional scoring factors or other per-document information.</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Index}</td>
- * <td>.tvx</td>
- * <td>Stores offset into the document data file</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Documents}</td>
- * <td>.tvd</td>
- * <td>Contains information about each document that has term vectors</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Fields}</td>
- * <td>.tvf</td>
- * <td>The field level info about term vectors</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents}</td>
- * <td>.liv</td>
- * <td>Info about what files are live</td>
- * </tr>
- * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}</td>
- * <td>.dii, .dim</td>
- * <td>Holds indexed points, if any</td>
- * </tr>
- * </table>
- * </div>
- * <a name="Lock_File"></a>
- * <h2>Lock File</h2>
- * The write lock, which is stored in the index directory by default, is named
- * "write.lock". If the lock directory is different from the index directory then
- * the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix
- * derived from the full path to the index directory. When this file is present, a
- * writer is currently modifying the index (adding or removing documents). This
- * lock file ensures that only one writer is modifying the index at a time.
- * <a name="History"></a>
- * <h2>History</h2>
- * <p>Compatibility notes are provided in this document, describing how file
- * formats have changed from prior versions:</p>
- * <ul>
- * <li>In version 2.1, the file format was changed to allow lock-less commits (ie,
- * no more commit lock). The change is fully backwards compatible: you can open a
- * pre-2.1 index for searching or adding/deleting of docs. When the new segments
- * file is saved (committed), it will be written in the new file format (meaning
- * no specific "upgrade" process is needed). But note that once a commit has
- * occurred, pre-2.1 Lucene will not be able to read the index.</li>
- * <li>In version 2.3, the file format was changed to allow segments to share a
- * single set of doc store (vectors &amp; stored fields) files. This allows for
- * faster indexing in certain cases. The change is fully backwards compatible (in
- * the same way as the lock-less commits change in 2.1).</li>
- * <li>In version 2.4, Strings are now written as true UTF-8 byte sequence, not
- * Java's modified UTF-8. See <a href="http://issues.apache.org/jira/browse/LUCENE-510">
- * LUCENE-510</a> for details.</li>
- * <li>In version 2.9, an optional opaque Map&lt;String,String&gt; CommitUserData
- * may be passed to IndexWriter's commit methods (and later retrieved), which is
- * recorded in the segments_N file. See <a href="http://issues.apache.org/jira/browse/LUCENE-1382">
- * LUCENE-1382</a> for details. Also,
- * diagnostics were added to each segment written recording details about why it
- * was written (due to flush, merge; which OS/JRE was used; etc.). See issue
- * <a href="http://issues.apache.org/jira/browse/LUCENE-1654">LUCENE-1654</a> for details.</li>
- * <li>In version 3.0, compressed fields are no longer written to the index (they
- * can still be read, but on merge the new segment will write them, uncompressed).
- * See issue <a href="http://issues.apache.org/jira/browse/LUCENE-1960">LUCENE-1960</a>
- * for details.</li>
- * <li>In version 3.1, segments records the code version that created them. See
- * <a href="http://issues.apache.org/jira/browse/LUCENE-2720">LUCENE-2720</a> for details.
- * Additionally segments track explicitly whether or not they have term vectors.
- * See <a href="http://issues.apache.org/jira/browse/LUCENE-2811">LUCENE-2811</a>
- * for details.</li>
- * <li>In version 3.2, numeric fields are written as natively to stored fields
- * file, previously they were stored in text format only.</li>
- * <li>In version 3.4, fields can omit position data while still indexing term
- * frequencies.</li>
- * <li>In version 4.0, the format of the inverted index became extensible via
- * the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage
- * ({@code DocValues}) was introduced. Normalization factors need no longer be a
- * single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}.
- * Terms need not be unicode strings, they can be any byte sequence. Term offsets
- * can optionally be indexed into the postings lists. Payloads can be stored in the
- * term vectors.</li>
- * <li>In version 4.1, the format of the postings list changed to use either
- * of FOR compression or variable-byte encoding, depending upon the frequency
- * of the term. Terms appearing only once were changed to inline directly into
- * the term dictionary. Stored fields are compressed by default. </li>
- * <li>In version 4.2, term vectors are compressed by default. DocValues has
- * a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining
- * on multi-valued fields.</li>
- * <li>In version 4.5, DocValues were extended to explicitly represent missing values.</li>
- * <li>In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
- * allow updating NumericDocValues fields.</li>
- * <li>In version 4.8, checksum footers were added to the end of each index file
- * for improved data integrity. Specifically, the last 8 bytes of every index file
- * contain the zlib-crc32 checksum of the file.</li>
- * <li>In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric)
- * that is suitable for faceting/sorting/analytics.
- * <li>In version 5.4, DocValues have been improved to store more information on disk:
- * addresses for binary fields and ord indexes for multi-valued fields.
- * <li>In version 6.0, Points were added, for multi-dimensional range/distance search.
- * </li>
- * </ul>
- * <a name="Limitations"></a>
- * <h2>Limitations</h2>
- * <div>
- * <p>Lucene uses a Java <code>int</code> to refer to
- * document numbers, and the index file format uses an <code>Int32</code>
- * on-disk to store document numbers. This is a limitation
- * of both the index file format and the current implementation. Eventually these
- * should be replaced with either <code>UInt64</code> values, or
- * better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.</p>
- * </div>
+ * Components from the Lucene 6.0 index format. See {@link org.apache.lucene.codecs.lucene62}
+ * for an overview of the index format.
*/
package org.apache.lucene.codecs.lucene60;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java 2016-05-10 05:44:23.744471118 -0400
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene62;
+
+import java.util.Objects;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PointsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
+import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat;
+import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat;
+import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+
+/**
+ * Implements the Lucene 6.2 index format, with configurable per-field postings
+ * and docvalues formats.
+ * <p>
+ * If you want to reuse functionality of this codec in another codec, extend
+ * {@link FilterCodec}.
+ *
+ * @see org.apache.lucene.codecs.lucene62 package documentation for file format details.
+ *
+ * @lucene.experimental
+ */
+public class Lucene62Codec extends Codec {
+ private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
+ private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat();
+ private final SegmentInfoFormat segmentInfosFormat = new Lucene62SegmentInfoFormat();
+ private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
+ private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
+
+ private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return Lucene62Codec.this.getPostingsFormatForField(field);
+ }
+ };
+
+ private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
+ @Override
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return Lucene62Codec.this.getDocValuesFormatForField(field);
+ }
+ };
+
+ private final StoredFieldsFormat storedFieldsFormat;
+
+ /**
+ * Instantiates a new codec.
+ */
+ public Lucene62Codec() {
+ this(Mode.BEST_SPEED);
+ }
+
+ /**
+ * Instantiates a new codec, specifying the stored fields compression
+ * mode to use.
+ * @param mode stored fields compression mode to use for newly
+ * flushed/merged segments.
+ */
+ public Lucene62Codec(Mode mode) {
+ super("Lucene62");
+ this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode));
+ }
+
+ @Override
+ public final StoredFieldsFormat storedFieldsFormat() {
+ return storedFieldsFormat;
+ }
+
+ @Override
+ public final TermVectorsFormat termVectorsFormat() {
+ return vectorsFormat;
+ }
+
+ @Override
+ public final PostingsFormat postingsFormat() {
+ return postingsFormat;
+ }
+
+ @Override
+ public final FieldInfosFormat fieldInfosFormat() {
+ return fieldInfosFormat;
+ }
+
+ @Override
+ public final SegmentInfoFormat segmentInfoFormat() {
+ return segmentInfosFormat;
+ }
+
+ @Override
+ public final LiveDocsFormat liveDocsFormat() {
+ return liveDocsFormat;
+ }
+
+ @Override
+ public final CompoundFormat compoundFormat() {
+ return compoundFormat;
+ }
+
+ @Override
+ public final PointsFormat pointsFormat() {
+ return new Lucene60PointsFormat();
+ }
+
+ /** Returns the postings format that should be used for writing
+ * new segments of <code>field</code>.
+ *
+ * The default implementation always returns "Lucene50".
+ * <p>
+ * <b>WARNING:</b> if you subclass, you are responsible for index
+ * backwards compatibility: future versions of Lucene are only
+ * guaranteed to be able to read the default implementation.
+ */
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return defaultFormat;
+ }
+
+ /** Returns the docvalues format that should be used for writing
+ * new segments of <code>field</code>.
+ *
+ * The default implementation always returns "Lucene54".
+ * <p>
+ * <b>WARNING:</b> if you subclass, you are responsible for index
+ * backwards compatibility: future versions of Lucene are only
+ * guaranteed to be able to read the default implementation.
+ */
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return defaultDVFormat;
+ }
+
+ @Override
+ public final DocValuesFormat docValuesFormat() {
+ return docValuesFormat;
+ }
+
+ private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50");
+ private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene54");
+
+ private final NormsFormat normsFormat = new Lucene53NormsFormat();
+
+ @Override
+ public final NormsFormat normsFormat() {
+ return normsFormat;
+ }
+}
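
A hedged usage sketch for the codec added above: the index sort itself is configured on IndexWriterConfig rather than on the codec (setIndexSort is the API this patch introduces on IndexWriterConfig elsewhere in the diff; treat that call, and the field name used here, as assumptions of this sketch):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.codecs.lucene62.Lucene62Codec;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class IndexSortExample {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setCodec(new Lucene62Codec());  // the codec that persists the sort in the .si file
        // Assumed API from this patch: store the index-time sort on the config.
        iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG)));
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
          // add documents here; flushed and merged segments come out in "timestamp" order
        }
      }
    }
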
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java 2016-05-10 05:44:23.744471118 -0400
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene62;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter; // javadocs
+import org.apache.lucene.index.SegmentInfo; // javadocs
+import org.apache.lucene.index.SegmentInfos; // javadocs
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataOutput; // javadocs
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Version;
+
+/**
+ * Lucene 6.2 Segment info format.
+ * <p>
+ * Files:
+ * <ul>
+ * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, IndexSort, Footer
+ * </ul>
+ * Data types:
+ * <ul>
+ * <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}</li>
+ * <li>SegSize --&gt; {@link DataOutput#writeInt Int32}</li>
+ * <li>SegVersion --&gt; {@link DataOutput#writeString String}</li>
+ * <li>Files --&gt; {@link DataOutput#writeSetOfStrings Set&lt;String&gt;}</li>
+ * <li>Diagnostics,Attributes --&gt; {@link DataOutput#writeMapOfStrings Map&lt;String,String&gt;}</li>
+ * <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
+ * <li>IndexSort --&gt; {@link DataOutput#writeVInt Int32} count, followed by {@code count} SortField</li>
+ * <li>SortField --&gt; {@link DataOutput#writeString String} field name, followed by {@link DataOutput#writeVInt Int32} sort type ID,
+ * followed by {@link DataOutput#writeByte Int8} indicating reversed sort, followed by a type-specific encoding of the optional missing value</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
+ * </ul>
+ * Field Descriptions:
+ * <ul>
+ * <li>SegVersion is the code version that created the segment.</li>
+ * <li>SegSize is the number of documents contained in the segment index.</li>
+ * <li>IsCompoundFile records whether the segment is written as a compound file or
+ * not. If this is -1, the segment is not a compound file. If it is 1, the segment
+ * is a compound file.</li>
+ * <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
+ * for each segment it creates. It includes metadata like the current Lucene
+ * version, OS, Java version, why the segment was created (merge, flush,
+ * addIndexes), etc.</li>
+ * <li>Files is a list of files referred to by this segment.</li>
+ * </ul>
+ *
+ * @see SegmentInfos
+ * @lucene.experimental
+ */
+public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
+
+ /** Sole constructor. */
+ public Lucene62SegmentInfoFormat() {
+ }
+
+ @Override
+ public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION);
+ try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
+ Throwable priorE = null;
+ SegmentInfo si = null;
+ try {
+ int format = CodecUtil.checkIndexHeader(input, Lucene62SegmentInfoFormat.CODEC_NAME,
+ Lucene62SegmentInfoFormat.VERSION_START,
+ Lucene62SegmentInfoFormat.VERSION_CURRENT,
+ segmentID, "");
+ final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
+
+ final int docCount = input.readInt();
+ if (docCount < 0) {
+ throw new CorruptIndexException("invalid docCount: " + docCount, input);
+ }
+ final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+
+ final Map<String,String> diagnostics = input.readMapOfStrings();
+ final Set<String> files = input.readSetOfStrings();
+ final Map<String,String> attributes = input.readMapOfStrings();
+
+ int numSortFields = input.readVInt();
+ Sort indexSort;
+ if (numSortFields > 0) {
+ SortField[] sortFields = new SortField[numSortFields];
+ for(int i=0;i<numSortFields;i++) {
+ String fieldName = input.readString();
+ int sortTypeID = input.readVInt();
+ SortField.Type sortType;
+ switch(sortTypeID) {
+ case 0:
+ sortType = SortField.Type.STRING;
+ break;
+ case 1:
+ sortType = SortField.Type.LONG;
+ break;
+ case 2:
+ sortType = SortField.Type.INT;
+ break;
+ case 3:
+ sortType = SortField.Type.DOUBLE;
+ break;
+ case 4:
+ sortType = SortField.Type.FLOAT;
+ break;
+ default:
+ throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
+ }
+ byte b = input.readByte();
+ boolean reverse;
+ if (b == 0) {
+ reverse = true;
+ } else if (b == 1) {
+ reverse = false;
+ } else {
+ throw new CorruptIndexException("invalid index sort reverse: " + b, input);
+ }
+
+ sortFields[i] = new SortField(fieldName, sortType, reverse);
+
+ Object missingValue;
+ b = input.readByte();
+ if (b == 0) {
+ missingValue = null;
+ } else {
+ switch(sortType) {
+ case STRING:
+ if (b == 1) {
+ missingValue = SortField.STRING_LAST;
+ } else if (b == 2) {
+ missingValue = SortField.STRING_FIRST;
+ } else {
+ throw new CorruptIndexException("invalid missing value flag: " + b, input);
+ }
+ break;
+ case LONG:
+ if (b != 1) {
+ throw new CorruptIndexException("invalid missing value flag: " + b, input);
+ }
+ missingValue = input.readLong();
+ break;
+ case INT:
+ if (b != 1) {
+ throw new CorruptIndexException("invalid missing value flag: " + b, input);
+ }
+ missingValue = input.readInt();
+ break;
+ case DOUBLE:
+ if (b != 1) {
+ throw new CorruptIndexException("invalid missing value flag: " + b, input);
+ }
+ missingValue = Double.longBitsToDouble(input.readLong());
+ break;
+ case FLOAT:
+ if (b != 1) {
+ throw new CorruptIndexException("invalid missing value flag: " + b, input);
+ }
+ missingValue = Float.intBitsToFloat(input.readInt());
+ break;
+ default:
+ throw new AssertionError("unhandled sortType=" + sortType);
+ }
+ }
+ if (missingValue != null) {
+ sortFields[i].setMissingValue(missingValue);
+ }
+ }
+ indexSort = new Sort(sortFields);
+ } else if (numSortFields < 0) {
+ throw new CorruptIndexException("invalid index sort field count: " + numSortFields, input);
+ } else {
+ indexSort = null;
+ }
+
+ si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
+ si.setFiles(files);
+ } catch (Throwable exception) {
+ priorE = exception;
+ } finally {
+ CodecUtil.checkFooter(input, priorE);
+ }
+ return si;
+ }
+ }
+
+ @Override
+ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene62SegmentInfoFormat.SI_EXTENSION);
+
+ try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
+ // Only add the file once we've successfully created it, else IFD assert can trip:
+ si.addFile(fileName);
+ CodecUtil.writeIndexHeader(output,
+ Lucene62SegmentInfoFormat.CODEC_NAME,
+ Lucene62SegmentInfoFormat.VERSION_CURRENT,
+ si.getId(),
+ "");
+ Version version = si.getVersion();
+ if (version.major < 5) {
+ throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si);
+ }
+ // Write the Lucene version that created this segment, since 3.1
+ output.writeInt(version.major);
+ output.writeInt(version.minor);
+ output.writeInt(version.bugfix);
+ assert version.prerelease == 0;
+ output.writeInt(si.maxDoc());
+
+ output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
+ output.writeMapOfStrings(si.getDiagnostics());
+ Set<String> files = si.files();
+ for (String file : files) {
+ if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
+ throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
+ }
+ }
+ output.writeSetOfStrings(files);
+ output.writeMapOfStrings(si.getAttributes());
+
+ Sort indexSort = si.getIndexSort();
+ int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
+ output.writeVInt(numSortFields);
+ for (int i = 0; i < numSortFields; ++i) {
+ SortField sortField = indexSort.getSort()[i];
+ output.writeString(sortField.getField());
+ int sortTypeID;
+ switch (sortField.getType()) {
+ case STRING:
+ sortTypeID = 0;
+ break;
+ case LONG:
+ sortTypeID = 1;
+ break;
+ case INT:
+ sortTypeID = 2;
+ break;
+ case DOUBLE:
+ sortTypeID = 3;
+ break;
+ case FLOAT:
+ sortTypeID = 4;
+ break;
+ default:
+ throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
+ }
+ output.writeVInt(sortTypeID);
+ output.writeByte((byte) (sortField.getReverse() ? 0 : 1));
+
+ // write missing value
+ Object missingValue = sortField.getMissingValue();
+ if (missingValue == null) {
+ output.writeByte((byte) 0);
+ } else {
+ switch(sortField.getType()) {
+ case STRING:
+ if (missingValue == SortField.STRING_LAST) {
+ output.writeByte((byte) 1);
+ } else if (missingValue == SortField.STRING_FIRST) {
+ output.writeByte((byte) 2);
+ } else {
+ throw new AssertionError("unrecognized missing value for STRING field \"" + sortField.getField() + "\": " + missingValue);
+ }
+ break;
+ case LONG:
+ output.writeByte((byte) 1);
+ output.writeLong(((Long) missingValue).longValue());
+ break;
+ case INT:
+ output.writeByte((byte) 1);
+ output.writeInt(((Integer) missingValue).intValue());
+ break;
+ case DOUBLE:
+ output.writeByte((byte) 1);
+ output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue()));
+ break;
+ case FLOAT:
+ output.writeByte((byte) 1);
+ output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue()));
+ break;
+ default:
+ throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
+ }
+ }
+ }
+
+ CodecUtil.writeFooter(output);
+ }
+ }
+
+ /** File extension used to store {@link SegmentInfo}. */
+ public final static String SI_EXTENSION = "si";
+ static final String CODEC_NAME = "Lucene62SegmentInfo";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+}
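As a quick reference for the encoding above, here is a hedged sketch (not part of the patch) of how write() lays out one sort field; the byte values follow the switch statements in this file, and the field name is illustrative.

import org.apache.lucene.search.SortField;

public class IndexSortEncodingSketch {
  public static void main(String[] args) {
    // A LONG sort, descending, with an explicit missing value:
    SortField sf = new SortField("timestamp", SortField.Type.LONG, true);
    sf.setMissingValue(0L);
    // Per write() above, this field is serialized as:
    //   writeString("timestamp")   field name
    //   writeVInt(1)               sortTypeID: LONG -> 1
    //   writeByte(0)               0 = reverse order, 1 = natural order
    //   writeByte(1)               a non-null missing value follows
    //   writeLong(0L)              the missing value itself
    System.out.println(sf);
  }
}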
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java 2016-05-10 05:44:23.744471118 -0400
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Components from the Lucene 6.2 index format.

+ * See {@link org.apache.lucene.codecs.lucene62} for an overview
+ * of the index format.
+ */
+
+package org.apache.lucene.codecs.lucene62;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java 2016-02-16 11:18:34.653021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java 2016-05-10 05:44:23.744471118 -0400
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
@@ -24,6 +23,7 @@
import java.util.List;
import java.util.NoSuchElementException;
+import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
@@ -31,6 +31,8 @@
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.Bits;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Abstract API that consumes normalization values.
* Concrete implementations of this
@@ -98,6 +100,30 @@
}
}
+ /** Tracks state of one numeric sub-reader that we are merging */
+ private static class NumericDocValuesSub extends DocIDMerger.Sub {
+
+ private final NumericDocValues values;
+ private int docID = -1;
+ private final int maxDoc;
+
+ public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values, int maxDoc) {
+ super(docMap);
+ this.values = values;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
+
/**
* Merges the norms from <code>toMerge</code>.
* <p>
@@ -111,13 +137,18 @@
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<NumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<Number>() {
- int readerUpto = -1;
- int docIDUpto;
long nextValue;
- int maxDoc;
- NumericDocValues currentValues;
- Bits currentLiveDocs;
boolean nextIsSet;
@Override
@@ -141,31 +172,13 @@
}
private boolean setNext() {
- while (true) {
- if (readerUpto == toMerge.size()) {
- return false;
- }
-
- if (currentValues == null || docIDUpto == maxDoc) {
- readerUpto++;
- if (readerUpto < toMerge.size()) {
- currentValues = toMerge.get(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- maxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- nextValue = currentValues.get(docIDUpto);
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ NumericDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return false;
}
+ nextIsSet = true;
+ nextValue = sub.values.get(sub.docID);
+ return true;
}
};
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java 2016-04-24 06:00:46.365895938 -0400
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java 2016-05-10 05:44:23.744471118 -0400
@@ -76,7 +76,6 @@
}
MergeState.DocMap docMap = mergeState.docMaps[i];
- int docBase = mergeState.docBase[i];
pointsReader.intersect(fieldInfo.name,
new IntersectVisitor() {
@Override
@@ -90,7 +89,7 @@
int newDocID = docMap.get(docID);
if (newDocID != -1) {
// Not deleted:
- mergedVisitor.visit(docBase + newDocID, packedValue);
+ mergedVisitor.visit(newDocID, packedValue);
}
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java 2016-02-16 11:18:34.653021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java 2016-05-10 05:44:23.744471118 -0400
@@ -20,10 +20,13 @@
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
@@ -33,6 +36,8 @@
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Codec API for writing stored fields:
* <ol>
@@ -73,6 +78,30 @@
* check that this is the case to detect the JRE bug described
* in LUCENE-1282. */
public abstract void finish(FieldInfos fis, int numDocs) throws IOException;
+
+ private static class StoredFieldsMergeSub extends DocIDMerger.Sub {
+ private final StoredFieldsReader reader;
+ private final int maxDoc;
+ private final MergeVisitor visitor;
+ int docID = -1;
+
+ public StoredFieldsMergeSub(MergeVisitor visitor, MergeState.DocMap docMap, StoredFieldsReader reader, int maxDoc) {
+ super(docMap);
+ this.maxDoc = maxDoc;
+ this.reader = reader;
+ this.visitor = visitor;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
/** Merges in the stored fields from the readers in
* <code>mergeState</code>. The default implementation skips
@@ -82,23 +111,26 @@
* Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
- int docCount = 0;
- for (int i=0;i<mergeState.storedFieldsReaders.length;i++) {
+ List<StoredFieldsMergeSub> subs = new ArrayList<>();
+ for(int i=0;i<mergeState.storedFieldsReaders.length;i++) {
StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i];
storedFieldsReader.checkIntegrity();
- MergeVisitor visitor = new MergeVisitor(mergeState, i);
- int maxDoc = mergeState.maxDocs[i];
- Bits liveDocs = mergeState.liveDocs[i];
- for (int docID=0;docID<maxDoc;docID++) {
- if (liveDocs != null && !liveDocs.get(docID)) {
- // skip deleted docs
- continue;
- }
- startDocument();
- storedFieldsReader.visitDocument(docID, visitor);
- finishDocument();
- docCount++;
+ subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
+ int docCount = 0;
+ while (true) {
+ StoredFieldsMergeSub sub = docIDMerger.next();
+ if (sub == null) {
+ break;
}
+ assert sub.mappedDocID == docCount;
+ startDocument();
+ sub.reader.visitDocument(sub.docID, sub.visitor);
+ finishDocument();
+ docCount++;
}
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java indexsort/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
--- trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java 2016-02-16 11:18:34.653021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java 2016-05-10 05:44:23.744471118 -0400
@@ -16,16 +16,18 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Iterator;
+import java.util.List;
-import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
@@ -34,6 +36,8 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Codec API for writing term vectors:
* <ol>
@@ -160,6 +164,28 @@
}
}
+ private static class TermVectorsMergeSub extends DocIDMerger.Sub {
+ private final TermVectorsReader reader;
+ private final int maxDoc;
+ int docID = -1;
+
+ public TermVectorsMergeSub(MergeState.DocMap docMap, TermVectorsReader reader, int maxDoc) {
+ super(docMap);
+ this.maxDoc = maxDoc;
+ this.reader = reader;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
+
/** Merges in the term vectors from the readers in
* <code>mergeState</code>. The default implementation skips
* over deleted documents, and uses {@link #startDocument(int)},
@@ -170,32 +196,35 @@
* Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
+
+ List<TermVectorsMergeSub> subs = new ArrayList<>();
+ for(int i=0;i<mergeState.termVectorsReaders.length;i++) {
+ TermVectorsReader reader = mergeState.termVectorsReaders[i];
+ if (reader != null) {
+ reader.checkIntegrity();
+ }
+ subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
int docCount = 0;
- int numReaders = mergeState.maxDocs.length;
- for (int i = 0; i < numReaders; i++) {
- int maxDoc = mergeState.maxDocs[i];
- Bits liveDocs = mergeState.liveDocs[i];
- TermVectorsReader termVectorsReader = mergeState.termVectorsReaders[i];
- if (termVectorsReader != null) {
- termVectorsReader.checkIntegrity();
+ while (true) {
+ TermVectorsMergeSub sub = docIDMerger.next();
+ if (sub == null) {
+ break;
}
- for (int docID=0;docID<maxDoc;docID++) {
- if (liveDocs != null && !liveDocs.get(docID)) {
- // skip deleted docs
- continue;
- }
- // NOTE: it's very important to first assign to vectors then pass it to
- // termVectorsWriter.addAllDocVectors; see LUCENE-1282
- Fields vectors;
- if (termVectorsReader == null) {
- vectors = null;
- } else {
- vectors = termVectorsReader.get(docID);
- }
- addAllDocVectors(vectors, mergeState);
- docCount++;
+ // NOTE: it's very important to first assign to vectors then pass it to
+ // termVectorsWriter.addAllDocVectors; see LUCENE-1282
+ Fields vectors;
+ if (sub.reader == null) {
+ vectors = null;
+ } else {
+ vectors = sub.reader.get(sub.docID);
}
+ addAllDocVectors(vectors, mergeState);
+ docCount++;
}
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java indexsort/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java 2016-03-10 16:23:24.691676109 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java 2016-05-10 05:44:23.744471118 -0400
@@ -43,6 +43,9 @@
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -217,6 +220,9 @@
/** Status for testing of PointValues (null if PointValues could not be tested). */
public PointsStatus pointsStatus;
+
+ /** Status of index sort */
+ public IndexSortStatus indexSortStatus;
}
/**
@@ -374,6 +380,16 @@
/** Exception thrown during doc values test (null on success) */
public Throwable error = null;
}
+
+ /**
+ * Status from testing index sort
+ */
+ public static final class IndexSortStatus {
+
+ /** Exception thrown during index sort test (null on success) */
+ public Throwable error = null;
+ }
+
}
/** Create a new CheckIndex on the directory. */
@@ -632,6 +648,7 @@
int toLoseDocCount = info.info.maxDoc();
SegmentReader reader = null;
+ Sort previousIndexSort = null;
try {
msg(infoStream, " version=" + (version == null ? "3.0" : version));
@@ -642,6 +659,17 @@
msg(infoStream, " compound=" + info.info.getUseCompoundFile());
segInfoStat.compound = info.info.getUseCompoundFile();
msg(infoStream, " numFiles=" + info.files().size());
+ Sort indexSort = info.info.getIndexSort();
+ if (indexSort != null) {
+ msg(infoStream, " sort=" + indexSort);
+ if (previousIndexSort != null) {
+ if (previousIndexSort.equals(indexSort) == false) {
+ throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort);
+ }
+ } else {
+ previousIndexSort = indexSort;
+ }
+ }
segInfoStat.numFiles = info.files().size();
segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB));
@@ -722,6 +750,9 @@
// Test PointValues
segInfoStat.pointsStatus = testPoints(reader, infoStream, failFast);
+ // Test index sort
+ segInfoStat.indexSortStatus = testSort(reader, indexSort, infoStream, failFast);
+
// Rethrow the first exception we encountered
// This will cause stats for failed segments to be incremented properly
if (segInfoStat.liveDocStatus.error != null) {
@@ -790,6 +821,68 @@
return result;
}
+
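+ /** Tests that documents in one segment follow the segment's declared index sort (if any). */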
+ public static Status.IndexSortStatus testSort(CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException {
+ // This segment claims its documents are sorted according to the incoming sort ... let's make sure:
+
+ long startNS = System.nanoTime();
+
+ Status.IndexSortStatus status = new Status.IndexSortStatus();
+
+ if (sort != null) {
+ if (infoStream != null) {
+ infoStream.print(" test: check index sort.....");
+ }
+
+ SortField fields[] = sort.getSort();
+ final int reverseMul[] = new int[fields.length];
+ final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length];
+
+ LeafReaderContext readerContext = new LeafReaderContext(reader);
+
+ for (int i = 0; i < fields.length; i++) {
+ reverseMul[i] = fields[i].getReverse() ? -1 : 1;
+ comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext);
+ }
+
+ int maxDoc = reader.maxDoc();
+
+ try {
+
+ for(int docID=1;docID < maxDoc;docID++) {
+
+ int cmp = 0;
+
+ for (int i = 0; i < comparators.length; i++) {
+ // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
+ // the segments are always the same here...
+ comparators[i].copy(0, docID-1);
+ comparators[i].setBottom(0);
+ cmp = reverseMul[i] * comparators[i].compareBottom(docID);
+ if (cmp != 0) {
+ break;
+ }
+ }
+
+ if (cmp > 0) {
+ throw new RuntimeException("segment has indexSort=" + sort + " but docID=" + (docID-1) + " sorts after docID=" + docID);
+ }
+ }
+ msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime()-startNS)));
+ } catch (Throwable e) {
+ if (failFast) {
+ IOUtils.reThrow(e);
+ }
+ msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
+ status.error = e;
+ if (infoStream != null) {
+ e.printStackTrace(infoStream);
+ }
+ }
+ }
+
+ return status;
+ }
/**
* Test live docs.
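As a usage sketch (not part of the patch): running CheckIndex over a sorted index exercises the new per-segment sort test above. The index path is a placeholder and only stock CheckIndex APIs are assumed.

import java.nio.file.Paths;

import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckSortedIndex {
  public static void main(String[] args) throws Exception {
    // "/path/to/index" is a placeholder for a real index directory.
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
         CheckIndex checker = new CheckIndex(dir)) {
      checker.setInfoStream(System.out);  // prints the per-segment "test: check index sort....." line
      CheckIndex.Status status = checker.checkIndex();
      // With this patch each segment's status also carries indexSortStatus;
      // a non-null error there means documents violate the declared sort.
      System.out.println("index clean? " + status.clean);
    }
  }
}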
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java indexsort/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java 2016-05-10 05:44:23.744471118 -0400
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSetIterator; // javadocs
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.PriorityQueue;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/** Utility class to help merging documents from sub-readers according to either simple
+ * concatenated (unsorted) order, or by a specified index-time sort, skipping
+ * deleted documents and remapping non-deleted documents. */
+
+public class DocIDMerger<T extends DocIDMerger.Sub> {
+
+ private final List<T> subs;
+
+ // Used when indexSort != null:
+ private final PriorityQueue<T> queue;
+ private boolean first;
+
+ // Used when the index is not sorted (we simply concatenate the sub readers):
+ private T current;
+ private int nextIndex;
+
+ public static abstract class Sub {
+ public int mappedDocID;
+ final MergeState.DocMap docMap;
+
+ public Sub(MergeState.DocMap docMap) {
+ this.docMap = docMap;
+ }
+
+ /** Returns the next document ID from this sub reader, and {@link DocIdSetIterator#NO_MORE_DOCS} when done */
+ public abstract int nextDoc();
+ }
+
+ public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) {
+ this.subs = subs;
+
+ if (indexIsSorted) {
+ queue = new PriorityQueue<T>(maxCount) {
+ @Override
+ protected boolean lessThan(Sub a, Sub b) {
+ assert a.mappedDocID != b.mappedDocID;
+ return a.mappedDocID < b.mappedDocID;
+ }
+ };
+ } else {
+ // We simply concatenate
+ queue = null;
+ }
+
+ reset();
+ }
+
+ public DocIDMerger(List<T> subs, boolean indexIsSorted) {
+ this(subs, subs.size(), indexIsSorted);
+ }
+
+ /** Reuse API, currently only used by postings during merge */
+ public void reset() {
+ if (queue != null) {
+ // caller may not have fully consumed the queue:
+ queue.clear();
+ for(T sub : subs) {
+ while (true) {
+ int docID = sub.nextDoc();
+ if (docID == NO_MORE_DOCS) {
+ // all docs in this sub were deleted; do not add it to the queue!
+ break;
+ }
+
+ int mappedDocID = sub.docMap.get(docID);
+ if (mappedDocID == -1) {
+ // doc was deleted
+ continue;
+ } else {
+ sub.mappedDocID = mappedDocID;
+ queue.add(sub);
+ break;
+ }
+ }
+ }
+ first = true;
+ } else if (subs.size() > 0) {
+ current = subs.get(0);
+ nextIndex = 1;
+ } else {
+ current = null;
+ nextIndex = 0;
+ }
+ }
+
+ /** Returns null when done */
+ public T next() {
+ // Loop until we find a non-deleted document
+ if (queue != null) {
+ T top = queue.top();
+ if (top == null) {
+ // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
+ return null;
+ }
+
+ if (first == false) {
+ while (true) {
+ int docID = top.nextDoc();
+ if (docID == NO_MORE_DOCS) {
+ queue.pop();
+ top = queue.top();
+ break;
+ }
+ int mappedDocID = top.docMap.get(docID);
+ if (mappedDocID == -1) {
+ // doc was deleted
+ continue;
+ } else {
+ top.mappedDocID = mappedDocID;
+ top = queue.updateTop();
+ break;
+ }
+ }
+ }
+
+ first = false;
+
+ return top;
+
+ } else {
+ while (true) {
+ if (current == null) {
+ // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
+ return null;
+ }
+ int docID = current.nextDoc();
+ if (docID == NO_MORE_DOCS) {
+ if (nextIndex == subs.size()) {
+ current = null;
+ return null;
+ }
+ current = subs.get(nextIndex);
+ nextIndex++;
+ continue;
+ }
+ int mappedDocID = current.docMap.get(docID);
+ if (mappedDocID == -1) {
+ // doc is deleted
+ continue;
+ }
+
+ current.mappedDocID = mappedDocID;
+ return current;
+ }
+ }
+ }
+}
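To make the new API concrete, here is a minimal sketch of a DocIDMerger.Sub and its consumption loop, mirroring the pattern used in NormsConsumer and StoredFieldsWriter above; the class name and the drain() helper are illustrative, not part of the patch.

import java.util.List;

import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.MergeState;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

class SimpleSub extends DocIDMerger.Sub {
  final int maxDoc;
  int docID = -1;

  SimpleSub(MergeState.DocMap docMap, int maxDoc) {
    super(docMap);
    this.maxDoc = maxDoc;
  }

  @Override
  public int nextDoc() {
    docID++;
    return docID == maxDoc ? NO_MORE_DOCS : docID;
  }

  // Typical consumption, one sub per incoming segment; subs and indexIsSorted come from the caller:
  static void drain(List<SimpleSub> subs, boolean indexIsSorted) {
    DocIDMerger<SimpleSub> merger = new DocIDMerger<>(subs, indexIsSorted);
    for (SimpleSub sub = merger.next(); sub != null; sub = merger.next()) {
      // sub.docID is the doc id in its source segment,
      // sub.mappedDocID is the doc id in the merged segment.
    }
  }
}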
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java indexsort/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java 2016-03-02 04:32:40.435807336 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java 2016-05-10 05:44:23.744471118 -0400
@@ -178,7 +178,7 @@
pendingUpdates.clear();
deleteSlice = deleteQueue.newSlice();
- segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
assert numDocsInRAM == 0;
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/DocValues.java indexsort/lucene/core/src/java/org/apache/lucene/index/DocValues.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/DocValues.java 2016-02-16 11:18:34.661021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/DocValues.java 2016-05-10 05:44:23.744471118 -0400
@@ -210,7 +210,7 @@
(expected.length == 1
? "(expected=" + expected[0]
: "(expected one of " + Arrays.toString(expected)) + "). " +
- "Use UninvertingReader or index with docvalues.");
+ "Re-index with correct docvalues type.");
}
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java 2016-03-08 17:22:26.828938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java 2016-05-10 05:44:23.744471118 -0400
@@ -25,6 +25,7 @@
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
@@ -102,6 +103,11 @@
}
@Override
+ public Sort getIndexSort() {
+ return in.getIndexSort();
+ }
+
+ @Override
public void addCoreClosedListener(CoreClosedListener listener) {
in.addCoreClosedListener(listener);
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java 2016-04-24 06:00:46.365895938 -0400
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java 2016-05-10 05:44:23.744471118 -0400
@@ -22,6 +22,7 @@
import java.util.Objects;
import org.apache.lucene.search.QueryCache;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -472,6 +473,12 @@
}
@Override
+ public Sort getIndexSort() {
+ ensureOpen();
+ return in.getIndexSort();
+ }
+
+ @Override
public void checkIntegrity() throws IOException {
ensureOpen();
in.checkIntegrity();
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java indexsort/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java 2016-04-24 06:00:46.365895938 -0400
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java 2016-05-10 05:44:23.748471119 -0400
@@ -18,16 +18,19 @@
import java.io.PrintStream;
+import java.util.EnumSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.PrintStreamInfoStream;
-import org.apache.lucene.util.SetOnce;
import org.apache.lucene.util.SetOnce.AlreadySetException;
+import org.apache.lucene.util.SetOnce;
/**
* Holds all the configuration that is used to create an {@link IndexWriter}.
@@ -439,6 +442,26 @@
return this;
}
+ /** We only allow sorting on these types */
+ private static final EnumSet<SortField.Type> ALLOWED_INDEX_SORT_TYPES = EnumSet.of(SortField.Type.STRING,
+ SortField.Type.LONG,
+ SortField.Type.INT,
+ SortField.Type.DOUBLE,
+ SortField.Type.FLOAT);
+
+ /**
+ * Set the {@link Sort} order to use when merging segments. Note that newly flushed segments will remain unsorted.
+ */
+ public IndexWriterConfig setIndexSort(Sort sort) {
+ for(SortField sortField : sort.getSort()) {
+ if (ALLOWED_INDEX_SORT_TYPES.contains(sortField.getType()) == false) {
+ throw new IllegalArgumentException("invalid SortField type: must be one of " + ALLOWED_INDEX_SORT_TYPES + " but got: " + sortField);
+ }
+ }
+ this.indexSort = sort;
+ return this;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder(super.toString());
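As an end-to-end usage sketch (not part of the patch): configuring an index sort and forcing a merge so the merged segment is written in sorted order. Analyzer, directory and field names are illustrative; with this patch, newly flushed segments remain unsorted until they are merged.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class IndexSortExample {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    // Only STRING, LONG, INT, DOUBLE and FLOAT sort types are accepted by setIndexSort:
    iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG)));
    try (Directory dir = new RAMDirectory();
         IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new NumericDocValuesField("timestamp", 42L));
      w.addDocument(doc);
      w.forceMerge(1);  // the merged segment is written in timestamp order
    }
  }
}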
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java indexsort/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java 2016-04-24 06:00:46.365895938 -0400
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java 2016-05-10 05:44:23.748471119 -0400
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.index;
-
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -32,8 +31,8 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
-import java.util.Map;
import java.util.Map.Entry;
+import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@@ -49,6 +48,7 @@
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -937,6 +937,8 @@
// NOTE: this is correct even for an NRT reader because we'll pull FieldInfos even for the un-committed segments:
globalFieldNumberMap = getFieldNumberMap();
+ validateIndexSort();
+
config.getFlushPolicy().init(config);
docWriter = new DocumentsWriter(this, config, directoryOrig, directory);
eventQueue = docWriter.eventQueue();
@@ -1000,6 +1002,20 @@
}
}
+ /** Confirms that the incoming index sort (if any) matches the existing index sort (if any). This is unfortunately just best effort,
+ * because the old index may contain only flushed segments, which do not record an index sort. */
+ private void validateIndexSort() {
+ Sort indexSort = config.getIndexSort();
+ if (indexSort != null) {
+ for(SegmentCommitInfo info : segmentInfos) {
+ Sort segmentIndexSort = info.info.getIndexSort();
+ if (segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
+ throw new IllegalArgumentException("cannot change previous indexSort=" + segmentIndexSort + " (from segment=" + info + ") to new indexSort=" + indexSort);
+ }
+ }
+ }
+ }
+
// reads latest field infos for the commit
// this is used on IW init and addIndexes(Dir) to create/update the global field map.
// TODO: fix tests abusing this method!
@@ -2472,7 +2488,8 @@
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @throws IllegalArgumentException if addIndexes would cause
- * the index to exceed {@link #MAX_DOCS}
+ * the index to exceed {@link #MAX_DOCS}, or if the incoming
+ * index sort does not match this index's index sort
*/
public void addIndexes(Directory... dirs) throws IOException {
ensureOpen();
@@ -2481,6 +2498,8 @@
List<Lock> locks = acquireWriteLocks(dirs);
+ Sort indexSort = config.getIndexSort();
+
boolean successTop = false;
try {
@@ -2513,6 +2532,13 @@
for (SegmentCommitInfo info : sis) {
assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
+ Sort segmentIndexSort = info.info.getIndexSort();
+
+ if (indexSort != null && segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
+ // TODO: we could make this smarter, e.g. if the incoming indexSort is congruent with our sort ("starts with") then it's OK
+ throw new IllegalArgumentException("cannot change index sort from " + segmentIndexSort + " to " + indexSort);
+ }
+
String newSegName = newSegmentName();
if (infoStream.isEnabled("IW")) {
@@ -2609,6 +2635,8 @@
// long so we can detect int overflow:
long numDocs = 0;
+ Sort indexSort = config.getIndexSort();
+
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at addIndexes(CodecReader...)");
@@ -2618,6 +2646,10 @@
String mergedName = newSegmentName();
for (CodecReader leaf : readers) {
numDocs += leaf.numDocs();
+ Sort leafIndexSort = leaf.getIndexSort();
+ if (indexSort != null && leafIndexSort != null && indexSort.equals(leafIndexSort) == false) {
+ throw new IllegalArgumentException("cannot change index sort from " + leafIndexSort + " to " + indexSort);
+ }
}
// Best-effort up front check:
@@ -2630,7 +2662,7 @@
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
SegmentInfo info = new SegmentInfo(directoryOrig, Version.LATEST, mergedName, -1,
- false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort());
SegmentMerger merger = new SegmentMerger(Arrays.asList(readers), info, infoStream, trackingDir,
globalFieldNumberMap,
@@ -2715,7 +2747,7 @@
// Same SI as before but we change directory and name
SegmentInfo newInfo = new SegmentInfo(directoryOrig, info.info.getVersion(), segName, info.info.maxDoc(),
info.info.getUseCompoundFile(), info.info.getCodec(),
- info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes());
+ info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes(), info.info.getIndexSort());
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.getDelCount(), info.getDelGen(),
info.getFieldInfosGen(), info.getDocValuesGen());
@@ -3243,16 +3275,13 @@
private static class MergedDeletesAndUpdates {
ReadersAndUpdates mergedDeletesAndUpdates = null;
- MergePolicy.DocMap docMap = null;
boolean initializedWritableLiveDocs = false;
MergedDeletesAndUpdates() {}
- final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, boolean initWritableLiveDocs) throws IOException {
+ final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, boolean initWritableLiveDocs) throws IOException {
if (mergedDeletesAndUpdates == null) {
mergedDeletesAndUpdates = readerPool.get(merge.info, true);
- docMap = merge.getDocMap(mergeState);
- assert docMap.isConsistent(merge.info.info.maxDoc());
}
if (initWritableLiveDocs && !initializedWritableLiveDocs) {
mergedDeletesAndUpdates.initWritableLiveDocs();
@@ -3262,18 +3291,18 @@
}
- private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto,
+ private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState,
MergedDeletesAndUpdates holder, String[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates,
- DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc) throws IOException {
+ DocValuesFieldUpdates.Iterator[] updatesIters, int segment, int curDoc) throws IOException {
int newDoc = -1;
for (int idx = 0; idx < mergingFields.length; idx++) {
DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx];
if (updatesIter.doc() == curDoc) { // document has an update
if (holder.mergedDeletesAndUpdates == null) {
- holder.init(readerPool, merge, mergeState, false);
+ holder.init(readerPool, merge, false);
}
if (newDoc == -1) { // map once per all field updates, but only if there are any updates
- newDoc = holder.docMap.map(docUpto);
+ newDoc = mergeState.docMaps[segment].get(curDoc);
}
DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx];
dvUpdates.add(newDoc, updatesIter.value());
@@ -3306,13 +3335,13 @@
// Carefully merge deletes that occurred after we
// started merging:
- int docUpto = 0;
long minGen = Long.MAX_VALUE;
// Lazy init (only when we find a delete to carry over):
final MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates();
final DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container();
-
+
+ assert sourceSegments.size() == mergeState.docMaps.length;
for (int i = 0; i < sourceSegments.size(); i++) {
SegmentCommitInfo info = sourceSegments.get(i);
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
@@ -3375,21 +3404,20 @@
// since we started the merge, so we
// must merge them:
for (int j = 0; j < maxDoc; j++) {
- if (!prevLiveDocs.get(j)) {
- assert !currentLiveDocs.get(j);
- } else {
- if (!currentLiveDocs.get(j)) {
- if (holder.mergedDeletesAndUpdates == null || !holder.initializedWritableLiveDocs) {
- holder.init(readerPool, merge, mergeState, true);
- }
- holder.mergedDeletesAndUpdates.delete(holder.docMap.map(docUpto));
- if (mergingFields != null) { // advance all iters beyond the deleted document
- skipDeletedDoc(updatesIters, j);
- }
- } else if (mergingFields != null) {
- maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
+ if (prevLiveDocs.get(j) == false) {
+ // if the document was deleted before, it better still be deleted!
+ assert currentLiveDocs.get(j) == false;
+ } else if (currentLiveDocs.get(j) == false) {
+ // the document was deleted while we were merging:
+ if (holder.mergedDeletesAndUpdates == null || holder.initializedWritableLiveDocs == false) {
+ holder.init(readerPool, merge, true);
+ }
+ holder.mergedDeletesAndUpdates.delete(mergeState.docMaps[i].get(mergeState.leafDocMaps[i].get(j)));
+ if (mergingFields != null) { // advance all iters beyond the deleted document
+ skipDeletedDoc(updatesIters, j);
}
- docUpto++;
+ } else if (mergingFields != null) {
+ maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j);
}
}
} else if (mergingFields != null) {
@@ -3397,50 +3425,38 @@
for (int j = 0; j < maxDoc; j++) {
if (prevLiveDocs.get(j)) {
// document isn't deleted, check if any of the fields have an update to it
- maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
- // advance docUpto for every non-deleted document
- docUpto++;
+ maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j);
} else {
// advance all iters beyond the deleted document
skipDeletedDoc(updatesIters, j);
}
}
- } else {
- docUpto += info.info.maxDoc() - info.getDelCount() - rld.getPendingDeleteCount();
}
} else if (currentLiveDocs != null) {
assert currentLiveDocs.length() == maxDoc;
// This segment had no deletes before but now it
// does:
for (int j = 0; j < maxDoc; j++) {
- if (!currentLiveDocs.get(j)) {
+ if (currentLiveDocs.get(j) == false) {
if (holder.mergedDeletesAndUpdates == null || !holder.initializedWritableLiveDocs) {
- holder.init(readerPool, merge, mergeState, true);
+ holder.init(readerPool, merge, true);
}
- holder.mergedDeletesAndUpdates.delete(holder.docMap.map(docUpto));
+ holder.mergedDeletesAndUpdates.delete(mergeState.docMaps[i].get(mergeState.leafDocMaps[i].get(j)));
if (mergingFields != null) { // advance all iters beyond the deleted document
skipDeletedDoc(updatesIters, j);
}
} else if (mergingFields != null) {
- maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
+ maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j);
}
- docUpto++;
}
} else if (mergingFields != null) {
// no deletions before or after, but there were updates
for (int j = 0; j < maxDoc; j++) {
- maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
- // advance docUpto for every non-deleted document
- docUpto++;
+ maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j);
}
- } else {
- // No deletes or updates before or after
- docUpto += info.info.maxDoc();
}
}
- assert docUpto == merge.info.info.maxDoc();
-
if (mergedDVUpdates.any()) {
// System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates);
boolean success = false;
@@ -3881,7 +3897,7 @@
// ConcurrentMergePolicy we keep deterministic segment
// names.
final String mergeSegmentName = newSegmentName();
- SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, mergeSegmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, mergeSegmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort());
Map<String,String> details = new HashMap<>();
details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
details.put("mergeFactor", Integer.toString(merge.segments.size()));
@@ -4082,10 +4098,13 @@
}
// System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders());
-
- // we pass merge.getMergeReaders() instead of merge.readers to allow the
- // OneMerge to return a view over the actual segments to merge
- final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
+
+ // Let the merge wrap readers
+ List<CodecReader> mergeReaders = new ArrayList<>();
+ for (SegmentReader reader : merge.readers) {
+ mergeReaders.add(merge.wrapForMerge(reader));
+ }
+ final SegmentMerger merger = new SegmentMerger(mergeReaders,
merge.info.info, infoStream, dirWrapper,
globalFieldNumberMap,
context);
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/LeafReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/LeafReader.java 2016-03-08 17:22:26.828938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/LeafReader.java 2016-05-10 05:44:23.748471119 -0400
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.lucene.index.IndexReader.ReaderClosedListener;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/** {@code LeafReader} is an abstract class, providing an interface for accessing an
@@ -312,4 +313,7 @@
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
+
+ /** Returns null if this leaf is unsorted, or the {@link Sort} that it was sorted by */
+ public abstract Sort getIndexSort();
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java indexsort/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java 2016-02-16 11:18:34.665021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java 2016-05-10 05:44:23.748471119 -0400
@@ -23,6 +23,7 @@
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream;
@@ -94,6 +95,9 @@
/** True if calls to {@link IndexWriter#close()} should first do a commit. */
protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE;
+ /** The sort order to use to write merged segments. */
+ protected Sort indexSort = null;
+
// used by IndexWriterConfig
LiveIndexWriterConfig(Analyzer analyzer) {
this.analyzer = analyzer;
@@ -445,6 +449,14 @@
return commitOnClose;
}
+ /**
+ * Get the index-time {@link Sort} order, or null if none is set. Merged segments will be written
+ * in this order.
+ */
+ public Sort getIndexSort() {
+ return indexSort;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -467,6 +479,7 @@
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n");
sb.append("commitOnClose=").append(getCommitOnClose()).append("\n");
+ sb.append("indexSort=").append(getIndexSort()).append("\n");
return sb.toString();
}
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java indexsort/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java 2016-04-28 20:11:21.846721717 -0400
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java 2016-05-10 05:44:23.748471119 -0400
@@ -18,8 +18,11 @@
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.index.MultiPostingsEnum.EnumWithSlice;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/**
@@ -30,52 +33,66 @@
*/
final class MappingMultiPostingsEnum extends PostingsEnum {
- private MultiPostingsEnum.EnumWithSlice[] subs;
- int numSubs;
- int upto;
- MergeState.DocMap currentMap;
- PostingsEnum current;
- int currentBase;
- int doc = -1;
- private MergeState mergeState;
MultiPostingsEnum multiDocsAndPositionsEnum;
final String field;
+ final DocIDMerger<MappingPostingsSub> docIDMerger;
+ private MappingPostingsSub current;
+ private final MappingPostingsSub[] allSubs;
+ private final List<MappingPostingsSub> subs = new ArrayList<>();
+
+ private static class MappingPostingsSub extends DocIDMerger.Sub {
+ public PostingsEnum postings;
+
+ public MappingPostingsSub(MergeState.DocMap docMap) {
+ super(docMap);
+ }
+
+ @Override
+ public int nextDoc() {
+ try {
+ return postings.nextDoc();
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
+ }
+ }
/** Sole constructor. */
- public MappingMultiPostingsEnum(String field, MergeState mergeState) {
+ public MappingMultiPostingsEnum(String field, MergeState mergeState) throws IOException {
this.field = field;
- this.mergeState = mergeState;
+ allSubs = new MappingPostingsSub[mergeState.fieldsProducers.length];
+ for(int i=0;i<allSubs.length;i++) {
+ allSubs[i] = new MappingPostingsSub(mergeState.docMaps[i]);
+ }
+ this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.segmentInfo.getIndexSort() != null);
}
- MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) {
- this.numSubs = postingsEnum.getNumSubs();
- this.subs = postingsEnum.getSubs();
- upto = -1;
- doc = -1;
- current = null;
+ MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException {
this.multiDocsAndPositionsEnum = postingsEnum;
+ MultiPostingsEnum.EnumWithSlice[] subsArray = postingsEnum.getSubs();
+ int count = postingsEnum.getNumSubs();
+ subs.clear();
+ for(int i=0;i<count;i++) {
+ MappingPostingsSub sub = allSubs[subsArray[i].slice.readerIndex];
+ sub.postings = subsArray[i].postingsEnum;
+ subs.add(sub);
+ }
+ docIDMerger.reset();
return this;
}
- /** How many sub-readers we are merging.
- * @see #getSubs */
- public int getNumSubs() {
- return numSubs;
- }
-
- /** Returns sub-readers we are merging. */
- public EnumWithSlice[] getSubs() {
- return subs;
- }
-
@Override
public int freq() throws IOException {
- return current.freq();
+ return current.postings.freq();
}
@Override
public int docID() {
- return doc;
+ if (current == null) {
+ return -1;
+ } else {
+ return current.mappedDocID;
+ }
}
@Override
@@ -85,66 +102,47 @@
@Override
public int nextDoc() throws IOException {
- while(true) {
- if (current == null) {
- if (upto == numSubs-1) {
- return this.doc = NO_MORE_DOCS;
- } else {
- upto++;
- final int reader = subs[upto].slice.readerIndex;
- current = subs[upto].postingsEnum;
- currentBase = mergeState.docBase[reader];
- currentMap = mergeState.docMaps[reader];
- }
- }
-
- int doc = current.nextDoc();
- if (doc != NO_MORE_DOCS) {
- // compact deletions
- doc = currentMap.get(doc);
- if (doc == -1) {
- continue;
- }
- return this.doc = currentBase + doc;
- } else {
- current = null;
- }
+ current = docIDMerger.next();
+ if (current == null) {
+ return NO_MORE_DOCS;
+ } else {
+ return current.mappedDocID;
}
}
@Override
public int nextPosition() throws IOException {
- int pos = current.nextPosition();
+ int pos = current.postings.nextPosition();
if (pos < 0) {
- throw new CorruptIndexException("position=" + pos + " is negative, field=\"" + field + " doc=" + doc,
- mergeState.fieldsProducers[upto].toString());
+ throw new CorruptIndexException("position=" + pos + " is negative, field=\"" + field + "\" doc=" + current.mappedDocID,
+ current.postings.toString());
} else if (pos > IndexWriter.MAX_POSITION) {
- throw new CorruptIndexException("position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + doc,
- mergeState.fieldsProducers[upto].toString());
+ throw new CorruptIndexException("position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + current.mappedDocID,
+ current.postings.toString());
}
return pos;
}
@Override
public int startOffset() throws IOException {
- return current.startOffset();
+ return current.postings.startOffset();
}
@Override
public int endOffset() throws IOException {
- return current.endOffset();
+ return current.postings.endOffset();
}
@Override
public BytesRef getPayload() throws IOException {
- return current.getPayload();
+ return current.postings.getPayload();
}
@Override
public long cost() {
long cost = 0;
- for (EnumWithSlice enumWithSlice : subs) {
- cost += enumWithSlice.postingsEnum.cost();
+ for (MappingPostingsSub sub : subs) {
+ cost += sub.postings.cost();
}
return cost;
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java indexsort/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java 2016-02-16 11:18:34.665021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java 2016-05-10 05:44:23.748471119 -0400
@@ -58,31 +58,6 @@
*/
public abstract class MergePolicy {
- /** A map of doc IDs. */
- public static abstract class DocMap {
- /** Sole constructor, typically invoked from sub-classes constructors. */
- protected DocMap() {}
-
- /** Return the new doc ID according to its old value. */
- public abstract int map(int old);
-
- /** Useful from an assert. */
- boolean isConsistent(int maxDoc) {
- final FixedBitSet targets = new FixedBitSet(maxDoc);
- for (int i = 0; i < maxDoc; ++i) {
- final int target = map(i);
- if (target < 0 || target >= maxDoc) {
- assert false : "out of range: " + target + " not in [0-" + maxDoc + "[";
- return false;
- } else if (targets.get(target)) {
- assert false : target + " is already taken (" + i + ")";
- return false;
- }
- }
- return true;
- }
- }
-
/** OneMerge provides the information necessary to perform
* an individual primitive merge operation, resulting in
* a single new segment. The merge spec includes the
@@ -140,25 +115,11 @@
public void mergeFinished() throws IOException {
}
- /** Expert: Get the list of readers to merge. Note that this list does not
- * necessarily match the list of segments to merge and should only be used
- * to feed SegmentMerger to initialize a merge. When a {@link OneMerge}
- * reorders doc IDs, it must override {@link #getDocMap} too so that
- * deletes that happened during the merge can be applied to the newly
- * merged segment. */
- public List<CodecReader> getMergeReaders() throws IOException {
- if (readers == null) {
- throw new IllegalStateException("IndexWriter has not initialized readers from the segment infos yet");
- }
- final List<CodecReader> readers = new ArrayList<>(this.readers.size());
- for (SegmentReader reader : this.readers) {
- if (reader.numDocs() > 0) {
- readers.add(reader);
- }
- }
- return Collections.unmodifiableList(readers);
+ /** Wrap the reader in order to add/remove information to the merged segment. */
+ public CodecReader wrapForMerge(CodecReader reader) throws IOException {
+ return reader;
}
-
+
/**
* Expert: Sets the {@link SegmentCommitInfo} of the merged segment.
* Allows sub-classes to e.g. set diagnostics properties.
@@ -175,20 +136,6 @@
return info;
}
- /** Expert: If {@link #getMergeReaders()} reorders document IDs, this method
- * must be overridden to return a mapping from the <i>natural</i> doc ID
- * (the doc ID that would result from a natural merge) to the actual doc
- * ID. This mapping is used to apply deletions that happened during the
- * merge to the new segment. */
- public DocMap getDocMap(MergeState mergeState) {
- return new DocMap() {
- @Override
- public int map(int docID) {
- return docID;
- }
- };
- }
-
/** Record that an exception occurred while executing
* this merge */
synchronized void setException(Throwable error) {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java indexsort/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java 2016-05-10 05:44:23.748471119 -0400
@@ -0,0 +1,266 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
+import org.apache.lucene.codecs.StoredFieldsReader;
+import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.util.Bits;
+
+/** This is a hack to make index sorting fast, with a {@link LeafReader} that always returns merge instances when you ask for the codec readers. */
+class MergeReaderWrapper extends LeafReader {
+ final CodecReader in;
+ final FieldsProducer fields;
+ final NormsProducer norms;
+ final DocValuesProducer docValues;
+ final StoredFieldsReader store;
+ final TermVectorsReader vectors;
+
+ MergeReaderWrapper(CodecReader in) throws IOException {
+ this.in = in;
+
+ FieldsProducer fields = in.getPostingsReader();
+ if (fields != null) {
+ fields = fields.getMergeInstance();
+ }
+ this.fields = fields;
+
+ NormsProducer norms = in.getNormsReader();
+ if (norms != null) {
+ norms = norms.getMergeInstance();
+ }
+ this.norms = norms;
+
+ DocValuesProducer docValues = in.getDocValuesReader();
+ if (docValues != null) {
+ docValues = docValues.getMergeInstance();
+ }
+ this.docValues = docValues;
+
+ StoredFieldsReader store = in.getFieldsReader();
+ if (store != null) {
+ store = store.getMergeInstance();
+ }
+ this.store = store;
+
+ TermVectorsReader vectors = in.getTermVectorsReader();
+ if (vectors != null) {
+ vectors = vectors.getMergeInstance();
+ }
+ this.vectors = vectors;
+ }
+
+ @Override
+ public void addCoreClosedListener(CoreClosedListener listener) {
+ in.addCoreClosedListener(listener);
+ }
+
+ @Override
+ public void removeCoreClosedListener(CoreClosedListener listener) {
+ in.removeCoreClosedListener(listener);
+ }
+
+ @Override
+ public Fields fields() throws IOException {
+ return fields;
+ }
+
+ @Override
+ public NumericDocValues getNumericDocValues(String field) throws IOException {
+ ensureOpen();
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (fi == null) {
+ // Field does not exist
+ return null;
+ }
+ if (fi.getDocValuesType() != DocValuesType.NUMERIC) {
+ // Field was not indexed with doc values
+ return null;
+ }
+ return docValues.getNumeric(fi);
+ }
+
+ @Override
+ public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+ ensureOpen();
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (fi == null) {
+ // Field does not exist
+ return null;
+ }
+ if (fi.getDocValuesType() != DocValuesType.BINARY) {
+ // Field was not indexed with doc values
+ return null;
+ }
+ return docValues.getBinary(fi);
+ }
+
+ @Override
+ public SortedDocValues getSortedDocValues(String field) throws IOException {
+ ensureOpen();
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (fi == null) {
+ // Field does not exist
+ return null;
+ }
+ if (fi.getDocValuesType() != DocValuesType.SORTED) {
+ // Field was not indexed with doc values
+ return null;
+ }
+ return docValues.getSorted(fi);
+ }
+
+ @Override
+ public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
+ ensureOpen();
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (fi == null) {
+ // Field does not exist
+ return null;
+ }
+ if (fi.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
+ // Field was not indexed with doc values
+ return null;
+ }
+ return docValues.getSortedNumeric(fi);
+ }
+
+ @Override
+ public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+ ensureOpen();
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (fi == null) {
+ // Field does not exist
+ return null;
+ }
+ if (fi.getDocValuesType() != DocValuesType.SORTED_SET) {
+ // Field was not indexed with doc values
+ return null;
+ }
+ return docValues.getSortedSet(fi);
+ }
+
+ @Override
+ public Bits getDocsWithField(String field) throws IOException {
+ ensureOpen();
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (fi == null) {
+ // Field does not exist
+ return null;
+ }
+ if (fi.getDocValuesType() == DocValuesType.NONE) {
+ // Field was not indexed with doc values
+ return null;
+ }
+ return docValues.getDocsWithField(fi);
+ }
+
+ @Override
+ public NumericDocValues getNormValues(String field) throws IOException {
+ ensureOpen();
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (fi == null || !fi.hasNorms()) {
+ // Field does not exist or does not index norms
+ return null;
+ }
+ return norms.getNorms(fi);
+ }
+
+ @Override
+ public FieldInfos getFieldInfos() {
+ return in.getFieldInfos();
+ }
+
+ @Override
+ public Bits getLiveDocs() {
+ return in.getLiveDocs();
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ in.checkIntegrity();
+ }
+
+ @Override
+ public Fields getTermVectors(int docID) throws IOException {
+ ensureOpen();
+ checkBounds(docID);
+ if (vectors == null) {
+ return null;
+ }
+ return vectors.get(docID);
+ }
+
+ @Override
+ public PointValues getPointValues() {
+ return in.getPointValues();
+ }
+
+ @Override
+ public int numDocs() {
+ return in.numDocs();
+ }
+
+ @Override
+ public int maxDoc() {
+ return in.maxDoc();
+ }
+
+ @Override
+ public void document(int docID, StoredFieldVisitor visitor) throws IOException {
+ ensureOpen();
+ checkBounds(docID);
+ store.visitDocument(docID, visitor);
+ }
+
+ @Override
+ protected void doClose() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public Object getCoreCacheKey() {
+ return in.getCoreCacheKey();
+ }
+
+ @Override
+ public Object getCombinedCoreAndDeletesKey() {
+ return in.getCombinedCoreAndDeletesKey();
+ }
+
+ private void checkBounds(int docID) {
+ if (docID < 0 || docID >= maxDoc()) {
+ throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")");
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "MergeReaderWrapper(" + in + ")";
+ }
+
+ @Override
+ public Sort getIndexSort() {
+ return in.getIndexSort();
+ }
+}
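
Aside (illustration only, not from the patch): MergeReaderWrapper above pulls each producer out of the incoming CodecReader and, when it is non-null, replaces it with its merge instance before caching it in a final field. The same null-guarded pattern is repeated for postings, norms, doc values, stored fields and term vectors; in isolation, with a hypothetical Producer interface standing in for the codec producer types, it looks like this:

    // Hypothetical stand-in for FieldsProducer, NormsProducer, etc.
    interface Producer {
      Producer getMergeInstance();
    }

    class MergeInstanceSketch {
      // A missing producer stays null; a present one is swapped for its merge-optimized view.
      static Producer mergeInstanceOrNull(Producer producer) {
        return producer == null ? null : producer.getMergeInstance();
      }

      public static void main(String[] args) {
        Producer none = null;                            // e.g. a segment with no term vectors
        Producer some = new Producer() {                 // e.g. a postings producer
          public Producer getMergeInstance() { return this; }
        };
        System.out.println(mergeInstanceOrNull(none));   // null
        System.out.println(mergeInstanceOrNull(some));   // the merge instance
      }
    }
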
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java indexsort/lucene/core/src/java/org/apache/lucene/index/MergeState.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java 2016-03-08 17:22:26.828938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/MergeState.java 2016-05-10 05:44:23.748471119 -0400
@@ -18,6 +18,8 @@
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import org.apache.lucene.codecs.DocValuesProducer;
@@ -26,6 +28,7 @@
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
@@ -36,6 +39,12 @@
* @lucene.experimental */
public class MergeState {
+ /** Maps document IDs from old segments to document IDs in the new segment */
+ public final DocMap[] docMaps;
+
+ // Only used by IW when it must remap deletes that arrived against the merging segments while a merge was running:
+ final DocMap[] leafDocMaps;
+
/** {@link SegmentInfo} of the newly merged segment. */
public final SegmentInfo segmentInfo;
@@ -60,18 +69,12 @@
/** Live docs for each reader */
public final Bits[] liveDocs;
- /** Maps docIDs around deletions. */
- public final DocMap[] docMaps;
-
/** Postings to merge */
public final FieldsProducer[] fieldsProducers;
/** Point readers to merge */
public final PointsReader[] pointsReaders;
- /** New docID base per reader. */
- public final int[] docBase;
-
/** Max docs per reader */
public final int[] maxDocs;
@@ -79,11 +82,15 @@
public final InfoStream infoStream;
/** Sole constructor. */
- MergeState(List<CodecReader> readers, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {
+ MergeState(List<CodecReader> originalReaders, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {
+
+ this.infoStream = infoStream;
+
+ final Sort indexSort = segmentInfo.getIndexSort();
+ int numReaders = originalReaders.size();
+ leafDocMaps = new DocMap[numReaders];
+ List<CodecReader> readers = maybeSortReaders(originalReaders, segmentInfo);
- int numReaders = readers.size();
- docMaps = new DocMap[numReaders];
- docBase = new int[numReaders];
maxDocs = new int[numReaders];
fieldsProducers = new FieldsProducer[numReaders];
normsProducers = new NormsProducer[numReaders];
@@ -94,6 +101,7 @@
fieldInfos = new FieldInfos[numReaders];
liveDocs = new Bits[numReaders];
+ int numDocs = 0;
for(int i=0;i<numReaders;i++) {
final CodecReader reader = readers.get(i);
@@ -126,126 +134,138 @@
if (pointsReaders[i] != null) {
pointsReaders[i] = pointsReaders[i].getMergeInstance();
}
+ numDocs += reader.numDocs();
}
- this.segmentInfo = segmentInfo;
- this.infoStream = infoStream;
+ segmentInfo.setMaxDoc(numDocs);
- setDocMaps(readers);
+ this.segmentInfo = segmentInfo;
+ this.docMaps = buildDocMaps(readers, indexSort);
}
- // NOTE: removes any "all deleted" readers from mergeState.readers
- private void setDocMaps(List<CodecReader> readers) throws IOException {
- final int numReaders = maxDocs.length;
-
- // Remap docIDs
- int docBase = 0;
- for(int i=0;i<numReaders;i++) {
- final CodecReader reader = readers.get(i);
- this.docBase[i] = docBase;
- final DocMap docMap = DocMap.build(reader);
- docMaps[i] = docMap;
- docBase += docMap.numDocs();
- }
-
- segmentInfo.setMaxDoc(docBase);
- }
+ private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {
- /**
- * Remaps docids around deletes during merge
- */
- public static abstract class DocMap {
+ int numReaders = readers.size();
- DocMap() {}
+ if (indexSort == null) {
+ // no index sort ... we only need to map around deletions and rebase to the merged segment's docID space
- /** Returns the mapped docID corresponding to the provided one. */
- public abstract int get(int docID);
+ int totalDocs = 0;
+ DocMap[] docMaps = new DocMap[numReaders];
- /** Returns the total number of documents, ignoring
- * deletions. */
- public abstract int maxDoc();
-
- /** Returns the number of not-deleted documents. */
- public final int numDocs() {
- return maxDoc() - numDeletedDocs();
- }
-
- /** Returns the number of deleted documents. */
- public abstract int numDeletedDocs();
-
- /** Returns true if there are any deletions. */
- public boolean hasDeletions() {
- return numDeletedDocs() > 0;
- }
-
- /** Creates a {@link DocMap} instance appropriate for
- * this reader. */
- public static DocMap build(CodecReader reader) {
- final int maxDoc = reader.maxDoc();
- if (!reader.hasDeletions()) {
- return new NoDelDocMap(maxDoc);
- }
- final Bits liveDocs = reader.getLiveDocs();
- return build(maxDoc, liveDocs);
- }
-
- static DocMap build(final int maxDoc, final Bits liveDocs) {
- assert liveDocs != null;
- final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
- int del = 0;
- for (int i = 0; i < maxDoc; ++i) {
- docMapBuilder.add(i - del);
- if (!liveDocs.get(i)) {
- ++del;
+ // Remap docIDs around deletions:
+ for (int i = 0; i < numReaders; i++) {
+ LeafReader reader = readers.get(i);
+ Bits liveDocs = reader.getLiveDocs();
+
+ final PackedLongValues delDocMap;
+ if (liveDocs != null) {
+ delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
+ } else {
+ delDocMap = null;
}
- }
- final PackedLongValues docMap = docMapBuilder.build();
- final int numDeletedDocs = del;
- assert docMap.size() == maxDoc;
- return new DocMap() {
-
- @Override
- public int get(int docID) {
- if (!liveDocs.get(docID)) {
- return -1;
+
+ final int docBase = totalDocs;
+ docMaps[i] = new DocMap() {
+ @Override
+ public int get(int docID) {
+ if (liveDocs == null) {
+ return docBase + docID;
+ } else if (liveDocs.get(docID)) {
+ return docBase + (int) delDocMap.get(docID);
+ } else {
+ return -1;
+ }
}
- return (int) docMap.get(docID);
- }
+ };
+ totalDocs += reader.numDocs();
+ }
- @Override
- public int maxDoc() {
- return maxDoc;
- }
+ return docMaps;
- @Override
- public int numDeletedDocs() {
- return numDeletedDocs;
- }
- };
+ } else {
+ // do a merge sort of the incoming leaves:
+ return MultiSorter.sort(indexSort, readers);
}
}
- private static final class NoDelDocMap extends DocMap {
-
- private final int maxDoc;
+ private List<CodecReader> maybeSortReaders(List<CodecReader> originalReaders, SegmentInfo segmentInfo) throws IOException {
- NoDelDocMap(int maxDoc) {
- this.maxDoc = maxDoc;
+ // Default to identity:
+ for(int i=0;i<originalReaders.size();i++) {
+ leafDocMaps[i] = new DocMap() {
+ @Override
+ public int get(int docID) {
+ return docID;
+ }
+ };
}
- @Override
- public int get(int docID) {
- return docID;
+ Sort indexSort = segmentInfo.getIndexSort();
+ if (indexSort == null) {
+ return originalReaders;
}
- @Override
- public int maxDoc() {
- return maxDoc;
+ // If an incoming reader is not sorted, because it was flushed by IW, we sort it here:
+ final Sorter sorter = new Sorter(indexSort);
+ List<CodecReader> readers = new ArrayList<>(originalReaders.size());
+
+ for (CodecReader leaf : originalReaders) {
+ Sort segmentSort = leaf.getIndexSort();
+
+ if (segmentSort == null) {
+ // TODO: fix IW to also sort when flushing? It's somewhat tricky because of stored fields and term vectors, which write "live"
+ // to their index files on each indexed document:
+
+ // This segment was written by flush, so documents are not yet sorted, so we sort them now:
+ Sorter.DocMap sortDocMap = sorter.sort(leaf);
+ if (sortDocMap != null) {
+ if (infoStream.isEnabled("SM")) {
+ infoStream.message("SM", "segment " + leaf + " is not sorted; wrapping for sort " + indexSort + " now");
+ }
+ leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap));
+ leafDocMaps[readers.size()] = new DocMap() {
+ @Override
+ public int get(int docID) {
+ return sortDocMap.oldToNew(docID);
+ }
+ };
+ } else {
+ if (infoStream.isEnabled("SM")) {
+ infoStream.message("SM", "segment " + leaf + " is not sorted, but is already accidentally in sort " + indexSort + " order");
+ }
+ }
+
+ } else {
+ if (segmentSort.equals(indexSort) == false) {
+ throw new IllegalArgumentException("index sort mismatch: merged segment has sort=" + indexSort + " but to-be-merged segment has sort=" + segmentSort);
+ }
+ if (infoStream.isEnabled("SM")) {
+ infoStream.message("SM", "segment " + leaf + " already sorted");
+ }
+ }
+
+ readers.add(leaf);
}
- @Override
- public int numDeletedDocs() {
- return 0;
+ return readers;
+ }
+
+ /** A map of doc IDs. */
+ public static abstract class DocMap {
+ /** Return the mapped docID or -1 if the given doc is not mapped. */
+ public abstract int get(int docID);
+ }
+
+ static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
+ final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
+ int del = 0;
+ for (int i = 0; i < maxDoc; ++i) {
+ docMapBuilder.add(i - del);
+ if (liveDocs.get(i) == false) {
+ ++del;
+ }
}
+ return docMapBuilder.build();
}
}
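
Aside (self-contained sketch, not from the patch): in the no-sort branch of buildDocMaps above, each reader's DocMap sends a deleted document to -1 and a live document to its docBase plus its position among the live documents that precede it. The same arithmetic over a hypothetical liveDocs array:

    class DocMapSketch {
      public static void main(String[] args) {
        boolean[] liveDocs = {true, false, true, true}; // doc 1 is deleted in this reader
        int docBase = 10;                               // documents of earlier readers come first
        int deletes = 0;
        for (int docID = 0; docID < liveDocs.length; docID++) {
          int mapped;
          if (liveDocs[docID]) {
            mapped = docBase + (docID - deletes);       // rebase and collapse deletions
          } else {
            mapped = -1;                                // deleted docs are not mapped
            deletes++;
          }
          System.out.println("old doc " + docID + " -> new doc " + mapped);
        }
      }
    }
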
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java indexsort/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java 2016-02-16 11:18:34.665021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java 2016-05-10 05:44:23.748471119 -0400
@@ -57,7 +57,7 @@
return this.parent == parent;
}
- /** Rre-use and reset this instance on the provided slices. */
+ /** Re-use and reset this instance on the provided slices. */
public MultiPostingsEnum reset(final EnumWithSlice[] subs, final int numSubs) {
this.numSubs = numSubs;
for(int i=0;i<numSubs;i++) {
@@ -165,9 +165,6 @@
/** Holds a {@link PostingsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice {
- EnumWithSlice() {
- }
-
/** {@link PostingsEnum} for this sub-reader. */
public PostingsEnum postingsEnum;
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java indexsort/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java 2016-05-10 05:44:23.748471119 -0400
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
+
+final class MultiSorter {
+
+ /** Does a merge sort of the leaves of the incoming reader, returning {@link MergeState#DocMap} to map each leaf's
+ * documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort! */
+ static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException {
+
+ // TODO: optimize if only 1 reader is incoming, though that's a rare case
+
+ SortField fields[] = sort.getSort();
+ final CrossReaderComparator[] comparators = new CrossReaderComparator[fields.length];
+ for(int i=0;i<fields.length;i++) {
+ comparators[i] = getComparator(readers, fields[i]);
+ }
+
+ int leafCount = readers.size();
+
+ PriorityQueue<LeafAndDocID> queue = new PriorityQueue<LeafAndDocID>(leafCount) {
+ @Override
+ public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
+ for(int i=0;i<comparators.length;i++) {
+ int cmp = comparators[i].compare(a.readerIndex, a.docID, b.readerIndex, b.docID);
+ if (cmp != 0) {
+ return cmp < 0;
+ }
+ }
+
+ // tie-break by docID natural order:
+ if (a.readerIndex != b.readerIndex) {
+ return a.readerIndex < b.readerIndex;
+ }
+ return a.docID < b.docID;
+ }
+ };
+
+ PackedLongValues.Builder[] builders = new PackedLongValues.Builder[leafCount];
+
+ for(int i=0;i<leafCount;i++) {
+ CodecReader reader = readers.get(i);
+ queue.add(new LeafAndDocID(i, reader.getLiveDocs(), reader.maxDoc()));
+ builders[i] = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
+ }
+
+ int mappedDocID = 0;
+ while (queue.size() != 0) {
+ LeafAndDocID top = queue.top();
+ builders[top.readerIndex].add(mappedDocID);
+ if (top.liveDocs == null || top.liveDocs.get(top.docID)) {
+ mappedDocID++;
+ }
+ top.docID++;
+ if (top.docID < top.maxDoc) {
+ queue.updateTop();
+ } else {
+ queue.pop();
+ }
+ }
+
+ MergeState.DocMap[] docMaps = new MergeState.DocMap[leafCount];
+ for(int i=0;i<leafCount;i++) {
+ final PackedLongValues remapped = builders[i].build();
+ final Bits liveDocs = readers.get(i).getLiveDocs();
+ docMaps[i] = new MergeState.DocMap() {
+ @Override
+ public int get(int docID) {
+ if (liveDocs == null || liveDocs.get(docID)) {
+ return (int) remapped.get(docID);
+ } else {
+ return -1;
+ }
+ }
+ };
+ }
+
+ return docMaps;
+ }
+
+ private static class LeafAndDocID {
+ final int readerIndex;
+ final Bits liveDocs;
+ final int maxDoc;
+ int docID;
+
+ public LeafAndDocID(int readerIndex, Bits liveDocs, int maxDoc) {
+ this.readerIndex = readerIndex;
+ this.liveDocs = liveDocs;
+ this.maxDoc = maxDoc;
+ }
+ }
+
+ private interface CrossReaderComparator {
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB);
+ }
+
+ private static CrossReaderComparator getComparator(List<CodecReader> readers, SortField sortField) throws IOException {
+ switch(sortField.getType()) {
+
+ case STRING:
+ {
+ // this uses the efficient segment-local ordinal map:
+ MultiReader multiReader = new MultiReader(readers.toArray(new LeafReader[readers.size()]));
+ final SortedDocValues sorted = MultiDocValues.getSortedValues(multiReader, sortField.getField());
+ final int[] docStarts = new int[readers.size()];
+ List<LeafReaderContext> leaves = multiReader.leaves();
+ for(int i=0;i<readers.size();i++) {
+ docStarts[i] = leaves.get(i).docBase;
+ }
+ final int missingOrd;
+ if (sortField.getMissingValue() == SortField.STRING_LAST) {
+ missingOrd = Integer.MAX_VALUE;
+ } else {
+ missingOrd = Integer.MIN_VALUE;
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ int ordA = sorted.getOrd(docStarts[readerIndexA] + docIDA);
+ if (ordA == -1) {
+ ordA = missingOrd;
+ }
+ int ordB = sorted.getOrd(docStarts[readerIndexB] + docIDB);
+ if (ordB == -1) {
+ ordB = missingOrd;
+ }
+ return reverseMul * Integer.compare(ordA, ordB);
+ }
+ };
+ }
+
+ case LONG:
+ {
+ List<NumericDocValues> values = new ArrayList<>();
+ List<Bits> docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final long missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Long) sortField.getMissingValue();
+ } else {
+ missingValue = 0;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ long valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = values.get(readerIndexA).get(docIDA);
+ } else {
+ valueA = missingValue;
+ }
+
+ long valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = values.get(readerIndexB).get(docIDB);
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Long.compare(valueA, valueB);
+ }
+ };
+ }
+
+ case INT:
+ {
+ List<NumericDocValues> values = new ArrayList<>();
+ List<Bits> docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final int missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Integer) sortField.getMissingValue();
+ } else {
+ missingValue = 0;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ int valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = (int) values.get(readerIndexA).get(docIDA);
+ } else {
+ valueA = missingValue;
+ }
+
+ int valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = (int) values.get(readerIndexB).get(docIDB);
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Integer.compare(valueA, valueB);
+ }
+ };
+ }
+
+ case DOUBLE:
+ {
+ List<NumericDocValues> values = new ArrayList<>();
+ List<Bits> docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final double missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Double) sortField.getMissingValue();
+ } else {
+ missingValue = 0.0;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ double valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = Double.longBitsToDouble(values.get(readerIndexA).get(docIDA));
+ } else {
+ valueA = missingValue;
+ }
+
+ double valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = Double.longBitsToDouble(values.get(readerIndexB).get(docIDB));
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Double.compare(valueA, valueB);
+ }
+ };
+ }
+
+ case FLOAT:
+ {
+ List<NumericDocValues> values = new ArrayList<>();
+ List<Bits> docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final float missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Float) sortField.getMissingValue();
+ } else {
+ missingValue = 0.0f;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ float valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = Float.intBitsToFloat((int) values.get(readerIndexA).get(docIDA));
+ } else {
+ valueA = missingValue;
+ }
+
+ float valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = Float.intBitsToFloat((int) values.get(readerIndexB).get(docIDB));
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Float.compare(valueA, valueB);
+ }
+ };
+ }
+
+ default:
+ throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());
+ }
+ }
+}
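
Aside (sketch under simplified assumptions, not from the patch): MultiSorter.sort above is a k-way merge driven by a priority queue: each leaf keeps a cursor, the queue surfaces the smallest current document under the sort, that document receives the next merged docID, and the cursor advances (deletions, which the patch handles by consulting liveDocs, are ignored here). The same control flow over plain pre-sorted long[] keys, using java.util.PriorityQueue instead of Lucene's:

    import java.util.Comparator;
    import java.util.PriorityQueue;

    class KWayMergeSketch {
      static final class Cursor {
        final int leaf;
        final long[] keys;
        int doc;
        Cursor(int leaf, long[] keys) { this.leaf = leaf; this.keys = keys; }
      }

      public static void main(String[] args) {
        long[][] leaves = { {1, 4, 9}, {2, 3, 10} };    // each leaf already sorted by the key
        PriorityQueue<Cursor> queue = new PriorityQueue<>(
            Comparator.comparingLong((Cursor c) -> c.keys[c.doc])
                      .thenComparingInt(c -> c.leaf));  // tie-break by leaf order, like the patch
        for (int i = 0; i < leaves.length; i++) {
          queue.add(new Cursor(i, leaves[i]));
        }
        int mappedDocID = 0;
        while (!queue.isEmpty()) {
          Cursor top = queue.poll();
          System.out.println("leaf " + top.leaf + " doc " + top.doc + " -> merged doc " + mappedDocID++);
          if (++top.doc < top.keys.length) {
            queue.add(top);                             // re-insert with the advanced cursor
          }
        }
      }
    }
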
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java 2016-03-08 17:22:26.828938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java 2016-05-10 05:44:23.748471119 -0400
@@ -26,6 +26,7 @@
import java.util.SortedMap;
import java.util.TreeMap;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/** An {@link LeafReader} which reads multiple, parallel indexes. Each index
@@ -55,6 +56,7 @@
private final boolean closeSubReaders;
private final int maxDoc, numDocs;
private final boolean hasDeletions;
+ private final Sort indexSort;
private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();
private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>();
@@ -100,8 +102,18 @@
// TODO: make this read-only in a cleaner way?
FieldInfos.Builder builder = new FieldInfos.Builder();
+
+ Sort indexSort = null;
+
// build FieldInfos and fieldToReader map:
for (final LeafReader reader : this.parallelReaders) {
+ Sort leafIndexSort = reader.getIndexSort();
+ if (indexSort == null) {
+ indexSort = leafIndexSort;
+ } else if (leafIndexSort != null && indexSort.equals(leafIndexSort) == false) {
+ throw new IllegalArgumentException("cannot combine LeafReaders that have different index sorts: saw both sort=" + indexSort + " and " + leafIndexSort);
+ }
+
final FieldInfos readerFieldInfos = reader.getFieldInfos();
for (FieldInfo fieldInfo : readerFieldInfos) {
// NOTE: first reader having a given field "wins":
@@ -115,6 +127,7 @@
}
}
fieldInfos = builder.finish();
+ this.indexSort = indexSort;
// build Fields instance
for (final LeafReader reader : this.parallelReaders) {
@@ -423,4 +436,10 @@
ensureOpen();
return parallelReaders;
}
+
+ @Override
+ public Sort getIndexSort() {
+ return indexSort;
+ }
+
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java 2016-02-16 11:18:34.669021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java 2016-05-10 05:44:23.748471119 -0400
@@ -28,6 +28,7 @@
import java.util.regex.Matcher;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.StringHelper;
@@ -69,6 +70,8 @@
private final Map<String,String> attributes;
+ private final Sort indexSort;
+
// Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an older than 3.0 index, and it's used to detect a too old index.
// The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
@@ -93,7 +96,7 @@
*/
public SegmentInfo(Directory dir, Version version, String name, int maxDoc,
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
- byte[] id, Map<String,String> attributes) {
+ byte[] id, Map<String,String> attributes, Sort indexSort) {
assert !(dir instanceof TrackingDirectoryWrapper);
this.dir = Objects.requireNonNull(dir);
this.version = Objects.requireNonNull(version);
@@ -107,6 +110,7 @@
throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
}
this.attributes = Objects.requireNonNull(attributes);
+ this.indexSort = indexSort;
}
/**
@@ -194,13 +198,9 @@
s.append('/').append(delCount);
}
- final String sorter_key = "sorter"; // SortingMergePolicy.SORTER_ID_PROP; // TODO: use this once we can import SortingMergePolicy (currently located in 'misc' instead of 'core')
- final String sorter_val = diagnostics.get(sorter_key);
- if (sorter_val != null) {
- s.append(":[");
- s.append(sorter_key);
- s.append('=');
- s.append(sorter_val);
+ if (indexSort != null) {
+ s.append(":[indexSort=");
+ s.append(indexSort);
s.append(']');
}
@@ -311,5 +311,10 @@
public Map<String,String> getAttributes() {
return attributes;
}
+
+ /** Return the sort order of this segment, or null if the index has no sort. */
+ public Sort getIndexSort() {
+ return indexSort;
+ }
}
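
Aside (illustration, not from the patch): the Sort stored on SegmentInfo above is an ordinary Lucene Sort, so an index sort on, say, a hypothetical long doc-values field "timestamp" in descending order would be built the usual way:

    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;

    class IndexSortSketch {
      public static void main(String[] args) {
        // "timestamp" is a hypothetical long doc-values field; the boolean requests reverse (descending) order.
        Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
        System.out.println(indexSort); // prints the sort, much as SegmentInfo.toString() now does for sorted segments
      }
    }
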
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java 2016-03-08 17:22:26.832938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java 2016-05-10 05:44:23.748471119 -0400
@@ -59,6 +59,11 @@
this.codec = segmentInfo.getCodec();
this.context = context;
this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
+ if (mergeState.infoStream.isEnabled("SM")) {
+ if (segmentInfo.getIndexSort() != null) {
+ mergeState.infoStream.message("SM", "index sort during merge: " + segmentInfo.getIndexSort());
+ }
+ }
}
/** True if any merging should happen */
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java 2016-03-08 17:22:26.832938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java 2016-05-10 05:44:23.748471119 -0400
@@ -28,6 +28,7 @@
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
@@ -303,4 +304,9 @@
ensureOpen();
core.removeCoreClosedListener(listener);
}
+
+ @Override
+ public Sort getIndexSort() {
+ return si.info.getIndexSort();
+ }
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java indexsort/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java 2016-03-08 17:22:26.832938630 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java 2016-05-10 05:44:23.748471119 -0400
@@ -26,6 +26,7 @@
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
@@ -125,6 +126,16 @@
public void removeCoreClosedListener(CoreClosedListener listener) {
reader.removeCoreClosedListener(listener);
}
+
+ @Override
+ public String toString() {
+ return "SlowCodecReaderWrapper(" + reader + ")";
+ }
+
+ @Override
+ public Sort getIndexSort() {
+ return reader.getIndexSort();
+ }
};
}
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/Sorter.java indexsort/lucene/core/src/java/org/apache/lucene/index/Sorter.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/Sorter.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/Sorter.java 2016-05-10 05:44:23.748471119 -0400
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.TimSorter;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
+
+/**
+ * Sorts documents of a given index by returning a permutation on the document
+ * IDs.
+ * @lucene.experimental
+ */
+final class Sorter {
+ final Sort sort;
+
+ /** Creates a new Sorter to sort the index with {@code sort} */
+ Sorter(Sort sort) {
+ if (sort.needsScores()) {
+ throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
+ }
+ this.sort = sort;
+ }
+
+ /**
+ * A permutation of doc IDs. For every document ID between <tt>0</tt> and
+ * {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
+ * return <code>docID</code>.
+ */
+ static abstract class DocMap {
+
+ /** Given a doc ID from the original index, return its ordinal in the
+ * sorted index. */
+ abstract int oldToNew(int docID);
+
+ /** Given the ordinal of a doc ID, return its doc ID in the original index. */
+ abstract int newToOld(int docID);
+
+ /** Return the number of documents in this map. This must be equal to the
+ * {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the
+ * {@link org.apache.lucene.index.LeafReader} which is sorted. */
+ abstract int size();
+ }
+
+ /** Check consistency of a {@link DocMap}, useful for assertions. */
+ static boolean isConsistent(DocMap docMap) {
+ final int maxDoc = docMap.size();
+ for (int i = 0; i < maxDoc; ++i) {
+ final int newID = docMap.oldToNew(i);
+ final int oldID = docMap.newToOld(newID);
+ assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID;
+ assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID;
+ if (i != oldID || newID < 0 || newID >= maxDoc) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /** A comparator of doc IDs. */
+ static abstract class DocComparator {
+
+ /** Compare docID1 against docID2. The contract for the return value is the
+ * same as {@link Comparator#compare(Object, Object)}. */
+ public abstract int compare(int docID1, int docID2);
+
+ }
+
+ private static final class DocValueSorter extends TimSorter {
+
+ private final int[] docs;
+ private final Sorter.DocComparator comparator;
+ private final int[] tmp;
+
+ DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
+ super(docs.length / 64);
+ this.docs = docs;
+ this.comparator = comparator;
+ tmp = new int[docs.length / 64];
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return comparator.compare(docs[i], docs[j]);
+ }
+
+ @Override
+ protected void swap(int i, int j) {
+ int tmpDoc = docs[i];
+ docs[i] = docs[j];
+ docs[j] = tmpDoc;
+ }
+
+ @Override
+ protected void copy(int src, int dest) {
+ docs[dest] = docs[src];
+ }
+
+ @Override
+ protected void save(int i, int len) {
+ System.arraycopy(docs, i, tmp, 0, len);
+ }
+
+ @Override
+ protected void restore(int i, int j) {
+ docs[j] = tmp[i];
+ }
+
+ @Override
+ protected int compareSaved(int i, int j) {
+ return comparator.compare(tmp[i], docs[j]);
+ }
+ }
+
+ /** Computes the old-to-new permutation over the given comparator. */
+ private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
+ // check if the index is sorted
+ boolean sorted = true;
+ for (int i = 1; i < maxDoc; ++i) {
+ if (comparator.compare(i-1, i) > 0) {
+ sorted = false;
+ break;
+ }
+ }
+ if (sorted) {
+ return null;
+ }
+
+ // sort doc IDs
+ final int[] docs = new int[maxDoc];
+ for (int i = 0; i < maxDoc; i++) {
+ docs[i] = i;
+ }
+
+ DocValueSorter sorter = new DocValueSorter(docs, comparator);
+ // It can be common to sort a reader, add docs, sort it again, ... and in
+ // that case timSort can save a lot of time
+ sorter.sort(0, docs.length); // docs is now the newToOld mapping
+
+ // The reason why we use a monotonic PackedLongValues builder here is that it
+ // wastes very little memory if the index is in random order but can save
+ // a lot of memory if the index is already "almost" sorted
+ final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
+ for (int i = 0; i < maxDoc; ++i) {
+ newToOldBuilder.add(docs[i]);
+ }
+ final PackedLongValues newToOld = newToOldBuilder.build();
+
+ // invert the docs mapping:
+ for (int i = 0; i < maxDoc; ++i) {
+ docs[(int) newToOld.get(i)] = i;
+ } // docs is now the oldToNew mapping
+
+ final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
+ for (int i = 0; i < maxDoc; ++i) {
+ oldToNewBuilder.add(docs[i]);
+ }
+ final PackedLongValues oldToNew = oldToNewBuilder.build();
+
+ return new Sorter.DocMap() {
+
+ @Override
+ public int oldToNew(int docID) {
+ return (int) oldToNew.get(docID);
+ }
+
+ @Override
+ public int newToOld(int docID) {
+ return (int) newToOld.get(docID);
+ }
+
+ @Override
+ public int size() {
+ return maxDoc;
+ }
+ };
+ }
+
+ /**
+ * Returns a mapping from the old document ID to its new location in the
+ * sorted index. Implementations can use the auxiliary
+ * {@link #sort(int, DocComparator)} to compute the old-to-new permutation
+ * given a list of documents and their corresponding values.
+ * <p>
+ * A return value of <tt>null</tt> is allowed and means that
+ * <code>reader</code> is already sorted.
+ * <p>
+ * <b>NOTE:</b> deleted documents are expected to appear in the mapping as
+ * well, they will however be marked as deleted in the sorted view.
+ */
+ DocMap sort(LeafReader reader) throws IOException {
+ SortField fields[] = sort.getSort();
+ final int reverseMul[] = new int[fields.length];
+ final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length];
+
+ for (int i = 0; i < fields.length; i++) {
+ reverseMul[i] = fields[i].getReverse() ? -1 : 1;
+ comparators[i] = fields[i].getComparator(1, i).getLeafComparator(reader.getContext());
+ comparators[i].setScorer(FAKESCORER);
+ }
+ final DocComparator comparator = new DocComparator() {
+ @Override
+ public int compare(int docID1, int docID2) {
+ try {
+ for (int i = 0; i < comparators.length; i++) {
+ // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
+ // the segments are always the same here...
+ comparators[i].copy(0, docID1);
+ comparators[i].setBottom(0);
+ int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
+ if (comp != 0) {
+ return comp;
+ }
+ }
+ return Integer.compare(docID1, docID2); // docid order tiebreak
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ return sort(reader.maxDoc(), comparator);
+ }
+
+ /**
+ * Returns the identifier of this {@link Sorter}.
+ * <p>This identifier is similar to {@link Object#hashCode()} and should be
+ * chosen so that two instances of this class that sort documents likewise
+ * will have the same identifier. On the contrary, this identifier should be
+ * different on different {@link Sort sorts}.
+ */
+ public String getID() {
+ return sort.toString();
+ }
+
+ @Override
+ public String toString() {
+ return getID();
+ }
+
+ static final Scorer FAKESCORER = new Scorer(null) {
+
+ float score;
+ int doc = -1;
+ int freq = 1;
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ public DocIdSetIterator iterator() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return freq;
+ }
+
+ @Override
+ public float score() throws IOException {
+ return score;
+ }
+ };
+
+}
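
Aside (standalone sketch, not from the patch): Sorter.sort above first produces the newToOld permutation by sorting doc IDs, then inverts it to obtain oldToNew. The inversion itself, over a hypothetical newToOld array:

    class PermutationSketch {
      public static void main(String[] args) {
        int[] newToOld = {2, 0, 3, 1};                  // sorted position -> original docID
        int[] oldToNew = new int[newToOld.length];
        for (int newDoc = 0; newDoc < newToOld.length; newDoc++) {
          oldToNew[newToOld[newDoc]] = newDoc;          // invert: original docID -> sorted position
        }
        for (int oldDoc = 0; oldDoc < oldToNew.length; oldDoc++) {
          // oldToNew(newToOld(d)) == d, as Sorter.DocMap requires
          System.out.println("old doc " + oldDoc + " -> new doc " + oldToNew[oldDoc]);
        }
      }
    }
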
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java indexsort/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
--- trunk/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java 2016-05-10 05:44:23.748471119 -0400
@@ -0,0 +1,914 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.index.Sorter.DocMap;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMFile;
+import org.apache.lucene.store.RAMInputStream;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.TimSorter;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+
+/**
+ * An {@link org.apache.lucene.index.LeafReader} which supports sorting documents by a given
+ * {@link Sort}. This is package private and is only used by Lucene when it needs to merge
+ * a newly flushed (unsorted) segment.
+ *
+ * @lucene.experimental
+ */
+
+class SortingLeafReader extends FilterLeafReader {
+
+ private static class SortingFields extends FilterFields {
+
+ private final Sorter.DocMap docMap;
+ private final FieldInfos infos;
+
+ public SortingFields(final Fields in, FieldInfos infos, Sorter.DocMap docMap) {
+ super(in);
+ this.docMap = docMap;
+ this.infos = infos;
+ }
+
+ @Override
+ public Terms terms(final String field) throws IOException {
+ Terms terms = in.terms(field);
+ if (terms == null) {
+ return null;
+ } else {
+ return new SortingTerms(terms, infos.fieldInfo(field).getIndexOptions(), docMap);
+ }
+ }
+
+ }
+
+ private static class SortingTerms extends FilterTerms {
+
+ private final Sorter.DocMap docMap;
+ private final IndexOptions indexOptions;
+
+ public SortingTerms(final Terms in, IndexOptions indexOptions, final Sorter.DocMap docMap) {
+ super(in);
+ this.docMap = docMap;
+ this.indexOptions = indexOptions;
+ }
+
+ @Override
+ public TermsEnum iterator() throws IOException {
+ return new SortingTermsEnum(in.iterator(), docMap, indexOptions, hasPositions());
+ }
+
+ @Override
+ public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm)
+ throws IOException {
+ return new SortingTermsEnum(in.intersect(compiled, startTerm), docMap, indexOptions, hasPositions());
+ }
+
+ }
+
+ private static class SortingTermsEnum extends FilterTermsEnum {
+
+ final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
+ private final IndexOptions indexOptions;
+ private final boolean hasPositions;
+
+ public SortingTermsEnum(final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions, boolean hasPositions) {
+ super(in);
+ this.docMap = docMap;
+ this.indexOptions = indexOptions;
+ this.hasPositions = hasPositions;
+ }
+
+ @Override
+ public PostingsEnum postings( PostingsEnum reuse, final int flags) throws IOException {
+
+ if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
+ final PostingsEnum inReuse;
+ final SortingPostingsEnum wrapReuse;
+ if (reuse != null && reuse instanceof SortingPostingsEnum) {
+ // if we're asked to reuse the given DocsEnum and it is Sorting, return
+ // the wrapped one, since some Codecs expect it.
+ wrapReuse = (SortingPostingsEnum) reuse;
+ inReuse = wrapReuse.getWrapped();
+ } else {
+ wrapReuse = null;
+ inReuse = reuse;
+ }
+
+ final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags);
+ // we ignore the fact that offsets may be stored but not asked for,
+ // since this code is expected to be used during addIndexes which will
+ // ask for everything. if that assumption changes in the future, we can
+ // factor in whether 'flags' says offsets are not required.
+ final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ return new SortingPostingsEnum(docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets);
+ }
+
+ final PostingsEnum inReuse;
+ final SortingDocsEnum wrapReuse;
+ if (reuse != null && reuse instanceof SortingDocsEnum) {
+ // if we're asked to reuse the given DocsEnum and it is Sorting, return
+ // the wrapped one, since some Codecs expect it.
+ wrapReuse = (SortingDocsEnum) reuse;
+ inReuse = wrapReuse.getWrapped();
+ } else {
+ wrapReuse = null;
+ inReuse = reuse;
+ }
+
+ final PostingsEnum inDocs = in.postings(inReuse, flags);
+ final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
+ return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
+ }
+
+ }
+
+ private static class SortingBinaryDocValues extends BinaryDocValues {
+
+ private final BinaryDocValues in;
+ private final Sorter.DocMap docMap;
+
+ SortingBinaryDocValues(BinaryDocValues in, Sorter.DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public BytesRef get(int docID) {
+ return in.get(docMap.newToOld(docID));
+ }
+ }
+
+ private static class SortingNumericDocValues extends NumericDocValues {
+
+ private final NumericDocValues in;
+ private final Sorter.DocMap docMap;
+
+ public SortingNumericDocValues(final NumericDocValues in, Sorter.DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public long get(int docID) {
+ return in.get(docMap.newToOld(docID));
+ }
+ }
+
+ private static class SortingSortedNumericDocValues extends SortedNumericDocValues {
+
+ private final SortedNumericDocValues in;
+ private final Sorter.DocMap docMap;
+
+ SortingSortedNumericDocValues(SortedNumericDocValues in, DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public int count() {
+ return in.count();
+ }
+
+ @Override
+ public void setDocument(int doc) {
+ in.setDocument(docMap.newToOld(doc));
+ }
+
+ @Override
+ public long valueAt(int index) {
+ return in.valueAt(index);
+ }
+ }
+
+ private static class SortingBits implements Bits {
+
+ private final Bits in;
+ private final Sorter.DocMap docMap;
+
+ public SortingBits(final Bits in, Sorter.DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public boolean get(int index) {
+ return in.get(docMap.newToOld(index));
+ }
+
+ @Override
+ public int length() {
+ return in.length();
+ }
+ }
+
+ private static class SortingPointValues extends PointValues {
+
+ private final PointValues in;
+ private final Sorter.DocMap docMap;
+
+ public SortingPointValues(final PointValues in, Sorter.DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
+ in.intersect(fieldName,
+ new IntersectVisitor() {
+ @Override
+ public void visit(int docID) throws IOException {
+ visitor.visit(docMap.oldToNew(docID));
+ }
+
+ @Override
+ public void visit(int docID, byte[] packedValue) throws IOException {
+ visitor.visit(docMap.oldToNew(docID), packedValue);
+ }
+
+ @Override
+ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+ return visitor.compare(minPackedValue, maxPackedValue);
+ }
+ });
+ }
+
+ @Override
+ public byte[] getMinPackedValue(String fieldName) throws IOException {
+ return in.getMinPackedValue(fieldName);
+ }
+
+ @Override
+ public byte[] getMaxPackedValue(String fieldName) throws IOException {
+ return in.getMaxPackedValue(fieldName);
+ }
+
+ @Override
+ public int getNumDimensions(String fieldName) throws IOException {
+ return in.getNumDimensions(fieldName);
+ }
+
+ @Override
+ public int getBytesPerDimension(String fieldName) throws IOException {
+ return in.getBytesPerDimension(fieldName);
+ }
+
+ @Override
+ public long size(String fieldName) {
+ return in.size(fieldName);
+ }
+
+ @Override
+ public int getDocCount(String fieldName) {
+ return in.getDocCount(fieldName);
+ }
+ }
+
+ private static class SortingSortedDocValues extends SortedDocValues {
+
+ private final SortedDocValues in;
+ private final Sorter.DocMap docMap;
+
+ SortingSortedDocValues(SortedDocValues in, Sorter.DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public int getOrd(int docID) {
+ return in.getOrd(docMap.newToOld(docID));
+ }
+
+ @Override
+ public BytesRef lookupOrd(int ord) {
+ return in.lookupOrd(ord);
+ }
+
+ @Override
+ public int getValueCount() {
+ return in.getValueCount();
+ }
+
+ @Override
+ public BytesRef get(int docID) {
+ return in.get(docMap.newToOld(docID));
+ }
+
+ @Override
+ public int lookupTerm(BytesRef key) {
+ return in.lookupTerm(key);
+ }
+ }
+
+ private static class SortingSortedSetDocValues extends SortedSetDocValues {
+
+ private final SortedSetDocValues in;
+ private final Sorter.DocMap docMap;
+
+ SortingSortedSetDocValues(SortedSetDocValues in, Sorter.DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public long nextOrd() {
+ return in.nextOrd();
+ }
+
+ @Override
+ public void setDocument(int docID) {
+ //System.out.println(" slr.sssdv.setDocument docID=" + docID + " this=" + this);
+ in.setDocument(docMap.newToOld(docID));
+ }
+
+ @Override
+ public BytesRef lookupOrd(long ord) {
+ return in.lookupOrd(ord);
+ }
+
+ @Override
+ public long getValueCount() {
+ return in.getValueCount();
+ }
+
+ @Override
+ public long lookupTerm(BytesRef key) {
+ return in.lookupTerm(key);
+ }
+ }
+
+ static class SortingDocsEnum extends FilterPostingsEnum {
+
+ private static final class DocFreqSorter extends TimSorter {
+
+ private int[] docs;
+ private int[] freqs;
+ private final int[] tmpDocs;
+ private int[] tmpFreqs;
+
+ public DocFreqSorter(int maxDoc) {
+ super(maxDoc / 64);
+ this.tmpDocs = new int[maxDoc / 64];
+ }
+
+ public void reset(int[] docs, int[] freqs) {
+ this.docs = docs;
+ this.freqs = freqs;
+ if (freqs != null && tmpFreqs == null) {
+ tmpFreqs = new int[tmpDocs.length];
+ }
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return docs[i] - docs[j];
+ }
+
+ @Override
+ protected void swap(int i, int j) {
+ int tmpDoc = docs[i];
+ docs[i] = docs[j];
+ docs[j] = tmpDoc;
+
+ if (freqs != null) {
+ int tmpFreq = freqs[i];
+ freqs[i] = freqs[j];
+ freqs[j] = tmpFreq;
+ }
+ }
+
+ @Override
+ protected void copy(int src, int dest) {
+ docs[dest] = docs[src];
+ if (freqs != null) {
+ freqs[dest] = freqs[src];
+ }
+ }
+
+ @Override
+ protected void save(int i, int len) {
+ System.arraycopy(docs, i, tmpDocs, 0, len);
+ if (freqs != null) {
+ System.arraycopy(freqs, i, tmpFreqs, 0, len);
+ }
+ }
+
+ @Override
+ protected void restore(int i, int j) {
+ docs[j] = tmpDocs[i];
+ if (freqs != null) {
+ freqs[j] = tmpFreqs[i];
+ }
+ }
+
+ @Override
+ protected int compareSaved(int i, int j) {
+ return tmpDocs[i] - docs[j];
+ }
+ }
+
+ private final int maxDoc;
+ private final DocFreqSorter sorter;
+ private int[] docs;
+ private int[] freqs;
+ private int docIt = -1;
+ private final int upto;
+ private final boolean withFreqs;
+
+ SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, final PostingsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
+ super(in);
+ this.maxDoc = maxDoc;
+ this.withFreqs = withFreqs;
+ if (reuse != null) {
+ if (reuse.maxDoc == maxDoc) {
+ sorter = reuse.sorter;
+ } else {
+ sorter = new DocFreqSorter(maxDoc);
+ }
+ docs = reuse.docs;
+ freqs = reuse.freqs; // maybe null
+ } else {
+ docs = new int[64];
+ sorter = new DocFreqSorter(maxDoc);
+ }
+ docIt = -1;
+ int i = 0;
+ int doc;
+ if (withFreqs) {
+ if (freqs == null || freqs.length < docs.length) {
+ freqs = new int[docs.length];
+ }
+ while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
+ if (i >= docs.length) {
+ docs = ArrayUtil.grow(docs, docs.length + 1);
+ freqs = ArrayUtil.grow(freqs, freqs.length + 1);
+ }
+ docs[i] = docMap.oldToNew(doc);
+ freqs[i] = in.freq();
+ ++i;
+ }
+ } else {
+ freqs = null;
+ while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
+ if (i >= docs.length) {
+ docs = ArrayUtil.grow(docs, docs.length + 1);
+ }
+ docs[i++] = docMap.oldToNew(doc);
+ }
+ }
+ // TimSort can save much time compared to other sorts in case of
+ // reverse sorting, or when sorting a concatenation of sorted readers
+ sorter.reset(docs, freqs);
+ sorter.sort(0, i);
+ upto = i;
+ }
+
+ // for testing
+ boolean reused(PostingsEnum other) {
+ if (other == null || !(other instanceof SortingDocsEnum)) {
+ return false;
+ }
+ return docs == ((SortingDocsEnum) other).docs;
+ }
+
+ @Override
+ public int advance(final int target) throws IOException {
+ // need to support it for checkIndex, but in practice it won't be called, so
+ // don't bother to implement efficiently for now.
+ return slowAdvance(target);
+ }
+
+ @Override
+ public int docID() {
+ return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return withFreqs && docIt < upto ? freqs[docIt] : 1;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (++docIt >= upto) return NO_MORE_DOCS;
+ return docs[docIt];
+ }
+
+ /** Returns the wrapped {@link PostingsEnum}. */
+ PostingsEnum getWrapped() {
+ return in;
+ }
+
+    // we buffer up docs/freqs only, and don't forward any position requests to the underlying enum
+
+ @Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ return null;
+ }
+ }
+
+ static class SortingPostingsEnum extends FilterPostingsEnum {
+
+ /**
+ * A {@link TimSorter} which sorts two parallel arrays of doc IDs and
+   * offsets in one go. Every time a doc ID is 'swapped', its corresponding offset
+ * is swapped too.
+ */
+ private static final class DocOffsetSorter extends TimSorter {
+
+ private int[] docs;
+ private long[] offsets;
+ private final int[] tmpDocs;
+ private final long[] tmpOffsets;
+
+ public DocOffsetSorter(int maxDoc) {
+ super(maxDoc / 64);
+ this.tmpDocs = new int[maxDoc / 64];
+ this.tmpOffsets = new long[maxDoc / 64];
+ }
+
+ public void reset(int[] docs, long[] offsets) {
+ this.docs = docs;
+ this.offsets = offsets;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return docs[i] - docs[j];
+ }
+
+ @Override
+ protected void swap(int i, int j) {
+ int tmpDoc = docs[i];
+ docs[i] = docs[j];
+ docs[j] = tmpDoc;
+
+ long tmpOffset = offsets[i];
+ offsets[i] = offsets[j];
+ offsets[j] = tmpOffset;
+ }
+
+ @Override
+ protected void copy(int src, int dest) {
+ docs[dest] = docs[src];
+ offsets[dest] = offsets[src];
+ }
+
+ @Override
+ protected void save(int i, int len) {
+ System.arraycopy(docs, i, tmpDocs, 0, len);
+ System.arraycopy(offsets, i, tmpOffsets, 0, len);
+ }
+
+ @Override
+ protected void restore(int i, int j) {
+ docs[j] = tmpDocs[i];
+ offsets[j] = tmpOffsets[i];
+ }
+
+ @Override
+ protected int compareSaved(int i, int j) {
+ return tmpDocs[i] - docs[j];
+ }
+ }
+
+ private final int maxDoc;
+ private final DocOffsetSorter sorter;
+ private int[] docs;
+ private long[] offsets;
+ private final int upto;
+
+ private final IndexInput postingInput;
+ private final boolean storeOffsets;
+
+ private int docIt = -1;
+ private int pos;
+ private int startOffset = -1;
+ private int endOffset = -1;
+ private final BytesRef payload;
+ private int currFreq;
+
+ private final RAMFile file;
+
+ SortingPostingsEnum(int maxDoc, SortingPostingsEnum reuse, final PostingsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
+ super(in);
+ this.maxDoc = maxDoc;
+ this.storeOffsets = storeOffsets;
+ if (reuse != null) {
+ docs = reuse.docs;
+ offsets = reuse.offsets;
+ payload = reuse.payload;
+ file = reuse.file;
+ if (reuse.maxDoc == maxDoc) {
+ sorter = reuse.sorter;
+ } else {
+ sorter = new DocOffsetSorter(maxDoc);
+ }
+ } else {
+ docs = new int[32];
+ offsets = new long[32];
+ payload = new BytesRef(32);
+ file = new RAMFile();
+ sorter = new DocOffsetSorter(maxDoc);
+ }
+ final IndexOutput out = new RAMOutputStream(file, false);
+ int doc;
+ int i = 0;
+ while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ if (i == docs.length) {
+ final int newLength = ArrayUtil.oversize(i + 1, 4);
+ docs = Arrays.copyOf(docs, newLength);
+ offsets = Arrays.copyOf(offsets, newLength);
+ }
+ docs[i] = docMap.oldToNew(doc);
+ offsets[i] = out.getFilePointer();
+ addPositions(in, out);
+ i++;
+ }
+ upto = i;
+ sorter.reset(docs, offsets);
+ sorter.sort(0, upto);
+ out.close();
+ this.postingInput = new RAMInputStream("", file);
+ }
+
+ // for testing
+ boolean reused(PostingsEnum other) {
+ if (other == null || !(other instanceof SortingPostingsEnum)) {
+ return false;
+ }
+ return docs == ((SortingPostingsEnum) other).docs;
+ }
+
+ private void addPositions(final PostingsEnum in, final IndexOutput out) throws IOException {
+ int freq = in.freq();
+ out.writeVInt(freq);
+ int previousPosition = 0;
+ int previousEndOffset = 0;
+ for (int i = 0; i < freq; i++) {
+ final int pos = in.nextPosition();
+ final BytesRef payload = in.getPayload();
+ // The low-order bit of token is set only if there is a payload, the
+ // previous bits are the delta-encoded position.
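+      // For example (illustrative only): a position delta of 3 with a payload encodes as
+      // (3 << 1) | 1 = 7, while the same delta without a payload encodes as 3 << 1 = 6.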
+ final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
+ out.writeVInt(token);
+ previousPosition = pos;
+ if (storeOffsets) { // don't encode offsets if they are not stored
+ final int startOffset = in.startOffset();
+ final int endOffset = in.endOffset();
+ out.writeVInt(startOffset - previousEndOffset);
+ out.writeVInt(endOffset - startOffset);
+ previousEndOffset = endOffset;
+ }
+ if (payload != null) {
+ out.writeVInt(payload.length);
+ out.writeBytes(payload.bytes, payload.offset, payload.length);
+ }
+ }
+ }
+
+ @Override
+ public int advance(final int target) throws IOException {
+ // need to support it for checkIndex, but in practice it won't be called, so
+ // don't bother to implement efficiently for now.
+ return slowAdvance(target);
+ }
+
+ @Override
+ public int docID() {
+ return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return endOffset;
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return currFreq;
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ return payload.length == 0 ? null : payload;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (++docIt >= upto) return DocIdSetIterator.NO_MORE_DOCS;
+ postingInput.seek(offsets[docIt]);
+ currFreq = postingInput.readVInt();
+ // reset variables used in nextPosition
+ pos = 0;
+ endOffset = 0;
+ return docs[docIt];
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ final int token = postingInput.readVInt();
+ pos += token >>> 1;
+ if (storeOffsets) {
+ startOffset = endOffset + postingInput.readVInt();
+ endOffset = startOffset + postingInput.readVInt();
+ }
+ if ((token & 1) != 0) {
+ payload.offset = 0;
+ payload.length = postingInput.readVInt();
+ if (payload.length > payload.bytes.length) {
+ payload.bytes = new byte[ArrayUtil.oversize(payload.length, 1)];
+ }
+ postingInput.readBytes(payload.bytes, 0, payload.length);
+ } else {
+ payload.length = 0;
+ }
+ return pos;
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return startOffset;
+ }
+
+ /** Returns the wrapped {@link PostingsEnum}. */
+ PostingsEnum getWrapped() {
+ return in;
+ }
+ }
+
+ /** Return a sorted view of <code>reader</code> according to the order
+ * defined by <code>sort</code>. If the reader is already sorted, this
+ * method might return the reader as-is. */
+ public static LeafReader wrap(LeafReader reader, Sort sort) throws IOException {
+ return wrap(reader, new Sorter(sort).sort(reader));
+ }
+
+ /** Expert: same as {@link #wrap(org.apache.lucene.index.LeafReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */
+ static LeafReader wrap(LeafReader reader, Sorter.DocMap docMap) {
+ if (docMap == null) {
+ // the reader is already sorted
+ return reader;
+ }
+ if (reader.maxDoc() != docMap.size()) {
+      throw new IllegalArgumentException("reader.maxDoc() should be equal to docMap.size(), got " + reader.maxDoc() + " != " + docMap.size());
+ }
+ assert Sorter.isConsistent(docMap);
+ return new SortingLeafReader(reader, docMap);
+ }
+
+ final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
+
+ private SortingLeafReader(final LeafReader in, final Sorter.DocMap docMap) {
+ super(in);
+ this.docMap = docMap;
+ }
+
+ @Override
+ public void document(final int docID, final StoredFieldVisitor visitor) throws IOException {
+ in.document(docMap.newToOld(docID), visitor);
+ }
+
+ @Override
+ public Fields fields() throws IOException {
+ return new SortingFields(in.fields(), in.getFieldInfos(), docMap);
+ }
+
+ @Override
+ public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+ BinaryDocValues oldDocValues = in.getBinaryDocValues(field);
+ if (oldDocValues == null) {
+ return null;
+ } else {
+ return new SortingBinaryDocValues(oldDocValues, docMap);
+ }
+ }
+
+ @Override
+ public Bits getLiveDocs() {
+ final Bits inLiveDocs = in.getLiveDocs();
+ if (inLiveDocs == null) {
+ return null;
+ } else {
+ return new SortingBits(inLiveDocs, docMap);
+ }
+ }
+
+ @Override
+ public PointValues getPointValues() {
+ final PointValues inPointValues = in.getPointValues();
+ if (inPointValues == null) {
+ return null;
+ } else {
+ return new SortingPointValues(inPointValues, docMap);
+ }
+ }
+
+ @Override
+ public NumericDocValues getNormValues(String field) throws IOException {
+ final NumericDocValues norm = in.getNormValues(field);
+ if (norm == null) {
+ return null;
+ } else {
+ return new SortingNumericDocValues(norm, docMap);
+ }
+ }
+
+ @Override
+ public NumericDocValues getNumericDocValues(String field) throws IOException {
+ final NumericDocValues oldDocValues = in.getNumericDocValues(field);
+ if (oldDocValues == null) return null;
+ return new SortingNumericDocValues(oldDocValues, docMap);
+ }
+
+ @Override
+ public SortedNumericDocValues getSortedNumericDocValues(String field)
+ throws IOException {
+ final SortedNumericDocValues oldDocValues = in.getSortedNumericDocValues(field);
+ if (oldDocValues == null) {
+ return null;
+ } else {
+ return new SortingSortedNumericDocValues(oldDocValues, docMap);
+ }
+ }
+
+ @Override
+ public SortedDocValues getSortedDocValues(String field) throws IOException {
+ SortedDocValues sortedDV = in.getSortedDocValues(field);
+ if (sortedDV == null) {
+ return null;
+ } else {
+ return new SortingSortedDocValues(sortedDV, docMap);
+ }
+ }
+
+ @Override
+ public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+ SortedSetDocValues sortedSetDV = in.getSortedSetDocValues(field);
+ if (sortedSetDV == null) {
+ return null;
+ } else {
+ return new SortingSortedSetDocValues(sortedSetDV, docMap);
+ }
+ }
+
+ @Override
+ public Bits getDocsWithField(String field) throws IOException {
+ Bits bits = in.getDocsWithField(field);
+ if (bits == null || bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) {
+ return bits;
+ } else {
+ return new SortingBits(bits, docMap);
+ }
+ }
+
+ @Override
+ public Fields getTermVectors(final int docID) throws IOException {
+ return in.getTermVectors(docMap.newToOld(docID));
+ }
+
+ @Override
+ public String toString() {
+ return "SortingLeafReader(" + in + ")";
+ }
+}
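For orientation (not part of the patch), a minimal sketch of the feature this sorted view supports from the application side: setting an index sort on IndexWriterConfig, then merging and reading back values in sorted order. It mirrors TestIndexSorting.testBasicLong added later in this patch; StandardAnalyzer (from the analyzers-common module) and RAMDirectory are illustrative choices, not taken from the patch.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class IndexSortExample {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG)));
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      for (long value : new long[] {18, -1, 7}) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField("foo", value));
        w.addDocument(doc);
        w.commit(); // several segments, so forceMerge below actually merges (and sorts)
      }
      w.forceMerge(1);
      try (DirectoryReader r = DirectoryReader.open(w)) {
        LeafReader leaf = r.leaves().get(0).reader();
        NumericDocValues values = leaf.getNumericDocValues("foo");
        for (int docID = 0; docID < leaf.maxDoc(); docID++) {
          System.out.println(values.get(docID)); // prints -1, 7, 18
        }
      }
    }
  }
}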
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java indexsort/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
--- trunk/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java 2016-05-10 05:44:23.748471119 -0400
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.CollectionTerminatedException;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FilterCollector;
+import org.apache.lucene.search.FilterLeafCollector;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopDocsCollector;
+import org.apache.lucene.search.TotalHitCountCollector;
+
+/**
+ * A {@link Collector} that early terminates collection of documents on a
+ * per-segment basis, if the segment was sorted according to the given
+ * {@link Sort}.
+ *
+ * <p>
+ * <b>NOTE:</b> the {@code Collector} detects segments sorted according to the
+ * index sort set via {@link IndexWriterConfig#setIndexSort}. Also, it collects up to a specified
+ * {@code numDocsToCollect} from each segment, and therefore is mostly suitable
+ * for use in conjunction with collectors such as {@link TopDocsCollector}, and
+ * not e.g. {@link TotalHitCountCollector}.
+ * <p>
+ * <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
+ * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
+ * will be correct. However, the total {@link TopDocsCollector#getTotalHits()
+ * hit count} will be vastly underestimated, since not all matching documents
+ * will have been collected.
+ *
+ * @lucene.experimental
+ */
+
+public class EarlyTerminatingSortingCollector extends FilterCollector {
+
+  /** Returns whether collection can be early-terminated if the search sorts with the
+   * provided {@code searchSort} and segments are sorted and merged with the provided
+   * {@code mergePolicySort}. */
+ public static boolean canEarlyTerminate(Sort searchSort, Sort mergePolicySort) {
+ final SortField[] fields1 = searchSort.getSort();
+ final SortField[] fields2 = mergePolicySort.getSort();
+ // early termination is possible if fields1 is a prefix of fields2
+ if (fields1.length > fields2.length) {
+ return false;
+ }
+ return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length));
+ }
+
+ /** Sort used to sort the search results */
+ protected final Sort sort;
+ /** Number of documents to collect in each segment */
+ protected final int numDocsToCollect;
+ private final AtomicBoolean terminatedEarly = new AtomicBoolean(false);
+
+ /**
+ * Create a new {@link EarlyTerminatingSortingCollector} instance.
+ *
+ * @param in
+ * the collector to wrap
+ * @param sort
+ * the sort you are sorting the search results on
+ * @param numDocsToCollect
+ * the number of documents to collect on each segment. When wrapping
+ * a {@link TopDocsCollector}, this number should be the number of
+ * hits.
+   * @throws IllegalArgumentException if {@code numDocsToCollect} is less than or
+   *          equal to 0
+ */
+ public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
+ super(in);
+ if (numDocsToCollect <= 0) {
+ throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
+ }
+ this.sort = sort;
+ this.numDocsToCollect = numDocsToCollect;
+ }
+
+ @Override
+ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+ Sort segmentSort = context.reader().getIndexSort();
+ if (segmentSort != null && canEarlyTerminate(sort, segmentSort) == false) {
+ throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + segmentSort);
+ }
+
+ if (segmentSort != null) {
+ // segment is sorted, can early-terminate
+ return new FilterLeafCollector(super.getLeafCollector(context)) {
+ private int numCollected;
+
+ @Override
+ public void collect(int doc) throws IOException {
+ super.collect(doc);
+ if (++numCollected >= numDocsToCollect) {
+ terminatedEarly.set(true);
+ throw new CollectionTerminatedException();
+ }
+ }
+
+ };
+ } else {
+ return super.getLeafCollector(context);
+ }
+ }
+
+ public boolean terminatedEarly() {
+ return terminatedEarly.get();
+ }
+}
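For illustration (not part of the patch), a sketch of early-terminating a top-N search when the search sort matches the index sort. The TopFieldCollector.create arguments follow the Lucene 6.x signature (fillFields, trackDocScores, trackMaxScore) and may differ in other versions; the field name "foo" is a placeholder.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;

public class EarlyTerminationExample {
  // reader is expected to be open over an index written with the matching index sort
  public static TopDocs topTen(IndexReader reader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    Sort sort = new Sort(new SortField("foo", SortField.Type.LONG)); // same as the index sort
    // Lucene 6.x signature: (sort, numHits, fillFields, trackDocScores, trackMaxScore)
    TopFieldCollector topCollector = TopFieldCollector.create(sort, 10, true, false, false);
    EarlyTerminatingSortingCollector early = new EarlyTerminatingSortingCollector(topCollector, sort, 10);
    searcher.search(new MatchAllDocsQuery(), early);
    // topDocs() is correct for the requested 10 hits; getTotalHits() may be an underestimate
    // whenever early.terminatedEarly() is true.
    return topCollector.topDocs();
  }
}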
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java indexsort/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
--- trunk/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java 2016-03-23 06:11:24.645189984 -0400
+++ indexsort/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java 2016-05-10 05:44:23.748471119 -0400
@@ -819,6 +819,7 @@
sumTotalTermFreq = terms.getSumTotalTermFreq();
sumDocFreq = terms.getSumDocFreq();
}
+
return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/search/SortField.java indexsort/lucene/core/src/java/org/apache/lucene/search/SortField.java
--- trunk/lucene/core/src/java/org/apache/lucene/search/SortField.java 2016-03-02 04:32:40.439807336 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/search/SortField.java 2016-05-10 05:44:23.748471119 -0400
@@ -77,9 +77,6 @@
* uses ordinals to do the sorting. */
STRING_VAL,
- /** Sort use byte[] index values. */
- BYTES,
-
/** Force rewriting of SortField using {@link SortField#rewrite(IndexSearcher)}
* before it can be used for sorting */
REWRITEABLE
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/search/Sort.java indexsort/lucene/core/src/java/org/apache/lucene/search/Sort.java
--- trunk/lucene/core/src/java/org/apache/lucene/search/Sort.java 2016-02-16 11:18:34.677021815 -0500
+++ indexsort/lucene/core/src/java/org/apache/lucene/search/Sort.java 2016-05-10 05:44:23.748471119 -0400
@@ -147,6 +147,9 @@
* etc. Finally, if there is still a tie after all SortFields
* are checked, the internal Lucene docid is used to break it. */
public void setSort(SortField... fields) {
+ if (fields.length == 0) {
+ throw new IllegalArgumentException("There must be at least 1 sort field");
+ }
this.fields = fields;
}
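A small illustration of the new check (a fragment, not from the patch): calling setSort with no fields now fails fast instead of producing a Sort that cannot be used.

Sort sort = new Sort();
// throws IllegalArgumentException("There must be at least 1 sort field")
sort.setSort(new SortField[0]);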
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java indexsort/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
--- trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java 2016-04-24 06:00:46.369895938 -0400
+++ indexsort/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java 2016-05-10 05:44:23.748471119 -0400
@@ -299,9 +299,6 @@
final BKDReader.IntersectState state;
final MergeState.DocMap docMap;
- /** Base offset for all our docIDs */
- final int docIDBase;
-
/** Current doc ID */
public int docID;
@@ -314,7 +311,7 @@
/** Which leaf block we are up to */
private int blockID;
- public MergeReader(BKDReader bkd, MergeState.DocMap docMap, int docIDBase) throws IOException {
+ public MergeReader(BKDReader bkd, MergeState.DocMap docMap) throws IOException {
this.bkd = bkd;
state = new BKDReader.IntersectState(bkd.in.clone(),
bkd.numDims,
@@ -322,7 +319,6 @@
bkd.maxPointsInLeafNode,
null);
this.docMap = docMap;
- this.docIDBase = docIDBase;
long minFP = Long.MAX_VALUE;
//System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length);
for(long fp : bkd.leafBlockFPs) {
@@ -396,14 +392,14 @@
}
// Tie break by sorting smaller docIDs earlier:
- return a.docIDBase < b.docIDBase;
+ return a.docID < b.docID;
}
}
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
* dimensional values were deleted. */
- public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers, List<Integer> docIDBases) throws IOException {
+ public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
if (numDims != 1) {
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
}
@@ -411,8 +407,6 @@
throw new IllegalStateException("cannot mix add and merge");
}
- //System.out.println("BKDW.merge segs=" + readers.size());
-
// Catch user silliness:
if (heapPointWriter == null && tempInput == null) {
throw new IllegalStateException("already finished");
@@ -433,7 +427,7 @@
} else {
docMap = docMaps.get(i);
}
- MergeReader reader = new MergeReader(bkd, docMap, docIDBases.get(i));
+ MergeReader reader = new MergeReader(bkd, docMap);
if (reader.next()) {
queue.add(reader);
}
@@ -468,7 +462,7 @@
// System.out.println("iter reader=" + reader);
// NOTE: doesn't work with subclasses (e.g. SimpleText!)
- int docID = reader.docIDBase + reader.docID;
+ int docID = reader.docID;
leafBlockDocIDs[leafCount] = docID;
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
docsSeen.set(docID);
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec indexsort/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
--- trunk/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec 2016-01-24 13:09:49.940989953 -0500
+++ indexsort/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec 2016-05-10 05:44:23.748471119 -0400
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.lucene.codecs.lucene60.Lucene60Codec
+org.apache.lucene.codecs.lucene62.Lucene62Codec
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java
--- trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java 2016-02-16 11:18:34.701021815 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.lucene50;
-
-
-import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
-import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.Version;
-
-/**
- * Tests Lucene50SegmentInfoFormat
- */
-public class TestLucene50SegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
-
- @Override
- protected Version[] getVersions() {
- return new Version[] { Version.LATEST };
- }
-
- @Override
- protected Codec getCodec() {
- return TestUtil.getDefaultCodec();
- }
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
--- trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java 2016-03-02 04:32:40.439807336 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java 2016-05-10 05:44:23.748471119 -0400
@@ -19,7 +19,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
@@ -33,7 +33,7 @@
public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
- return new Lucene60Codec(Mode.BEST_COMPRESSION);
+ return new Lucene62Codec(Mode.BEST_COMPRESSION);
}
/**
@@ -44,7 +44,7 @@
Directory dir = newDirectory();
for (int i = 0; i < 10; i++) {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(new Lucene60Codec(RandomPicks.randomFrom(random(), Mode.values())));
+ iwc.setCodec(new Lucene62Codec(RandomPicks.randomFrom(random(), Mode.values())));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(new StoredField("field1", "value1"));
@@ -71,7 +71,7 @@
public void testInvalidOptions() throws Exception {
expectThrows(NullPointerException.class, () -> {
- new Lucene60Codec(null);
+ new Lucene62Codec(null);
});
expectThrows(NullPointerException.class, () -> {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java
--- trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java 2016-02-16 11:18:34.701021815 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java 2016-05-10 05:44:23.748471119 -0400
@@ -18,14 +18,14 @@
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.index.BaseNormsFormatTestCase;
/**
* Tests Lucene53NormsFormat
*/
public class TestLucene53NormsFormat extends BaseNormsFormatTestCase {
- private final Codec codec = new Lucene60Codec();
+ private final Codec codec = new Lucene62Codec();
@Override
protected Codec getCodec() {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java
--- trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java 2016-05-10 05:44:23.748471119 -0400
@@ -0,0 +1,39 @@
+package org.apache.lucene.codecs.lucene62;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.Version;
+
+/**
+ * Tests Lucene62SegmentInfoFormat
+ */
+public class TestLucene62SegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
+
+ @Override
+ protected Version[] getVersions() {
+ return new Version[] { Version.LATEST };
+ }
+
+ @Override
+ protected Codec getCodec() {
+ return TestUtil.getDefaultCodec();
+ }
+}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java indexsort/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java 2016-04-24 06:00:27.689895636 -0400
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java 2016-05-10 05:44:23.748471119 -0400
@@ -24,8 +24,6 @@
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.IndexSearcher;
@@ -143,6 +141,6 @@
}
private static Codec getCodec() {
- return Codec.forName("Lucene60");
+ return Codec.forName("Lucene62");
}
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java indexsort/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java 2016-02-16 11:18:34.705021816 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java 2016-05-10 05:44:23.752471119 -0400
@@ -53,7 +53,7 @@
// disk (but, should run successfully). Best to run w/
// -Dtests.codec=<current codec>, and w/ plenty of RAM, eg:
//
-// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene60 -Dtestcase=Test2BTerms
+// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene62 -Dtestcase=Test2BTerms
//
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
@Monster("very slow, use 5g minimum heap")
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java 2016-03-02 04:32:40.443807336 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java 2016-05-10 05:44:23.752471119 -0400
@@ -39,6 +39,8 @@
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
@@ -1281,4 +1283,53 @@
w2.close();
IOUtils.close(src, dest);
}
+
+ public void testIllegalIndexSortChange1() throws Exception {
+ Directory dir1 = newDirectory();
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
+ RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1);
+ w1.addDocument(new Document());
+ w1.commit();
+ w1.addDocument(new Document());
+ w1.commit();
+ // so the index sort is in fact burned into the index:
+ w1.forceMerge(1);
+ w1.close();
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING)));
+ RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2);
+ String message = expectThrows(IllegalArgumentException.class, () -> {
+ w2.addIndexes(dir1);
+ }).getMessage();
+ assertEquals("cannot change index sort from <int: \"foo\"> to <string: \"foo\">", message);
+ IOUtils.close(dir1, w2, dir2);
+ }
+
+ public void testIllegalIndexSortChange2() throws Exception {
+ Directory dir1 = newDirectory();
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
+ RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1);
+ w1.addDocument(new Document());
+ w1.commit();
+ w1.addDocument(new Document());
+ w1.commit();
+ // so the index sort is in fact burned into the index:
+ w1.forceMerge(1);
+ w1.close();
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING)));
+ RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2);
+ IndexReader r1 = DirectoryReader.open(dir1);
+ String message = expectThrows(IllegalArgumentException.class, () -> {
+ w2.addIndexes((SegmentReader) getOnlyLeafReader(r1));
+ }).getMessage();
+ assertEquals("cannot change index sort from <int: \"foo\"> to <string: \"foo\">", message);
+ IOUtils.close(r1, dir1, w2, dir2);
+ }
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java 2016-02-16 11:18:34.705021816 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java 2016-05-10 05:44:23.752471119 -0400
@@ -222,7 +222,7 @@
final FieldInfos fieldInfos = builder.finish();
final Directory dir = newDirectory();
Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
this.write(si, fieldInfos, dir, fields);
final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random())));
@@ -279,7 +279,7 @@
}
Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
this.write(si, fieldInfos, dir, fields);
if (VERBOSE) {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java 2016-03-13 05:38:07.383183845 -0400
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java 2016-05-10 05:44:23.752471119 -0400
@@ -503,7 +503,7 @@
class ReindexingOneMerge extends OneMerge {
- List<LeafReader> parallelReaders;
+ final List<ParallelLeafReader> parallelReaders = new ArrayList<>();
final long schemaGen;
ReindexingOneMerge(List<SegmentCommitInfo> segments) {
@@ -519,33 +519,23 @@
}
@Override
- public List<CodecReader> getMergeReaders() throws IOException {
- if (parallelReaders == null) {
- parallelReaders = new ArrayList<>();
- for (CodecReader reader : super.getMergeReaders()) {
- parallelReaders.add(getCurrentReader((SegmentReader)reader, schemaGen));
- }
- }
-
- // TODO: fix ParallelLeafReader, if this is a good use case
- List<CodecReader> mergeReaders = new ArrayList<>();
- for (LeafReader reader : parallelReaders) {
- mergeReaders.add(SlowCodecReaderWrapper.wrap(reader));
+ public CodecReader wrapForMerge(CodecReader reader) throws IOException {
+ LeafReader wrapped = getCurrentReader((SegmentReader)reader, schemaGen);
+ if (wrapped instanceof ParallelLeafReader) {
+ parallelReaders.add((ParallelLeafReader) wrapped);
}
- return mergeReaders;
+ return SlowCodecReaderWrapper.wrap(wrapped);
}
@Override
public void mergeFinished() throws IOException {
Throwable th = null;
- for(LeafReader r : parallelReaders) {
- if (r instanceof ParallelLeafReader) {
- try {
- r.decRef();
- } catch (Throwable t) {
- if (th == null) {
- th = t;
- }
+ for (ParallelLeafReader r : parallelReaders) {
+ try {
+ r.decRef();
+ } catch (Throwable t) {
+ if (th == null) {
+ th = t;
}
}
}
@@ -561,10 +551,6 @@
super.setMergeInfo(info);
}
- @Override
- public MergePolicy.DocMap getDocMap(final MergeState mergeState) {
- return super.getDocMap(mergeState);
- }
}
class ReindexingMergeSpecification extends MergeSpecification {
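For clarity (a sketch, not from the patch), the minimal shape of the new per-reader merge hook that the reindexing test above now overrides instead of getMergeReaders(); the class name is hypothetical.

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SegmentCommitInfo;

class PassThroughOneMerge extends MergePolicy.OneMerge {
  PassThroughOneMerge(List<SegmentCommitInfo> segments) {
    super(segments);
  }

  @Override
  public CodecReader wrapForMerge(CodecReader reader) throws IOException {
    // decorate or replace the incoming segment reader here; returning it
    // unchanged matches the default behavior
    return reader;
  }
}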
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java 2016-05-10 05:44:23.752471119 -0400
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+public class TestDocIDMerger extends LuceneTestCase {
+
+ private static class TestSubUnsorted extends DocIDMerger.Sub {
+ private int docID = -1;
+ final int valueStart;
+ final int maxDoc;
+
+ public TestSubUnsorted(MergeState.DocMap docMap, int maxDoc, int valueStart) {
+ super(docMap);
+ this.maxDoc = maxDoc;
+ this.valueStart = valueStart;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+
+ public int getValue() {
+ return valueStart + docID;
+ }
+ }
+
+ public void testNoSort() throws Exception {
+
+ int subCount = TestUtil.nextInt(random(), 1, 20);
+ List<TestSubUnsorted> subs = new ArrayList<>();
+ int valueStart = 0;
+ for(int i=0;i<subCount;i++) {
+ int maxDoc = TestUtil.nextInt(random(), 1, 1000);
+ final int docBase = valueStart;
+ subs.add(new TestSubUnsorted(new MergeState.DocMap() {
+ @Override
+ public int get(int docID) {
+ return docBase + docID;
+ }
+ }, maxDoc, valueStart));
+ valueStart += maxDoc;
+ }
+
+ DocIDMerger<TestSubUnsorted> merger = new DocIDMerger<>(subs, false);
+
+ int count = 0;
+ while (true) {
+ TestSubUnsorted sub = merger.next();
+ if (sub == null) {
+ break;
+ }
+ assertEquals(count, sub.mappedDocID);
+ assertEquals(count, sub.getValue());
+ count++;
+ }
+
+ assertEquals(valueStart, count);
+ }
+
+ private static class TestSubSorted extends DocIDMerger.Sub {
+ private int docID = -1;
+ final int maxDoc;
+ final int index;
+
+ public TestSubSorted(MergeState.DocMap docMap, int maxDoc, int index) {
+ super(docMap);
+ this.maxDoc = maxDoc;
+ this.index = index;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+
+ @Override
+ public String toString() {
+      return "TestSubSorted(index=" + index + ", mappedDocID=" + mappedDocID + ")";
+ }
+ }
+
+ public void testWithSort() throws Exception {
+
+ int subCount = TestUtil.nextInt(random(), 1, 20);
+ List<int[]> oldToNew = new ArrayList<>();
+ // how many docs we've written to each sub:
+ List<Integer> uptos = new ArrayList<>();
+ int totDocCount = 0;
+ for(int i=0;i<subCount;i++) {
+ int maxDoc = TestUtil.nextInt(random(), 1, 1000);
+ uptos.add(0);
+ oldToNew.add(new int[maxDoc]);
+ totDocCount += maxDoc;
+ }
+
+ List<int[]> completedSubs = new ArrayList<>();
+
+ // randomly distribute target docIDs into the segments:
+ for(int docID=0;docID<totDocCount;docID++) {
+ int sub = random().nextInt(oldToNew.size());
+ int upto = uptos.get(sub);
+ int[] subDocs = oldToNew.get(sub);
+ subDocs[upto] = docID;
+ upto++;
+ if (upto == subDocs.length) {
+ completedSubs.add(subDocs);
+ oldToNew.remove(sub);
+ uptos.remove(sub);
+ } else {
+ uptos.set(sub, upto);
+ }
+ }
+ assertEquals(0, oldToNew.size());
+
+ // sometimes do some deletions:
+ final FixedBitSet liveDocs;
+ if (random().nextBoolean()) {
+ liveDocs = new FixedBitSet(totDocCount);
+ liveDocs.set(0, totDocCount);
+ int deleteAttemptCount = TestUtil.nextInt(random(), 1, totDocCount);
+ for(int i=0;i<deleteAttemptCount;i++) {
+ liveDocs.clear(random().nextInt(totDocCount));
+ }
+ } else {
+ liveDocs = null;
+ }
+
+ List<TestSubSorted> subs = new ArrayList<>();
+ for(int i=0;i<subCount;i++) {
+ final int[] docMap = completedSubs.get(i);
+ subs.add(new TestSubSorted(new MergeState.DocMap() {
+ @Override
+ public int get(int docID) {
+ int mapped = docMap[docID];
+ if (liveDocs == null || liveDocs.get(mapped)) {
+ return mapped;
+ } else {
+ return -1;
+ }
+ }
+ }, docMap.length, i));
+ }
+
+ DocIDMerger<TestSubSorted> merger = new DocIDMerger<>(subs, true);
+
+ int count = 0;
+ while (true) {
+ TestSubSorted sub = merger.next();
+ if (sub == null) {
+ break;
+ }
+ if (liveDocs != null) {
+ count = liveDocs.nextSetBit(count);
+ }
+ assertEquals(count, sub.mappedDocID);
+ count++;
+ }
+
+ if (liveDocs != null) {
+ if (count < totDocCount) {
+ assertEquals(NO_MORE_DOCS, liveDocs.nextSetBit(count));
+ } else {
+ assertEquals(totDocCount, count);
+ }
+ } else {
+ assertEquals(totDocCount, count);
+ }
+ }
+}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java 2016-02-16 11:18:34.705021816 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestDoc.java 2016-05-10 05:44:23.752471119 -0400
@@ -218,7 +218,7 @@
final Codec codec = Codec.getDefault();
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
- final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2),
si, InfoStream.getDefault(), trackingDir,
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java 2016-05-10 05:44:23.752471119 -0400
@@ -0,0 +1,1358 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.BinaryPoint;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.FloatDocValuesField;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.EarlyTerminatingSortingCollector;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.TestUtil;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestIndexSorting extends LuceneTestCase {
+
+ public void testBasicString() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.STRING));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("aaa")));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ SortedDocValues values = leaf.getSortedDocValues("foo");
+ assertEquals("aaa", values.get(0).utf8ToString());
+ assertEquals("mmm", values.get(1).utf8ToString());
+ assertEquals("zzz", values.get(2).utf8ToString());
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingStringFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.STRING);
+ sortField.setMissingValue(SortField.STRING_FIRST);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ SortedDocValues values = leaf.getSortedDocValues("foo");
+ assertEquals(-1, values.getOrd(0));
+ assertEquals("mmm", values.get(1).utf8ToString());
+ assertEquals("zzz", values.get(2).utf8ToString());
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingStringLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.STRING);
+ sortField.setMissingValue(SortField.STRING_LAST);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ SortedDocValues values = leaf.getSortedDocValues("foo");
+ assertEquals("mmm", values.get(0).utf8ToString());
+ assertEquals("zzz", values.get(1).utf8ToString());
+ assertEquals(-1, values.getOrd(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicLong() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1, values.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingLongFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0, values.get(0));
+ assertFalse(docsWithField.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingLongLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7, values.get(0));
+ assertEquals(18, values.get(1));
+ assertEquals(0, values.get(2));
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicInt() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1, values.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingIntFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0, values.get(0));
+ assertFalse(docsWithField.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingIntLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7, values.get(0));
+ assertEquals(18, values.get(1));
+ assertEquals(0, values.get(2));
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicDouble() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", -1.0));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingDoubleFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.NEGATIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertFalse(docsWithField.get(0));
+ assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingDoubleLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.POSITIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(0.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicFloat() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", -1.0f));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingFloatFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.NEGATIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertFalse(docsWithField.get(0));
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingFloatLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.POSITIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(0.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
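+  // indexes random docs and deletes under an index sort, then verifies merged segments are sorted and searches still work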
+ public void testRandom1() throws IOException {
+ boolean withDeletes = random().nextBoolean();
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ final int numDocs = atLeast(1000);
+ final FixedBitSet deleted = new FixedBitSet(numDocs);
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
+ doc.add(new StringField("id", Integer.toString(i), Store.YES));
+ doc.add(new NumericDocValuesField("id", i));
+ w.addDocument(doc);
+ if (random().nextInt(5) == 0) {
+ w.getReader().close();
+ } else if (random().nextInt(30) == 0) {
+ w.forceMerge(2);
+ } else if (random().nextInt(4) == 0) {
+ final int id = TestUtil.nextInt(random(), 0, i);
+ deleted.set(id);
+ w.deleteDocuments(new Term("id", Integer.toString(id)));
+ }
+ }
+
+ // Check that segments are sorted
+ DirectoryReader reader = w.getReader();
+ for (LeafReaderContext ctx : reader.leaves()) {
+ final SegmentReader leaf = (SegmentReader) ctx.reader();
+ SegmentInfo info = leaf.getSegmentInfo().info;
+ switch (info.getDiagnostics().get(IndexWriter.SOURCE)) {
+ case IndexWriter.SOURCE_FLUSH:
+ assertNull(info.getIndexSort());
+ break;
+ case IndexWriter.SOURCE_MERGE:
+ assertEquals(indexSort, info.getIndexSort());
+ final NumericDocValues values = leaf.getNumericDocValues("foo");
+ long previous = Long.MIN_VALUE;
+ for (int i = 0; i < leaf.maxDoc(); ++i) {
+ final long value = values.get(i);
+ assertTrue(value >= previous);
+ previous = value;
+ }
+ break;
+ default:
+ fail();
+ }
+ }
+
+ // Now check that the index is consistent
+ IndexSearcher searcher = newSearcher(reader);
+ for (int i = 0; i < numDocs; ++i) {
+ TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
+ final TopDocs topDocs = searcher.search(termQuery, 1);
+ if (deleted.get(i)) {
+ assertEquals(0, topDocs.totalHits);
+ } else {
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(i, MultiDocValues.getNumericValues(reader, "id").get(topDocs.scoreDocs[0].doc));
+ Document document = reader.document(topDocs.scoreDocs[0].doc);
+ assertEquals(Integer.toString(i), document.get("id"));
+ }
+ }
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ static class UpdateRunnable implements Runnable {
+
+ private final int numDocs;
+ private final Random random;
+ private final AtomicInteger updateCount;
+ private final IndexWriter w;
+ private final Map<Integer, Long> values;
+ private final CountDownLatch latch;
+
+ UpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map<Integer, Long> values) {
+ this.numDocs = numDocs;
+ this.random = random;
+ this.latch = latch;
+ this.updateCount = updateCount;
+ this.w = w;
+ this.values = values;
+ }
+
+ @Override
+ public void run() {
+ try {
+ latch.await();
+ while (updateCount.decrementAndGet() >= 0) {
+ final int id = random.nextInt(numDocs);
+ final long value = random.nextInt(20);
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(id), Store.NO));
+ doc.add(new NumericDocValuesField("foo", value));
+
+ synchronized (values) {
+ w.updateDocument(new Term("id", Integer.toString(id)), doc);
+ values.put(id, value);
+ }
+
+ switch (random.nextInt(10)) {
+ case 0:
+ case 1:
+ // reopen
+ DirectoryReader.open(w).close();
+ break;
+ case 2:
+ w.forceMerge(3);
+ break;
+ }
+ }
+ } catch (IOException | InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ }
+
+ // There is tricky logic to resolve deletes that happened while merging
+ public void testConcurrentUpdates() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Map<Integer, Long> values = new HashMap<>();
+
+ final int numDocs = atLeast(100);
+ Thread[] threads = new Thread[2];
+ final AtomicInteger updateCount = new AtomicInteger(atLeast(1000));
+ final CountDownLatch latch = new CountDownLatch(1);
+ for (int i = 0; i < threads.length; ++i) {
+ Random r = new Random(random().nextLong());
+ threads[i] = new Thread(new UpdateRunnable(numDocs, r, latch, updateCount, w, values));
+ }
+ for (Thread thread : threads) {
+ thread.start();
+ }
+ latch.countDown();
+ for (Thread thread : threads) {
+ thread.join();
+ }
+ w.forceMerge(1);
+ DirectoryReader reader = DirectoryReader.open(w);
+ IndexSearcher searcher = newSearcher(reader);
+ for (int i = 0; i < numDocs; ++i) {
+ final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
+ if (values.containsKey(i) == false) {
+ assertEquals(0, topDocs.totalHits);
+ } else {
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc));
+ }
+ }
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ static class DVUpdateRunnable implements Runnable {
+
+ private final int numDocs;
+ private final Random random;
+ private final AtomicInteger updateCount;
+ private final IndexWriter w;
+ private final Map<Integer, Long> values;
+ private final CountDownLatch latch;
+
+ DVUpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map<Integer, Long> values) {
+ this.numDocs = numDocs;
+ this.random = random;
+ this.latch = latch;
+ this.updateCount = updateCount;
+ this.w = w;
+ this.values = values;
+ }
+
+ @Override
+ public void run() {
+ try {
+ latch.await();
+ while (updateCount.decrementAndGet() >= 0) {
+ final int id = random.nextInt(numDocs);
+ final long value = random.nextInt(20);
+
+ synchronized (values) {
+ w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("foo", value));
+ values.put(id, value);
+ }
+
+ switch (random.nextInt(10)) {
+ case 0:
+ case 1:
+ // reopen
+ DirectoryReader.open(w).close();
+ break;
+ case 2:
+ w.forceMerge(3);
+ break;
+ }
+ }
+ } catch (IOException | InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ }
+
+ // There is tricky logic to resolve dv updates that happened while merging
+ public void testConcurrentDVUpdates() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Map<Integer, Long> values = new HashMap<>();
+
+ final int numDocs = atLeast(100);
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(i), Store.NO));
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ values.put(i, -1L);
+ }
+ Thread[] threads = new Thread[2];
+ final AtomicInteger updateCount = new AtomicInteger(atLeast(1000));
+ final CountDownLatch latch = new CountDownLatch(1);
+ for (int i = 0; i < threads.length; ++i) {
+ Random r = new Random(random().nextLong());
+ threads[i] = new Thread(new DVUpdateRunnable(numDocs, r, latch, updateCount, w, values));
+ }
+ for (Thread thread : threads) {
+ thread.start();
+ }
+ latch.countDown();
+ for (Thread thread : threads) {
+ thread.join();
+ }
+ w.forceMerge(1);
+ DirectoryReader reader = DirectoryReader.open(w);
+ IndexSearcher searcher = newSearcher(reader);
+ for (int i = 0; i < numDocs; ++i) {
+ final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc));
+ }
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+  private void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception {
+ Directory dir = newDirectory();
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ IndexWriterConfig iwc1 = newIndexWriterConfig();
+ if (random().nextBoolean()) {
+ iwc1.setIndexSort(indexSort);
+ }
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc1);
+ final int numDocs = atLeast(100);
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(i), Store.NO));
+ doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
+ w.addDocument(doc);
+ }
+ if (withDeletes) {
+ for (int i = random().nextInt(5); i < numDocs; i += TestUtil.nextInt(random(), 1, 5)) {
+ w.deleteDocuments(new Term("id", Integer.toString(i)));
+ }
+ }
+ if (random().nextBoolean()) {
+ w.forceMerge(1);
+ }
+ final IndexReader reader = w.getReader();
+ w.close();
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w2 = new IndexWriter(dir2, iwc);
+
+ if (useReaders) {
+ CodecReader[] codecReaders = new CodecReader[reader.leaves().size()];
+ for (int i = 0; i < codecReaders.length; ++i) {
+ codecReaders[i] = (CodecReader) reader.leaves().get(i).reader();
+ }
+ w2.addIndexes(codecReaders);
+ } else {
+ w2.addIndexes(dir);
+ }
+ final IndexReader reader2 = w2.getReader();
+ final IndexSearcher searcher = newSearcher(reader);
+ final IndexSearcher searcher2 = newSearcher(reader2);
+ for (int i = 0; i < numDocs; ++i) {
+ Query query = new TermQuery(new Term("id", Integer.toString(i)));
+ final TopDocs topDocs = searcher.search(query, 1);
+ final TopDocs topDocs2 = searcher2.search(query, 1);
+ assertEquals(topDocs.totalHits, topDocs2.totalHits);
+ if (topDocs.totalHits == 1) {
+ assertEquals(
+ MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc),
+ MultiDocValues.getNumericValues(reader2, "foo").get(topDocs2.scoreDocs[0].doc));
+ }
+ }
+
+ IOUtils.close(reader, reader2, w2, dir, dir2);
+ }
+
+ public void testAddIndexes() throws Exception {
+ testAddIndexes(false, true);
+ }
+
+ public void testAddIndexesWithDeletions() throws Exception {
+ testAddIndexes(true, true);
+ }
+
+ public void testAddIndexesWithDirectory() throws Exception {
+ testAddIndexes(false, false);
+ }
+
+ public void testAddIndexesWithDeletionsAndDirectory() throws Exception {
+ testAddIndexes(true, false);
+ }
+
+ public void testBadSort() throws Exception {
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+ iwc.setIndexSort(Sort.RELEVANCE);
+ });
+ assertEquals("invalid SortField type: must be one of [STRING, INT, FLOAT, LONG, DOUBLE] but got: <score>", expected.getMessage());
+ }
+
+ // you can't change the index sort on an existing index:
+ public void testIllegalChangeSort() throws Exception {
+ final Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG)));
+ IndexWriter w = new IndexWriter(dir, iwc);
+ w.addDocument(new Document());
+ DirectoryReader.open(w).close();
+ w.addDocument(new Document());
+ w.forceMerge(1);
+ w.close();
+
+ final IndexWriterConfig iwc2 = new IndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(new Sort(new SortField("bar", SortField.Type.LONG)));
+ IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
+ new IndexWriter(dir, iwc2);
+ });
+ String message = e.getMessage();
+ assertTrue(message.contains("cannot change previous indexSort=<long: \"foo\">"));
+ assertTrue(message.contains("to new indexSort=<long: \"bar\">"));
+ dir.close();
+ }
+
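+  // Similarity that stores the field boost as the norm for the "norms" field, so testRandom2 can compare norms across the two indexes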
+ static final class NormsSimilarity extends Similarity {
+
+ private final Similarity in;
+
+ public NormsSimilarity(Similarity in) {
+ this.in = in;
+ }
+
+ @Override
+ public long computeNorm(FieldInvertState state) {
+ if (state.getName().equals("norms")) {
+ return Float.floatToIntBits(state.getBoost());
+ } else {
+ return in.computeNorm(state);
+ }
+ }
+
+ @Override
+ public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
+ return in.computeWeight(collectionStats, termStats);
+ }
+
+ @Override
+ public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
+ return in.simScorer(weight, context);
+ }
+
+ }
+
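+  // TokenStream that emits id/10 + 1 copies of the term "#all#", keeping the countdown in the payload and bumping the offset for each token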
+ static final class PositionsTokenStream extends TokenStream {
+
+ private final CharTermAttribute term;
+ private final PayloadAttribute payload;
+ private final OffsetAttribute offset;
+
+ private int pos, off;
+
+ public PositionsTokenStream() {
+ term = addAttribute(CharTermAttribute.class);
+ payload = addAttribute(PayloadAttribute.class);
+ offset = addAttribute(OffsetAttribute.class);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (pos == 0) {
+ return false;
+ }
+
+ clearAttributes();
+ term.append("#all#");
+ payload.setPayload(new BytesRef(Integer.toString(pos)));
+ offset.setOffset(off, off);
+ --pos;
+ ++off;
+ return true;
+ }
+
+ void setId(int id) {
+ pos = id / 10 + 1;
+ off = 0;
+ }
+ }
+
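+  // indexes the same docs pre-sorted by hand into one index, and shuffled with an index sort into another, then checks the two readers are identical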
+ public void testRandom2() throws Exception {
+ int numDocs = atLeast(100);
+
+ FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+ POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ POSITIONS_TYPE.freeze();
+
+ FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+ TERM_VECTORS_TYPE.setStoreTermVectors(true);
+ TERM_VECTORS_TYPE.freeze();
+
+ List<Document> docs = new ArrayList<>();
+ for (int i=0;i<numDocs;i++) {
+ int id = i * 10;
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(id), Store.YES));
+ doc.add(new StringField("docs", "#all#", Store.NO));
+ PositionsTokenStream positions = new PositionsTokenStream();
+ positions.setId(id);
+ doc.add(new Field("positions", positions, POSITIONS_TYPE));
+ doc.add(new NumericDocValuesField("numeric", id));
+ TextField norms = new TextField("norms", Integer.toString(id), Store.NO);
+ norms.setBoost(Float.intBitsToFloat(id));
+ doc.add(norms);
+ doc.add(new BinaryDocValuesField("binary", new BytesRef(Integer.toString(id))));
+ doc.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(id))));
+ doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Integer.toString(id))));
+ doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Integer.toString(id + 1))));
+ doc.add(new SortedNumericDocValuesField("sorted_numeric", id));
+ doc.add(new SortedNumericDocValuesField("sorted_numeric", id + 1));
+ doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE));
+ byte[] bytes = new byte[4];
+ NumericUtils.intToSortableBytes(id, bytes, 0);
+ doc.add(new BinaryPoint("points", bytes));
+ docs.add(doc);
+ }
+
+ // Must use the same seed for both RandomIndexWriters so they behave identically
+ long seed = random().nextLong();
+
+    // We add documents already in ID order for the first writer:
+ Directory dir1 = newFSDirectory(createTempDir());
+
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity())); // for testing norms field
+ // preserve docIDs
+ iwc1.setMergePolicy(newLogMergePolicy());
+ if (VERBOSE) {
+ System.out.println("TEST: now index pre-sorted");
+ }
+ RandomIndexWriter w1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
+ for(Document doc : docs) {
+ ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
+ w1.addDocument(doc);
+ }
+
+ // We shuffle documents, but set index sort, for the second writer:
+ Directory dir2 = newFSDirectory(createTempDir());
+
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity())); // for testing norms field
+
+ Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
+ iwc2.setIndexSort(sort);
+
+ Collections.shuffle(docs, random());
+ if (VERBOSE) {
+ System.out.println("TEST: now index with index-time sorting");
+ }
+ RandomIndexWriter w2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
+ int count = 0;
+ int commitAtCount = TestUtil.nextInt(random(), 1, numDocs-1);
+ for(Document doc : docs) {
+ ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
+ if (count++ == commitAtCount) {
+ // Ensure forceMerge really does merge
+ w2.commit();
+ }
+ w2.addDocument(doc);
+ }
+ w2.forceMerge(1);
+
+ DirectoryReader r1 = w1.getReader();
+ DirectoryReader r2 = w2.getReader();
+ assertEquals(sort, getOnlyLeafReader(r2).getIndexSort());
+ assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
+ IOUtils.close(w1, w2, r1, r2, dir1, dir2);
+ }
+
+ private static final class RandomDoc {
+ public final int id;
+ public final int intValue;
+ public final long longValue;
+ public final float floatValue;
+ public final double doubleValue;
+ public final byte[] bytesValue;
+
+ public RandomDoc(int id) {
+ this.id = id;
+ intValue = random().nextInt();
+ longValue = random().nextLong();
+ floatValue = random().nextFloat();
+ doubleValue = random().nextDouble();
+ bytesValue = new byte[TestUtil.nextInt(random(), 1, 50)];
+ random().nextBytes(bytesValue);
+ }
+ }
+
+ private static Sort randomSort() {
+ int numFields = TestUtil.nextInt(random(), 1, 3);
+ SortField[] sortFields = new SortField[numFields];
+ for(int i=0;i<numFields-1;i++) {
+ boolean reversed = random().nextBoolean();
+ SortField sortField;
+ switch(random().nextInt(5)) {
+ case 0:
+ sortField = new SortField("int", SortField.Type.INT, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextInt());
+ }
+ break;
+ case 1:
+ sortField = new SortField("long", SortField.Type.LONG, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextLong());
+ }
+ break;
+ case 2:
+ sortField = new SortField("float", SortField.Type.FLOAT, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextFloat());
+ }
+ break;
+ case 3:
+ sortField = new SortField("double", SortField.Type.DOUBLE, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextDouble());
+ }
+ break;
+ case 4:
+ sortField = new SortField("bytes", SortField.Type.STRING, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(SortField.STRING_LAST);
+ }
+ break;
+ default:
+ throw new AssertionError();
+ }
+ sortFields[i] = sortField;
+ }
+
+ // tie-break by id:
+ sortFields[numFields-1] = new SortField("id", SortField.Type.INT);
+
+ return new Sort(sortFields);
+ }
+
+ // pits index time sorting against query time sorting
+ public void testRandom3() throws Exception {
+ int numDocs;
+ if (TEST_NIGHTLY) {
+ numDocs = atLeast(100000);
+ } else {
+ numDocs = atLeast(10000);
+ }
+ List<RandomDoc> docs = new ArrayList<>();
+
+ Sort sort = randomSort();
+ if (VERBOSE) {
+ System.out.println("TEST: numDocs=" + numDocs + " use sort=" + sort);
+ }
+
+ // no index sorting, all search-time sorting:
+ Directory dir1 = newFSDirectory(createTempDir());
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter w1 = new IndexWriter(dir1, iwc1);
+
+ // use index sorting:
+ Directory dir2 = newFSDirectory(createTempDir());
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(sort);
+ IndexWriter w2 = new IndexWriter(dir2, iwc2);
+
+ Set<Integer> toDelete = new HashSet<>();
+
+ double deleteChance = random().nextDouble();
+
+ for(int id=0;id<numDocs;id++) {
+ RandomDoc docValues = new RandomDoc(id);
+ docs.add(docValues);
+ if (VERBOSE) {
+ System.out.println("TEST: doc id=" + id);
+ System.out.println(" int=" + docValues.intValue);
+ System.out.println(" long=" + docValues.longValue);
+ System.out.println(" float=" + docValues.floatValue);
+ System.out.println(" double=" + docValues.doubleValue);
+ System.out.println(" bytes=" + new BytesRef(docValues.bytesValue));
+ }
+
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
+ doc.add(new NumericDocValuesField("id", id));
+ doc.add(new NumericDocValuesField("int", docValues.intValue));
+ doc.add(new NumericDocValuesField("long", docValues.longValue));
+ doc.add(new DoubleDocValuesField("double", docValues.doubleValue));
+ doc.add(new FloatDocValuesField("float", docValues.floatValue));
+ doc.add(new SortedDocValuesField("bytes", new BytesRef(docValues.bytesValue)));
+ w1.addDocument(doc);
+ w2.addDocument(doc);
+ if (random().nextDouble() < deleteChance) {
+ toDelete.add(id);
+ }
+ }
+ for(int id : toDelete) {
+ w1.deleteDocuments(new Term("id", Integer.toString(id)));
+ w2.deleteDocuments(new Term("id", Integer.toString(id)));
+ }
+ DirectoryReader r1 = DirectoryReader.open(w1);
+ IndexSearcher s1 = newSearcher(r1);
+
+ if (random().nextBoolean()) {
+ int maxSegmentCount = TestUtil.nextInt(random(), 1, 5);
+ if (VERBOSE) {
+ System.out.println("TEST: now forceMerge(" + maxSegmentCount + ")");
+ }
+ w2.forceMerge(maxSegmentCount);
+ }
+
+ DirectoryReader r2 = DirectoryReader.open(w2);
+ IndexSearcher s2 = newSearcher(r2);
+
+ /*
+ System.out.println("TEST: full index:");
+ SortedDocValues docValues = MultiDocValues.getSortedValues(r2, "bytes");
+ for(int i=0;i<r2.maxDoc();i++) {
+ System.out.println(" doc " + i + " id=" + r2.document(i).get("id") + " bytes=" + docValues.get(i));
+ }
+ */
+
+ for(int iter=0;iter<100;iter++) {
+ int numHits = TestUtil.nextInt(random(), 1, numDocs);
+ if (VERBOSE) {
+ System.out.println("TEST: iter=" + iter + " numHits=" + numHits);
+ }
+
+ TopFieldCollector c1 = TopFieldCollector.create(sort, numHits, true, true, true);
+ s1.search(new MatchAllDocsQuery(), c1);
+ TopDocs hits1 = c1.topDocs();
+
+ TopFieldCollector c2 = TopFieldCollector.create(sort, numHits, true, true, true);
+ EarlyTerminatingSortingCollector c3 = new EarlyTerminatingSortingCollector(c2, sort, numHits);
+ s2.search(new MatchAllDocsQuery(), c3);
+
+ TopDocs hits2 = c2.topDocs();
+
+ if (VERBOSE) {
+ System.out.println(" topDocs query-time sort: totalHits=" + hits1.totalHits);
+ for(ScoreDoc scoreDoc : hits1.scoreDocs) {
+ System.out.println(" " + scoreDoc.doc);
+ }
+ System.out.println(" topDocs index-time sort: totalHits=" + hits2.totalHits);
+ for(ScoreDoc scoreDoc : hits2.scoreDocs) {
+ System.out.println(" " + scoreDoc.doc);
+ }
+ }
+
+ assertTrue(hits2.totalHits <= hits1.totalHits);
+ assertEquals(hits2.scoreDocs.length, hits1.scoreDocs.length);
+ for(int i=0;i<hits2.scoreDocs.length;i++) {
+ ScoreDoc hit1 = hits1.scoreDocs[i];
+ ScoreDoc hit2 = hits2.scoreDocs[i];
+ assertEquals(r1.document(hit1.doc).get("id"), r2.document(hit2.doc).get("id"));
+ assertEquals(((FieldDoc) hit1).fields, ((FieldDoc) hit2).fields);
+ }
+ }
+
+ IOUtils.close(r1, r2, w1, w2, dir1, dir2);
+ }
+
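+  // documents with the same sort value must keep their original (insertion) order after merging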
+ public void testTieBreak() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING)));
+ iwc.setMergePolicy(newLogMergePolicy());
+ IndexWriter w = new IndexWriter(dir, iwc);
+ for(int id=0;id<1000;id++) {
+ Document doc = new Document();
+ doc.add(new StoredField("id", id));
+ String value;
+ if (id < 500) {
+ value = "bar2";
+ } else {
+ value = "bar1";
+ }
+ doc.add(new SortedDocValuesField("foo", new BytesRef(value)));
+ w.addDocument(doc);
+ if (id == 500) {
+ w.commit();
+ }
+ }
+ w.forceMerge(1);
+ DirectoryReader r = DirectoryReader.open(w);
+ for(int docID=0;docID<1000;docID++) {
+ int expectedID;
+ if (docID < 500) {
+ expectedID = 500 + docID;
+ } else {
+ expectedID = docID - 500;
+ }
+ assertEquals(expectedID, r.document(docID).getField("id").numericValue().intValue());
+ }
+ IOUtils.close(r, w, dir);
+ }
+}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java 2016-03-13 05:38:07.387183845 -0400
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java 2016-05-10 05:44:23.752471119 -0400
@@ -69,6 +69,8 @@
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BaseDirectoryWrapper;
@@ -2759,5 +2761,6 @@
w.close();
dir.close();
}
+
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestParallelLeafReader.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestParallelLeafReader.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestParallelLeafReader.java 2016-03-13 05:38:07.387183845 -0400
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestParallelLeafReader.java 2016-05-10 05:44:23.752471119 -0400
@@ -23,10 +23,11 @@
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@@ -314,4 +315,60 @@
return dir2;
}
+ // not ok to have one leaf w/ index sort and another with a different index sort
+ public void testWithIndexSort1() throws Exception {
+ Directory dir1 = newDirectory();
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
+ IndexWriter w1 = new IndexWriter(dir1, iwc1);
+ w1.addDocument(new Document());
+ w1.commit();
+ w1.addDocument(new Document());
+ w1.forceMerge(1);
+ w1.close();
+ IndexReader r1 = DirectoryReader.open(dir1);
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(new Sort(new SortField("bar", SortField.Type.INT)));
+ IndexWriter w2 = new IndexWriter(dir2, iwc2);
+ w2.addDocument(new Document());
+ w2.commit();
+ w2.addDocument(new Document());
+ w2.forceMerge(1);
+ w2.close();
+ IndexReader r2 = DirectoryReader.open(dir2);
+
+ String message = expectThrows(IllegalArgumentException.class, () -> {
+ new ParallelLeafReader(getOnlyLeafReader(r1), getOnlyLeafReader(r2));
+ }).getMessage();
+ assertEquals("cannot combine LeafReaders that have different index sorts: saw both sort=<int: \"foo\"> and <int: \"bar\">", message);
+ IOUtils.close(r1, dir1, r2, dir2);
+ }
+
+ // ok to have one leaf w/ index sort and the other with no sort
+ public void testWithIndexSort2() throws Exception {
+ Directory dir1 = newDirectory();
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
+ IndexWriter w1 = new IndexWriter(dir1, iwc1);
+ w1.addDocument(new Document());
+ w1.commit();
+ w1.addDocument(new Document());
+ w1.forceMerge(1);
+ w1.close();
+ IndexReader r1 = DirectoryReader.open(dir1);
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter w2 = new IndexWriter(dir2, iwc2);
+ w2.addDocument(new Document());
+ w2.addDocument(new Document());
+ w2.close();
+
+ IndexReader r2 = DirectoryReader.open(dir2);
+ new ParallelLeafReader(false, getOnlyLeafReader(r1), getOnlyLeafReader(r2)).close();
+ new ParallelLeafReader(false, getOnlyLeafReader(r2), getOnlyLeafReader(r1)).close();
+ IOUtils.close(r1, dir1, r2, dir2);
+ }
}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java 2016-05-03 07:31:51.560971608 -0400
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java 2016-05-10 05:44:23.752471119 -0400
@@ -394,11 +394,11 @@
dir.close();
}
- // Write point values, one segment with Lucene60, another with SimpleText, then forceMerge with SimpleText
+ // Write point values, one segment with Lucene62, another with SimpleText, then forceMerge with SimpleText
public void testDifferentCodecs1() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(Codec.forName("Lucene60"));
+ iwc.setCodec(Codec.forName("Lucene62"));
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 1));
@@ -417,7 +417,7 @@
dir.close();
}
- // Write point values, one segment with Lucene60, another with SimpleText, then forceMerge with Lucene60
+ // Write point values, one segment with Lucene62, another with SimpleText, then forceMerge with Lucene62
public void testDifferentCodecs2() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -429,7 +429,7 @@
w.close();
iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(Codec.forName("Lucene60"));
+ iwc.setCodec(Codec.forName("Lucene62"));
w = new IndexWriter(dir, iwc);
doc = new Document();
doc.add(new IntPoint("int", 1));
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java 2016-05-03 07:38:23.468977947 -0400
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java 2016-05-10 05:44:23.752471119 -0400
@@ -51,7 +51,7 @@
SegmentInfos sis = new SegmentInfos();
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(),
- Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
+ Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
@@ -73,14 +73,14 @@
SegmentInfos sis = new SegmentInfos();
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(),
- Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
+ Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
sis.add(commitInfo);
info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_1", 1, false, Codec.getDefault(),
- Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
+ Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java indexsort/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
--- trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java 2016-02-16 11:18:34.713021816 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java 2016-05-10 05:44:23.752471119 -0400
@@ -35,6 +35,7 @@
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.packed.PackedLongValues;
public class TestSegmentMerger extends LuceneTestCase {
//The variables for the new merged segment
@@ -83,7 +84,7 @@
public void testMerge() throws IOException {
final Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(reader1, reader2),
si, InfoStream.getDefault(), mergedDir,
@@ -144,22 +145,9 @@
mergedReader.close();
}
- private static boolean equals(MergeState.DocMap map1, MergeState.DocMap map2) {
- if (map1.maxDoc() != map2.maxDoc()) {
- return false;
- }
- for (int i = 0; i < map1.maxDoc(); ++i) {
- if (map1.get(i) != map2.get(i)) {
- return false;
- }
- }
- return true;
- }
-
public void testBuildDocMap() {
final int maxDoc = TestUtil.nextInt(random(), 1, 128);
final int numDocs = TestUtil.nextInt(random(), 0, maxDoc);
- final int numDeletedDocs = maxDoc - numDocs;
final FixedBitSet liveDocs = new FixedBitSet(maxDoc);
for (int i = 0; i < numDocs; ++i) {
while (true) {
@@ -171,15 +159,11 @@
}
}
- final MergeState.DocMap docMap = MergeState.DocMap.build(maxDoc, liveDocs);
+ final PackedLongValues docMap = MergeState.removeDeletes(maxDoc, liveDocs);
- assertEquals(maxDoc, docMap.maxDoc());
- assertEquals(numDocs, docMap.numDocs());
- assertEquals(numDeletedDocs, docMap.numDeletedDocs());
// assert the mapping is compact
for (int i = 0, del = 0; i < maxDoc; ++i) {
- if (!liveDocs.get(i)) {
- assertEquals(-1, docMap.get(i));
+ if (liveDocs.get(i) == false) {
++del;
} else {
assertEquals(i - del, docMap.get(i));
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java indexsort/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
--- trunk/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java 1969-12-31 19:00:00.000000000 -0500
+++ indexsort/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java 2016-05-10 05:44:23.752471119 -0400
@@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.ExitableDirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MockRandomMergePolicy;
+import org.apache.lucene.index.QueryTimeout;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SerialMergeScheduler;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+
+public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
+
+ private int numDocs;
+ private List<String> terms;
+ private Directory dir;
+ private final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
+ private RandomIndexWriter iw;
+ private IndexReader reader;
+ private final int forceMergeMaxSegmentCount = 5;
+
+ private Document randomDocument() {
+ final Document doc = new Document();
+ doc.add(new NumericDocValuesField("ndv1", random().nextInt(10)));
+ doc.add(new NumericDocValuesField("ndv2", random().nextInt(10)));
+ doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES));
+ return doc;
+ }
+
+ private void createRandomIndex(boolean singleSortedSegment) throws IOException {
+ dir = newDirectory();
+ numDocs = atLeast(150);
+ final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
+ Set<String> randomTerms = new HashSet<>();
+ while (randomTerms.size() < numTerms) {
+ randomTerms.add(TestUtil.randomSimpleString(random()));
+ }
+ terms = new ArrayList<>(randomTerms);
+ final long seed = random().nextLong();
+ final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
+ if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
+ // MockRandomMP randomly wraps the leaf readers which makes merging angry
+ iwc.setMergePolicy(newTieredMergePolicy());
+ }
+ iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
+ iwc.setIndexSort(sort);
+ iw = new RandomIndexWriter(new Random(seed), dir, iwc);
+ iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
+ for (int i = 0; i < numDocs; ++i) {
+ final Document doc = randomDocument();
+ iw.addDocument(doc);
+ if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) {
+ iw.commit();
+ }
+ if (random().nextInt(15) == 0) {
+ final String term = RandomPicks.randomFrom(random(), terms);
+ iw.deleteDocuments(new Term("s", term));
+ }
+ }
+ if (singleSortedSegment) {
+ // because of deletions, there might still be a single flush segment in
+    // the index, but we want a sorted segment so it needs to be merged
+ iw.getReader().close(); // refresh
+ iw.addDocument(new Document());
+ iw.commit();
+ iw.addDocument(new Document());
+ iw.forceMerge(1);
+ }
+ else if (random().nextBoolean()) {
+ iw.forceMerge(forceMergeMaxSegmentCount);
+ }
+ reader = iw.getReader();
+ }
+
+ private void closeIndex() throws IOException {
+ reader.close();
+ iw.close();
+ dir.close();
+ }
+
+ public void testEarlyTermination() throws IOException {
+ final int iters = atLeast(8);
+ for (int i = 0; i < iters; ++i) {
+ createRandomIndex(false);
+ for (int j = 0; j < iters; ++j) {
+ final IndexSearcher searcher = newSearcher(reader);
+ final int numHits = TestUtil.nextInt(random(), 1, numDocs);
+ final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG, false));
+ final boolean fillFields = random().nextBoolean();
+ final boolean trackDocScores = random().nextBoolean();
+ final boolean trackMaxScore = random().nextBoolean();
+ final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
+ final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
+
+ final Query query;
+ if (random().nextBoolean()) {
+ query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
+ } else {
+ query = new MatchAllDocsQuery();
+ }
+ searcher.search(query, collector1);
+ searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
+ assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
+ assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
+ }
+ closeIndex();
+ }
+ }
+
+ public void testCanEarlyTerminate() {
+ assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
+ new Sort(new SortField("a", SortField.Type.LONG)),
+ new Sort(new SortField("a", SortField.Type.LONG))));
+
+ assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
+ new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
+ new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
+
+ assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
+ new Sort(new SortField("a", SortField.Type.LONG)),
+ new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
+
+ assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
+ new Sort(new SortField("a", SortField.Type.LONG, true)),
+ new Sort(new SortField("a", SortField.Type.LONG, false))));
+
+ assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
+ new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
+ new Sort(new SortField("a", SortField.Type.LONG))));
+
+ assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
+ new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
+ new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
+
+ assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
+ new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
+ new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
+ }
+
+ public void testEarlyTerminationDifferentSorter() throws IOException {
+ createRandomIndex(true);
+
+ Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false));
+ Collector c = new EarlyTerminatingSortingCollector(TopFieldCollector.create(sort, 10, true, true, true), sort, 10);
+ IndexSearcher searcher = newSearcher(reader);
+ Exception e = expectThrows(IllegalStateException.class,
+ () -> {
+ searcher.search(new MatchAllDocsQuery(), c);
+ });
+ assertEquals("Cannot early terminate with sort order <long: \"ndv2\"> if segments are sorted with <long: \"ndv1\">", e.getMessage());
+ closeIndex();
+ }
+
+ private static void assertTopDocsEquals(ScoreDoc[] scoreDocs1, ScoreDoc[] scoreDocs2) {
+ assertEquals(scoreDocs1.length, scoreDocs2.length);
+ for (int i = 0; i < scoreDocs1.length; ++i) {
+ final ScoreDoc scoreDoc1 = scoreDocs1[i];
+ final ScoreDoc scoreDoc2 = scoreDocs2[i];
+ assertEquals(scoreDoc1.doc, scoreDoc2.doc);
+ assertEquals(scoreDoc1.score, scoreDoc2.score, 0.001f);
+ }
+ }
+
+ private class TestTerminatedEarlySimpleCollector extends SimpleCollector {
+ private boolean collectedSomething;
+ public boolean collectedSomething() {
+ return collectedSomething;
+ }
+ @Override
+ public void collect(int doc) throws IOException {
+ collectedSomething = true;
+ }
+ @Override
+ public boolean needsScores() {
+ return false;
+ }
+ }
+
+ private class TestEarlyTerminatingSortingcollectorQueryTimeout implements QueryTimeout {
+ final private boolean shouldExit;
+ public TestEarlyTerminatingSortingcollectorQueryTimeout(boolean shouldExit) {
+ this.shouldExit = shouldExit;
+ }
+ public boolean shouldExit() {
+ return shouldExit;
+ }
+ }
+
+ public void testTerminatedEarly() throws IOException {
+ final int iters = atLeast(8);
+ for (int i = 0; i < iters; ++i) {
+ createRandomIndex(true);
+
+ final IndexSearcher searcher = new IndexSearcher(reader); // future TODO: use newSearcher(reader);
+ final Query query = new MatchAllDocsQuery(); // search for everything/anything
+
+ final TestTerminatedEarlySimpleCollector collector1 = new TestTerminatedEarlySimpleCollector();
+ searcher.search(query, collector1);
+
+ final TestTerminatedEarlySimpleCollector collector2 = new TestTerminatedEarlySimpleCollector();
+ final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1);
+ searcher.search(query, etsCollector);
+
+ assertTrue("collector1="+collector1.collectedSomething()+" vs. collector2="+collector2.collectedSomething(), collector1.collectedSomething() == collector2.collectedSomething());
+
+ if (collector1.collectedSomething()) {
+ // we collected something and since we modestly asked for just one document we should have terminated early
+ assertTrue("should have terminated early (searcher.reader="+searcher.reader+")", etsCollector.terminatedEarly());
+ }
+ closeIndex();
+ }
+ }
+
+}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java indexsort/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
--- trunk/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java 2016-04-24 06:00:33.345895727 -0400
+++ indexsort/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java 2016-05-10 05:44:23.752471119 -0400
@@ -1151,14 +1151,14 @@
}
private static Codec getCodec() {
- if (Codec.getDefault().getName().equals("Lucene60")) {
+ if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 5.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
- return new FilterCodec("Lucene60", Codec.getDefault()) {
+ return new FilterCodec("Lucene62", Codec.getDefault()) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java indexsort/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
--- trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java 2016-04-24 06:00:46.369895938 -0400
+++ indexsort/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java 2016-05-10 05:44:23.752471119 -0400
@@ -25,6 +25,7 @@
import java.util.List;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.CorruptingIndexOutput;
@@ -554,7 +555,7 @@
}
List<Long> toMerge = null;
- List<Integer> docIDBases = null;
+ List<MergeState.DocMap> docMaps = null;
int seg = 0;
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length, false);
@@ -601,9 +602,15 @@
if (useMerge && segCount == valuesInThisSeg) {
if (toMerge == null) {
toMerge = new ArrayList<>();
- docIDBases = new ArrayList<>();
+ docMaps = new ArrayList<>();
}
- docIDBases.add(lastDocIDBase);
+ final int curDocIDBase = lastDocIDBase;
+ docMaps.add(new MergeState.DocMap() {
+ @Override
+ public int get(int docID) {
+ return curDocIDBase + docID;
+ }
+ });
toMerge.add(w.finish(out));
valuesInThisSeg = TestUtil.nextInt(random(), numValues/10, numValues/2);
segCount = 0;
@@ -620,8 +627,14 @@
if (toMerge != null) {
if (segCount > 0) {
- docIDBases.add(lastDocIDBase);
toMerge.add(w.finish(out));
+ final int curDocIDBase = lastDocIDBase;
+ docMaps.add(new MergeState.DocMap() {
+ @Override
+ public int get(int docID) {
+ return curDocIDBase + docID;
+ }
+ });
}
out.close();
in = dir.openInput("bkd", IOContext.DEFAULT);
@@ -633,7 +646,7 @@
readers.add(new BKDReader(in));
}
out = dir.createOutput("bkd2", IOContext.DEFAULT);
- indexFP = w.merge(out, null, readers, docIDBases);
+ indexFP = w.merge(out, docMaps, readers);
out.close();
in.close();
in = dir.openInput("bkd2", IOContext.DEFAULT);
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java indexsort/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
--- trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java 2016-02-16 11:18:34.745021816 -0500
+++ indexsort/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java 2016-05-10 05:44:23.752471119 -0400
@@ -21,7 +21,6 @@
import java.util.Iterator;
import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -29,11 +28,13 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
@@ -178,4 +179,8 @@
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
}
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
}
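
This hunk is one of several in the patch that give every LeafReader a getIndexSort() accessor; readers that are not sorted simply return null. A hedged sketch of how a caller might combine it with the collector tested earlier follows; the helper class and method names are made up for illustration, and the collector's package is assumed to be org.apache.lucene.search as in this patch's test layout.

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.EarlyTerminatingSortingCollector; // package assumed per this patch's layout
import org.apache.lucene.search.Sort;

final class IndexSortSupport {
  // Hypothetical helper, not part of the patch: a leaf can be searched with
  // early termination only if it reports an index sort that is compatible
  // with the sort the query asks for.
  static boolean canTerminateEarly(LeafReader leaf, Sort querySort) {
    Sort indexSort = leaf.getIndexSort(); // null means this leaf is unsorted
    return indexSort != null
        && EarlyTerminatingSortingCollector.canEarlyTerminate(querySort, indexSort);
  }
}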
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java indexsort/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
--- trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java 2016-04-24 06:00:46.369895938 -0400
+++ indexsort/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java 2016-05-10 05:44:23.752471119 -0400
@@ -40,6 +40,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.*;
@@ -1606,6 +1607,10 @@
return info.getNormDocValues();
}
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
}
/**
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java indexsort/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
--- trunk/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java 2016-01-24 13:09:50.028989954 -0500
+++ indexsort/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java 2016-05-10 05:44:23.752471119 -0400
@@ -140,7 +140,7 @@
SegmentInfo info = infoPerCommit.info;
// Same info just changing the dir:
SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.maxDoc(),
- info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>());
+ info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>(), null);
destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.getDelCount(),
infoPerCommit.getDelGen(), infoPerCommit.getFieldInfosGen(),
infoPerCommit.getDocValuesGen()));
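
The only change IndexSplitter needs is the extra trailing argument this patch threads through the SegmentInfo constructor: the index Sort, with null meaning the segment is not sorted. For contrast, a hedged sketch of the same call recording a sorted segment is shown below; it reuses the destFSDir and info variables from the hunk above, and the "timestamp" field name is made up.

import java.util.HashMap;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

// Sketch only: identical to the call above, but declaring that the segment's
// documents are ordered by a hypothetical long-valued "timestamp" field
// instead of passing null for the trailing index-sort argument.
Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG));
SegmentInfo sortedInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.maxDoc(),
    info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(),
    new HashMap<>(), indexSort);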
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java indexsort/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java
--- trunk/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java 2016-02-16 11:18:34.749021816 -0500
+++ indexsort/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,259 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-
-import org.apache.lucene.codecs.DocValuesProducer;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.NormsProducer;
-import org.apache.lucene.codecs.StoredFieldsReader;
-import org.apache.lucene.codecs.TermVectorsReader;
-import org.apache.lucene.util.Bits;
-
-/** this is a hack to make SortingMP fast! */
-class MergeReaderWrapper extends LeafReader {
- final SegmentReader in;
- final FieldsProducer fields;
- final NormsProducer norms;
- final DocValuesProducer docValues;
- final StoredFieldsReader store;
- final TermVectorsReader vectors;
-
- MergeReaderWrapper(SegmentReader in) throws IOException {
- this.in = in;
-
- FieldsProducer fields = in.getPostingsReader();
- if (fields != null) {
- fields = fields.getMergeInstance();
- }
- this.fields = fields;
-
- NormsProducer norms = in.getNormsReader();
- if (norms != null) {
- norms = norms.getMergeInstance();
- }
- this.norms = norms;
-
- DocValuesProducer docValues = in.getDocValuesReader();
- if (docValues != null) {
- docValues = docValues.getMergeInstance();
- }
- this.docValues = docValues;
-
- StoredFieldsReader store = in.getFieldsReader();
- if (store != null) {
- store = store.getMergeInstance();
- }
- this.store = store;
-
- TermVectorsReader vectors = in.getTermVectorsReader();
- if (vectors != null) {
- vectors = vectors.getMergeInstance();
- }
- this.vectors = vectors;
- }
-
- @Override
- public void addCoreClosedListener(CoreClosedListener listener) {
- in.addCoreClosedListener(listener);
- }
-
- @Override
- public void removeCoreClosedListener(CoreClosedListener listener) {
- in.removeCoreClosedListener(listener);
- }
-
- @Override
- public Fields fields() throws IOException {
- return fields;
- }
-
- @Override
- public NumericDocValues getNumericDocValues(String field) throws IOException {
- ensureOpen();
- FieldInfo fi = getFieldInfos().fieldInfo(field);
- if (fi == null) {
- // Field does not exist
- return null;
- }
- if (fi.getDocValuesType() != DocValuesType.NUMERIC) {
- // Field was not indexed with doc values
- return null;
- }
- return docValues.getNumeric(fi);
- }
-
- @Override
- public BinaryDocValues getBinaryDocValues(String field) throws IOException {
- ensureOpen();
- FieldInfo fi = getFieldInfos().fieldInfo(field);
- if (fi == null) {
- // Field does not exist
- return null;
- }
- if (fi.getDocValuesType() != DocValuesType.BINARY) {
- // Field was not indexed with doc values
- return null;
- }
- return docValues.getBinary(fi);
- }
-
- @Override
- public SortedDocValues getSortedDocValues(String field) throws IOException {
- ensureOpen();
- FieldInfo fi = getFieldInfos().fieldInfo(field);
- if (fi == null) {
- // Field does not exist
- return null;
- }
- if (fi.getDocValuesType() != DocValuesType.SORTED) {
- // Field was not indexed with doc values
- return null;
- }
- return docValues.getSorted(fi);
- }
-
- @Override
- public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
- ensureOpen();
- FieldInfo fi = getFieldInfos().fieldInfo(field);
- if (fi == null) {
- // Field does not exist
- return null;
- }
- if (fi.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
- // Field was not indexed with doc values
- return null;
- }
- return docValues.getSortedNumeric(fi);
- }
-
- @Override
- public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
- ensureOpen();
- FieldInfo fi = getFieldInfos().fieldInfo(field);
- if (fi == null) {
- // Field does not exist
- return null;
- }
- if (fi.getDocValuesType() != DocValuesType.SORTED_SET) {
- // Field was not indexed with doc values
- return null;
- }
- return docValues.getSortedSet(fi);
- }
-
- @Override
- public Bits getDocsWithField(String field) throws IOException {
- ensureOpen();
- FieldInfo fi = getFieldInfos().fieldInfo(field);
- if (fi == null) {
- // Field does not exist
- return null;
- }
- if (fi.getDocValuesType() == DocValuesType.NONE) {
- // Field was not indexed with doc values
- return null;
- }
- return docValues.getDocsWithField(fi);
- }
-
- @Override
- public NumericDocValues getNormValues(String field) throws IOException {
- ensureOpen();
- FieldInfo fi = getFieldInfos().fieldInfo(field);
- if (fi == null || !fi.hasNorms()) {
- // Field does not exist or does not index norms
- return null;
- }
- return norms.getNorms(fi);
- }
-
- @Override
- public FieldInfos getFieldInfos() {
- return in.getFieldInfos();
- }
-
- @Override
- public Bits getLiveDocs() {
- return in.getLiveDocs();
- }
-
- @Override
- public void checkIntegrity() throws IOException {
- in.checkIntegrity();
- }
-
- @Override
- public Fields getTermVectors(int docID) throws IOException {
- ensureOpen();
- checkBounds(docID);
- if (vectors == null) {
- return null;
- }
- return vectors.get(docID);
- }
-
- @Override
- public PointValues getPointValues() {
- return in.getPointValues();
- }
-
- @Override
- public int numDocs() {
- return in.numDocs();
- }
-
- @Override
- public int maxDoc() {
- return in.maxDoc();
- }
-
- @Override
- public void document(int docID, StoredFieldVisitor visitor) throws IOException {
- ensureOpen();
- checkBounds(docID);
- store.visitDocument(docID, visitor);
- }
-
- @Override
- protected void doClose() throws IOException {
- in.close();
- }
-
- @Override
- public Object getCoreCacheKey() {
- return in.getCoreCacheKey();
- }
-
- @Override
- public Object getCombinedCoreAndDeletesKey() {
- return in.getCombinedCoreAndDeletesKey();
- }
-
- private void checkBounds(int docID) {
- if (docID < 0 || docID >= maxDoc()) {
- throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")");
- }
- }
-
- @Override
- public String toString() {
- return "MergeReaderWrapper(" + in + ")";
- }
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java indexsort/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
--- trunk/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java 2016-03-13 05:38:07.391183845 -0400
+++ indexsort/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java 2016-05-10 05:44:23.752471119 -0400
@@ -24,6 +24,7 @@
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
@@ -67,6 +68,11 @@
if (getFieldInfos().hasPointValues()) {
throw new IllegalArgumentException("cannot wrap points");
}
+ for(LeafReaderContext context : reader.leaves()) {
+ if (context.reader().getIndexSort() != null) {
+ throw new IllegalArgumentException("cannot use index sort");
+ }
+ }
fields = MultiFields.getFields(in);
in.registerParentReader(this);
this.merging = merging;
@@ -272,4 +278,9 @@
ctx.reader().checkIntegrity();
}
}
+
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
}
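
Besides returning null from getIndexSort(), the wrapper now refuses to flatten a reader that contains index-sorted leaves, since concatenating their doc IDs into one sequence would discard the sort. A hedged usage sketch follows; readerOverSortedIndex is a hypothetical DirectoryReader opened over a sorted index, and expectThrows is the LuceneTestCase helper already used in the tests earlier in this patch.

// Sketch only: any leaf that reports a non-null getIndexSort() now makes the
// wrapper throw, mirroring the loop added to the constructor in the hunk above.
expectThrows(IllegalArgumentException.class,
    () -> SlowCompositeReaderWrapper.wrap(readerOverSortedIndex));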
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/Sorter.java indexsort/lucene/misc/src/java/org/apache/lucene/index/Sorter.java
--- trunk/lucene/misc/src/java/org/apache/lucene/index/Sorter.java 2016-05-06 05:10:21.045026439 -0400
+++ indexsort/lucene/misc/src/java/org/apache/lucene/index/Sorter.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,287 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.util.Comparator;
-
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.LeafFieldComparator;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.util.TimSorter;
-import org.apache.lucene.util.packed.PackedInts;
-import org.apache.lucene.util.packed.PackedLongValues;
-
-/**
- * Sorts documents of a given index by returning a permutation on the document
- * IDs.
- * @lucene.experimental
- */
-final class Sorter {
- final Sort sort;
-
- /** Creates a new Sorter to sort the index with {@code sort} */
- Sorter(Sort sort) {
- if (sort.needsScores()) {
- throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
- }
- this.sort = sort;
- }
-
- /**
- * A permutation of doc IDs. For every document ID between <tt>0</tt> and
- * {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
- * return <code>docID</code>.
- */
- static abstract class DocMap {
-
- /** Given a doc ID from the original index, return its ordinal in the
- * sorted index. */
- abstract int oldToNew(int docID);
-
- /** Given the ordinal of a doc ID, return its doc ID in the original index. */
- abstract int newToOld(int docID);
-
- /** Return the number of documents in this map. This must be equal to the
- * {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the
- * {@link org.apache.lucene.index.LeafReader} which is sorted. */
- abstract int size();
- }
-
- /** Check consistency of a {@link DocMap}, useful for assertions. */
- static boolean isConsistent(DocMap docMap) {
- final int maxDoc = docMap.size();
- for (int i = 0; i < maxDoc; ++i) {
- final int newID = docMap.oldToNew(i);
- final int oldID = docMap.newToOld(newID);
- assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID;
- assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID;
- if (i != oldID || newID < 0 || newID >= maxDoc) {
- return false;
- }
- }
- return true;
- }
-
- /** A comparator of doc IDs. */
- static abstract class DocComparator {
-
- /** Compare docID1 against docID2. The contract for the return value is the
- * same as {@link Comparator#compare(Object, Object)}. */
- public abstract int compare(int docID1, int docID2);
-
- }
-
- private static final class DocValueSorter extends TimSorter {
-
- private final int[] docs;
- private final Sorter.DocComparator comparator;
- private final int[] tmp;
-
- DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
- super(docs.length / 64);
- this.docs = docs;
- this.comparator = comparator;
- tmp = new int[docs.length / 64];
- }
-
- @Override
- protected int compare(int i, int j) {
- return comparator.compare(docs[i], docs[j]);
- }
-
- @Override
- protected void swap(int i, int j) {
- int tmpDoc = docs[i];
- docs[i] = docs[j];
- docs[j] = tmpDoc;
- }
-
- @Override
- protected void copy(int src, int dest) {
- docs[dest] = docs[src];
- }
-
- @Override
- protected void save(int i, int len) {
- System.arraycopy(docs, i, tmp, 0, len);
- }
-
- @Override
- protected void restore(int i, int j) {
- docs[j] = tmp[i];
- }
-
- @Override
- protected int compareSaved(int i, int j) {
- return comparator.compare(tmp[i], docs[j]);
- }
- }
-
- /** Computes the old-to-new permutation over the given comparator. */
- private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
- // check if the index is sorted
- boolean sorted = true;
- for (int i = 1; i < maxDoc; ++i) {
- if (comparator.compare(i-1, i) > 0) {
- sorted = false;
- break;
- }
- }
- if (sorted) {
- return null;
- }
-
- // sort doc IDs
- final int[] docs = new int[maxDoc];
- for (int i = 0; i < maxDoc; i++) {
- docs[i] = i;
- }
-
- DocValueSorter sorter = new DocValueSorter(docs, comparator);
- // It can be common to sort a reader, add docs, sort it again, ... and in
- // that case timSort can save a lot of time
- sorter.sort(0, docs.length); // docs is now the newToOld mapping
-
- // The reason why we use MonotonicAppendingLongBuffer here is that it
- // wastes very little memory if the index is in random order but can save
- // a lot of memory if the index is already "almost" sorted
- final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
- for (int i = 0; i < maxDoc; ++i) {
- newToOldBuilder.add(docs[i]);
- }
- final PackedLongValues newToOld = newToOldBuilder.build();
-
- for (int i = 0; i < maxDoc; ++i) {
- docs[(int) newToOld.get(i)] = i;
- } // docs is now the oldToNew mapping
-
- final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
- for (int i = 0; i < maxDoc; ++i) {
- oldToNewBuilder.add(docs[i]);
- }
- final PackedLongValues oldToNew = oldToNewBuilder.build();
-
- return new Sorter.DocMap() {
-
- @Override
- public int oldToNew(int docID) {
- return (int) oldToNew.get(docID);
- }
-
- @Override
- public int newToOld(int docID) {
- return (int) newToOld.get(docID);
- }
-
- @Override
- public int size() {
- return maxDoc;
- }
- };
- }
-
- /**
- * Returns a mapping from the old document ID to its new location in the
- * sorted index. Implementations can use the auxiliary
- * {@link #sort(int, DocComparator)} to compute the old-to-new permutation
- * given a list of documents and their corresponding values.
- * <p>
- * A return value of <tt>null</tt> is allowed and means that
- * <code>reader</code> is already sorted.
- * <p>
- * <b>NOTE:</b> deleted documents are expected to appear in the mapping as
- * well, they will however be marked as deleted in the sorted view.
- */
- DocMap sort(LeafReader reader) throws IOException {
- SortField fields[] = sort.getSort();
- final int reverseMul[] = new int[fields.length];
- final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length];
-
- for (int i = 0; i < fields.length; i++) {
- reverseMul[i] = fields[i].getReverse() ? -1 : 1;
- comparators[i] = fields[i].getComparator(1, i).getLeafComparator(reader.getContext());
- comparators[i].setScorer(FAKESCORER);
- }
- final DocComparator comparator = new DocComparator() {
- @Override
- public int compare(int docID1, int docID2) {
- try {
- for (int i = 0; i < comparators.length; i++) {
- // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
- // the segments are always the same here...
- comparators[i].copy(0, docID1);
- comparators[i].setBottom(0);
- int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
- if (comp != 0) {
- return comp;
- }
- }
- return Integer.compare(docID1, docID2); // docid order tiebreak
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
- };
- return sort(reader.maxDoc(), comparator);
- }
-
- /**
- * Returns the identifier of this {@link Sorter}.
- * <p>This identifier is similar to {@link Object#hashCode()} and should be
- * chosen so that two instances of this class that sort documents likewise
- * will have the same identifier. On the contrary, this identifier should be
- * different on different {@link Sort sorts}.
- */
- public String getID() {
- return sort.toString();
- }
-
- @Override
- public String toString() {
- return getID();
- }
-
- static final Scorer FAKESCORER = new Scorer(null) {
-
- float score;
- int doc = -1;
- int freq = 1;
-
- @Override
- public int docID() {
- return doc;
- }
-
- public DocIdSetIterator iterator() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int freq() throws IOException {
- return freq;
- }
-
- @Override
- public float score() throws IOException {
- return score;
- }
- };
-
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java indexsort/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java
--- trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java 2016-03-08 17:22:26.836938630 -0500
+++ indexsort/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,940 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.lucene.index.Sorter.DocMap;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMFile;
-import org.apache.lucene.store.RAMInputStream;
-import org.apache.lucene.store.RAMOutputStream;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.TimSorter;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
-
-/**
- * An {@link org.apache.lucene.index.LeafReader} which supports sorting documents by a given
- * {@link Sort}. You can use this class to sort an index as follows:
- *
- * <pre class="prettyprint">
- * IndexWriter writer; // writer to which the sorted index will be added
- * DirectoryReader reader; // reader on the input index
- * Sort sort; // determines how the documents are sorted
- * LeafReader sortingReader = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
- * writer.addIndexes(reader);
- * writer.close();
- * reader.close();
- * </pre>
- *
- * @lucene.experimental
- */
-public class SortingLeafReader extends FilterLeafReader {
-
- private static class SortingFields extends FilterFields {
-
- private final Sorter.DocMap docMap;
- private final FieldInfos infos;
-
- public SortingFields(final Fields in, FieldInfos infos, Sorter.DocMap docMap) {
- super(in);
- this.docMap = docMap;
- this.infos = infos;
- }
-
- @Override
- public Terms terms(final String field) throws IOException {
- Terms terms = in.terms(field);
- if (terms == null) {
- return null;
- } else {
- return new SortingTerms(terms, infos.fieldInfo(field).getIndexOptions(), docMap);
- }
- }
-
- }
-
- private static class SortingTerms extends FilterTerms {
-
- private final Sorter.DocMap docMap;
- private final IndexOptions indexOptions;
-
- public SortingTerms(final Terms in, IndexOptions indexOptions, final Sorter.DocMap docMap) {
- super(in);
- this.docMap = docMap;
- this.indexOptions = indexOptions;
- }
-
- @Override
- public TermsEnum iterator() throws IOException {
- return new SortingTermsEnum(in.iterator(), docMap, indexOptions, hasPositions());
- }
-
- @Override
- public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm)
- throws IOException {
- return new SortingTermsEnum(in.intersect(compiled, startTerm), docMap, indexOptions, hasPositions());
- }
-
- }
-
- private static class SortingTermsEnum extends FilterTermsEnum {
-
- final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
- private final IndexOptions indexOptions;
- private final boolean hasPositions;
-
- public SortingTermsEnum(final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions, boolean hasPositions) {
- super(in);
- this.docMap = docMap;
- this.indexOptions = indexOptions;
- this.hasPositions = hasPositions;
- }
-
- Bits newToOld(final Bits liveDocs) {
- if (liveDocs == null) {
- return null;
- }
- return new Bits() {
-
- @Override
- public boolean get(int index) {
- return liveDocs.get(docMap.oldToNew(index));
- }
-
- @Override
- public int length() {
- return liveDocs.length();
- }
-
- };
- }
-
- @Override
- public PostingsEnum postings( PostingsEnum reuse, final int flags) throws IOException {
-
- if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
- final PostingsEnum inReuse;
- final SortingPostingsEnum wrapReuse;
- if (reuse != null && reuse instanceof SortingPostingsEnum) {
- // if we're asked to reuse the given DocsEnum and it is Sorting, return
- // the wrapped one, since some Codecs expect it.
- wrapReuse = (SortingPostingsEnum) reuse;
- inReuse = wrapReuse.getWrapped();
- } else {
- wrapReuse = null;
- inReuse = reuse;
- }
-
- final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags);
- // we ignore the fact that offsets may be stored but not asked for,
- // since this code is expected to be used during addIndexes which will
- // ask for everything. if that assumption changes in the future, we can
- // factor in whether 'flags' says offsets are not required.
- final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
- return new SortingPostingsEnum(docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets);
- }
-
- final PostingsEnum inReuse;
- final SortingDocsEnum wrapReuse;
- if (reuse != null && reuse instanceof SortingDocsEnum) {
- // if we're asked to reuse the given DocsEnum and it is Sorting, return
- // the wrapped one, since some Codecs expect it.
- wrapReuse = (SortingDocsEnum) reuse;
- inReuse = wrapReuse.getWrapped();
- } else {
- wrapReuse = null;
- inReuse = reuse;
- }
-
- final PostingsEnum inDocs = in.postings(inReuse, flags);
- final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
- return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
- }
-
- }
-
- private static class SortingBinaryDocValues extends BinaryDocValues {
-
- private final BinaryDocValues in;
- private final Sorter.DocMap docMap;
-
- SortingBinaryDocValues(BinaryDocValues in, Sorter.DocMap docMap) {
- this.in = in;
- this.docMap = docMap;
- }
-
- @Override
- public BytesRef get(int docID) {
- return in.get(docMap.newToOld(docID));
- }
- }
-
- private static class SortingNumericDocValues extends NumericDocValues {
-
- private final NumericDocValues in;
- private final Sorter.DocMap docMap;
-
- public SortingNumericDocValues(final NumericDocValues in, Sorter.DocMap docMap) {
- this.in = in;
- this.docMap = docMap;
- }
-
- @Override
- public long get(int docID) {
- return in.get(docMap.newToOld(docID));
- }
- }
-
- private static class SortingSortedNumericDocValues extends SortedNumericDocValues {
-
- private final SortedNumericDocValues in;
- private final Sorter.DocMap docMap;
-
- SortingSortedNumericDocValues(SortedNumericDocValues in, DocMap docMap) {
- this.in = in;
- this.docMap = docMap;
- }
-
- @Override
- public int count() {
- return in.count();
- }
-
- @Override
- public void setDocument(int doc) {
- in.setDocument(docMap.newToOld(doc));
- }
-
- @Override
- public long valueAt(int index) {
- return in.valueAt(index);
- }
- }
-
- private static class SortingBits implements Bits {
-
- private final Bits in;
- private final Sorter.DocMap docMap;
-
- public SortingBits(final Bits in, Sorter.DocMap docMap) {
- this.in = in;
- this.docMap = docMap;
- }
-
- @Override
- public boolean get(int index) {
- return in.get(docMap.newToOld(index));
- }
-
- @Override
- public int length() {
- return in.length();
- }
- }
-
- private static class SortingPointValues extends PointValues {
-
- private final PointValues in;
- private final Sorter.DocMap docMap;
-
- public SortingPointValues(final PointValues in, Sorter.DocMap docMap) {
- this.in = in;
- this.docMap = docMap;
- }
-
- @Override
- public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
- in.intersect(fieldName,
- new IntersectVisitor() {
- @Override
- public void visit(int docID) throws IOException {
- visitor.visit(docMap.oldToNew(docID));
- }
-
- @Override
- public void visit(int docID, byte[] packedValue) throws IOException {
- visitor.visit(docMap.oldToNew(docID), packedValue);
- }
-
- @Override
- public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
- return visitor.compare(minPackedValue, maxPackedValue);
- }
- });
- }
-
- @Override
- public byte[] getMinPackedValue(String fieldName) throws IOException {
- return in.getMinPackedValue(fieldName);
- }
-
- @Override
- public byte[] getMaxPackedValue(String fieldName) throws IOException {
- return in.getMaxPackedValue(fieldName);
- }
-
- @Override
- public int getNumDimensions(String fieldName) throws IOException {
- return in.getNumDimensions(fieldName);
- }
-
- @Override
- public int getBytesPerDimension(String fieldName) throws IOException {
- return in.getBytesPerDimension(fieldName);
- }
-
- @Override
- public long size(String fieldName) {
- return in.size(fieldName);
- }
-
- @Override
- public int getDocCount(String fieldName) {
- return in.getDocCount(fieldName);
- }
- }
-
- private static class SortingSortedDocValues extends SortedDocValues {
-
- private final SortedDocValues in;
- private final Sorter.DocMap docMap;
-
- SortingSortedDocValues(SortedDocValues in, Sorter.DocMap docMap) {
- this.in = in;
- this.docMap = docMap;
- }
-
- @Override
- public int getOrd(int docID) {
- return in.getOrd(docMap.newToOld(docID));
- }
-
- @Override
- public BytesRef lookupOrd(int ord) {
- return in.lookupOrd(ord);
- }
-
- @Override
- public int getValueCount() {
- return in.getValueCount();
- }
-
- @Override
- public BytesRef get(int docID) {
- return in.get(docMap.newToOld(docID));
- }
-
- @Override
- public int lookupTerm(BytesRef key) {
- return in.lookupTerm(key);
- }
- }
-
- private static class SortingSortedSetDocValues extends SortedSetDocValues {
-
- private final SortedSetDocValues in;
- private final Sorter.DocMap docMap;
-
- SortingSortedSetDocValues(SortedSetDocValues in, Sorter.DocMap docMap) {
- this.in = in;
- this.docMap = docMap;
- }
-
- @Override
- public long nextOrd() {
- return in.nextOrd();
- }
-
- @Override
- public void setDocument(int docID) {
- in.setDocument(docMap.newToOld(docID));
- }
-
- @Override
- public BytesRef lookupOrd(long ord) {
- return in.lookupOrd(ord);
- }
-
- @Override
- public long getValueCount() {
- return in.getValueCount();
- }
-
- @Override
- public long lookupTerm(BytesRef key) {
- return in.lookupTerm(key);
- }
- }
-
- static class SortingDocsEnum extends FilterPostingsEnum {
-
- private static final class DocFreqSorter extends TimSorter {
-
- private int[] docs;
- private int[] freqs;
- private final int[] tmpDocs;
- private int[] tmpFreqs;
-
- public DocFreqSorter(int maxDoc) {
- super(maxDoc / 64);
- this.tmpDocs = new int[maxDoc / 64];
- }
-
- public void reset(int[] docs, int[] freqs) {
- this.docs = docs;
- this.freqs = freqs;
- if (freqs != null && tmpFreqs == null) {
- tmpFreqs = new int[tmpDocs.length];
- }
- }
-
- @Override
- protected int compare(int i, int j) {
- return docs[i] - docs[j];
- }
-
- @Override
- protected void swap(int i, int j) {
- int tmpDoc = docs[i];
- docs[i] = docs[j];
- docs[j] = tmpDoc;
-
- if (freqs != null) {
- int tmpFreq = freqs[i];
- freqs[i] = freqs[j];
- freqs[j] = tmpFreq;
- }
- }
-
- @Override
- protected void copy(int src, int dest) {
- docs[dest] = docs[src];
- if (freqs != null) {
- freqs[dest] = freqs[src];
- }
- }
-
- @Override
- protected void save(int i, int len) {
- System.arraycopy(docs, i, tmpDocs, 0, len);
- if (freqs != null) {
- System.arraycopy(freqs, i, tmpFreqs, 0, len);
- }
- }
-
- @Override
- protected void restore(int i, int j) {
- docs[j] = tmpDocs[i];
- if (freqs != null) {
- freqs[j] = tmpFreqs[i];
- }
- }
-
- @Override
- protected int compareSaved(int i, int j) {
- return tmpDocs[i] - docs[j];
- }
- }
-
- private final int maxDoc;
- private final DocFreqSorter sorter;
- private int[] docs;
- private int[] freqs;
- private int docIt = -1;
- private final int upto;
- private final boolean withFreqs;
-
- SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, final PostingsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
- super(in);
- this.maxDoc = maxDoc;
- this.withFreqs = withFreqs;
- if (reuse != null) {
- if (reuse.maxDoc == maxDoc) {
- sorter = reuse.sorter;
- } else {
- sorter = new DocFreqSorter(maxDoc);
- }
- docs = reuse.docs;
- freqs = reuse.freqs; // maybe null
- } else {
- docs = new int[64];
- sorter = new DocFreqSorter(maxDoc);
- }
- docIt = -1;
- int i = 0;
- int doc;
- if (withFreqs) {
- if (freqs == null || freqs.length < docs.length) {
- freqs = new int[docs.length];
- }
- while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
- if (i >= docs.length) {
- docs = ArrayUtil.grow(docs, docs.length + 1);
- freqs = ArrayUtil.grow(freqs, freqs.length + 1);
- }
- docs[i] = docMap.oldToNew(doc);
- freqs[i] = in.freq();
- ++i;
- }
- } else {
- freqs = null;
- while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
- if (i >= docs.length) {
- docs = ArrayUtil.grow(docs, docs.length + 1);
- }
- docs[i++] = docMap.oldToNew(doc);
- }
- }
- // TimSort can save much time compared to other sorts in case of
- // reverse sorting, or when sorting a concatenation of sorted readers
- sorter.reset(docs, freqs);
- sorter.sort(0, i);
- upto = i;
- }
-
- // for testing
- boolean reused(PostingsEnum other) {
- if (other == null || !(other instanceof SortingDocsEnum)) {
- return false;
- }
- return docs == ((SortingDocsEnum) other).docs;
- }
-
- @Override
- public int advance(final int target) throws IOException {
- // need to support it for checkIndex, but in practice it won't be called, so
- // don't bother to implement efficiently for now.
- return slowAdvance(target);
- }
-
- @Override
- public int docID() {
- return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
- }
-
- @Override
- public int freq() throws IOException {
- return withFreqs && docIt < upto ? freqs[docIt] : 1;
- }
-
- @Override
- public int nextDoc() throws IOException {
- if (++docIt >= upto) return NO_MORE_DOCS;
- return docs[docIt];
- }
-
- /** Returns the wrapped {@link PostingsEnum}. */
- PostingsEnum getWrapped() {
- return in;
- }
-
- // we buffer up docs/freqs only, don't forward any positions requests to underlying enum
-
- @Override
- public int nextPosition() throws IOException {
- return -1;
- }
-
- @Override
- public int startOffset() throws IOException {
- return -1;
- }
-
- @Override
- public int endOffset() throws IOException {
- return -1;
- }
-
- @Override
- public BytesRef getPayload() throws IOException {
- return null;
- }
- }
-
- static class SortingPostingsEnum extends FilterPostingsEnum {
-
- /**
- * A {@link TimSorter} which sorts two parallel arrays of doc IDs and
- * offsets in one go. Everytime a doc ID is 'swapped', its corresponding offset
- * is swapped too.
- */
- private static final class DocOffsetSorter extends TimSorter {
-
- private int[] docs;
- private long[] offsets;
- private final int[] tmpDocs;
- private final long[] tmpOffsets;
-
- public DocOffsetSorter(int maxDoc) {
- super(maxDoc / 64);
- this.tmpDocs = new int[maxDoc / 64];
- this.tmpOffsets = new long[maxDoc / 64];
- }
-
- public void reset(int[] docs, long[] offsets) {
- this.docs = docs;
- this.offsets = offsets;
- }
-
- @Override
- protected int compare(int i, int j) {
- return docs[i] - docs[j];
- }
-
- @Override
- protected void swap(int i, int j) {
- int tmpDoc = docs[i];
- docs[i] = docs[j];
- docs[j] = tmpDoc;
-
- long tmpOffset = offsets[i];
- offsets[i] = offsets[j];
- offsets[j] = tmpOffset;
- }
-
- @Override
- protected void copy(int src, int dest) {
- docs[dest] = docs[src];
- offsets[dest] = offsets[src];
- }
-
- @Override
- protected void save(int i, int len) {
- System.arraycopy(docs, i, tmpDocs, 0, len);
- System.arraycopy(offsets, i, tmpOffsets, 0, len);
- }
-
- @Override
- protected void restore(int i, int j) {
- docs[j] = tmpDocs[i];
- offsets[j] = tmpOffsets[i];
- }
-
- @Override
- protected int compareSaved(int i, int j) {
- return tmpDocs[i] - docs[j];
- }
- }
-
- private final int maxDoc;
- private final DocOffsetSorter sorter;
- private int[] docs;
- private long[] offsets;
- private final int upto;
-
- private final IndexInput postingInput;
- private final boolean storeOffsets;
-
- private int docIt = -1;
- private int pos;
- private int startOffset = -1;
- private int endOffset = -1;
- private final BytesRef payload;
- private int currFreq;
-
- private final RAMFile file;
-
- SortingPostingsEnum(int maxDoc, SortingPostingsEnum reuse, final PostingsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException {
- super(in);
- this.maxDoc = maxDoc;
- this.storeOffsets = storeOffsets;
- if (reuse != null) {
- docs = reuse.docs;
- offsets = reuse.offsets;
- payload = reuse.payload;
- file = reuse.file;
- if (reuse.maxDoc == maxDoc) {
- sorter = reuse.sorter;
- } else {
- sorter = new DocOffsetSorter(maxDoc);
- }
- } else {
- docs = new int[32];
- offsets = new long[32];
- payload = new BytesRef(32);
- file = new RAMFile();
- sorter = new DocOffsetSorter(maxDoc);
- }
- final IndexOutput out = new RAMOutputStream(file, false);
- int doc;
- int i = 0;
- while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (i == docs.length) {
- final int newLength = ArrayUtil.oversize(i + 1, 4);
- docs = Arrays.copyOf(docs, newLength);
- offsets = Arrays.copyOf(offsets, newLength);
- }
- docs[i] = docMap.oldToNew(doc);
- offsets[i] = out.getFilePointer();
- addPositions(in, out);
- i++;
- }
- upto = i;
- sorter.reset(docs, offsets);
- sorter.sort(0, upto);
- out.close();
- this.postingInput = new RAMInputStream("", file);
- }
-
- // for testing
- boolean reused(PostingsEnum other) {
- if (other == null || !(other instanceof SortingPostingsEnum)) {
- return false;
- }
- return docs == ((SortingPostingsEnum) other).docs;
- }
-
- private void addPositions(final PostingsEnum in, final IndexOutput out) throws IOException {
- int freq = in.freq();
- out.writeVInt(freq);
- int previousPosition = 0;
- int previousEndOffset = 0;
- for (int i = 0; i < freq; i++) {
- final int pos = in.nextPosition();
- final BytesRef payload = in.getPayload();
- // The low-order bit of token is set only if there is a payload, the
- // previous bits are the delta-encoded position.
- final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
- out.writeVInt(token);
- previousPosition = pos;
- if (storeOffsets) { // don't encode offsets if they are not stored
- final int startOffset = in.startOffset();
- final int endOffset = in.endOffset();
- out.writeVInt(startOffset - previousEndOffset);
- out.writeVInt(endOffset - startOffset);
- previousEndOffset = endOffset;
- }
- if (payload != null) {
- out.writeVInt(payload.length);
- out.writeBytes(payload.bytes, payload.offset, payload.length);
- }
- }
- }
-
- @Override
- public int advance(final int target) throws IOException {
- // need to support it for checkIndex, but in practice it won't be called, so
- // don't bother to implement efficiently for now.
- return slowAdvance(target);
- }
-
- @Override
- public int docID() {
- return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
- }
-
- @Override
- public int endOffset() throws IOException {
- return endOffset;
- }
-
- @Override
- public int freq() throws IOException {
- return currFreq;
- }
-
- @Override
- public BytesRef getPayload() throws IOException {
- return payload.length == 0 ? null : payload;
- }
-
- @Override
- public int nextDoc() throws IOException {
- if (++docIt >= upto) return DocIdSetIterator.NO_MORE_DOCS;
- postingInput.seek(offsets[docIt]);
- currFreq = postingInput.readVInt();
- // reset variables used in nextPosition
- pos = 0;
- endOffset = 0;
- return docs[docIt];
- }
-
- @Override
- public int nextPosition() throws IOException {
- final int token = postingInput.readVInt();
- pos += token >>> 1;
- if (storeOffsets) {
- startOffset = endOffset + postingInput.readVInt();
- endOffset = startOffset + postingInput.readVInt();
- }
- if ((token & 1) != 0) {
- payload.offset = 0;
- payload.length = postingInput.readVInt();
- if (payload.length > payload.bytes.length) {
- payload.bytes = new byte[ArrayUtil.oversize(payload.length, 1)];
- }
- postingInput.readBytes(payload.bytes, 0, payload.length);
- } else {
- payload.length = 0;
- }
- return pos;
- }
-
- @Override
- public int startOffset() throws IOException {
- return startOffset;
- }
-
- /** Returns the wrapped {@link PostingsEnum}. */
- PostingsEnum getWrapped() {
- return in;
- }
- }
-
- /** Return a sorted view of <code>reader</code> according to the order
- * defined by <code>sort</code>. If the reader is already sorted, this
- * method might return the reader as-is. */
- public static LeafReader wrap(LeafReader reader, Sort sort) throws IOException {
- return wrap(reader, new Sorter(sort).sort(reader));
- }
-
- /** Expert: same as {@link #wrap(org.apache.lucene.index.LeafReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */
- static LeafReader wrap(LeafReader reader, Sorter.DocMap docMap) {
- if (docMap == null) {
- // the reader is already sorted
- return reader;
- }
- if (reader.maxDoc() != docMap.size()) {
- throw new IllegalArgumentException("reader.maxDoc() should be equal to docMap.size(), got" + reader.maxDoc() + " != " + docMap.size());
- }
- assert Sorter.isConsistent(docMap);
- return new SortingLeafReader(reader, docMap);
- }
-
- final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
-
- private SortingLeafReader(final LeafReader in, final Sorter.DocMap docMap) {
- super(in);
- this.docMap = docMap;
- }
-
- @Override
- public void document(final int docID, final StoredFieldVisitor visitor) throws IOException {
- in.document(docMap.newToOld(docID), visitor);
- }
-
- @Override
- public Fields fields() throws IOException {
- return new SortingFields(in.fields(), in.getFieldInfos(), docMap);
- }
-
- @Override
- public BinaryDocValues getBinaryDocValues(String field) throws IOException {
- BinaryDocValues oldDocValues = in.getBinaryDocValues(field);
- if (oldDocValues == null) {
- return null;
- } else {
- return new SortingBinaryDocValues(oldDocValues, docMap);
- }
- }
-
- @Override
- public Bits getLiveDocs() {
- final Bits inLiveDocs = in.getLiveDocs();
- if (inLiveDocs == null) {
- return null;
- } else {
- return new SortingBits(inLiveDocs, docMap);
- }
- }
-
- @Override
- public PointValues getPointValues() {
- final PointValues inPointValues = in.getPointValues();
- if (inPointValues == null) {
- return null;
- } else {
- // TODO: this is untested!
- return new SortingPointValues(inPointValues, docMap);
- }
- }
-
- @Override
- public NumericDocValues getNormValues(String field) throws IOException {
- final NumericDocValues norm = in.getNormValues(field);
- if (norm == null) {
- return null;
- } else {
- return new SortingNumericDocValues(norm, docMap);
- }
- }
-
- @Override
- public NumericDocValues getNumericDocValues(String field) throws IOException {
- final NumericDocValues oldDocValues = in.getNumericDocValues(field);
- if (oldDocValues == null) return null;
- return new SortingNumericDocValues(oldDocValues, docMap);
- }
-
- @Override
- public SortedNumericDocValues getSortedNumericDocValues(String field)
- throws IOException {
- final SortedNumericDocValues oldDocValues = in.getSortedNumericDocValues(field);
- if (oldDocValues == null) {
- return null;
- } else {
- return new SortingSortedNumericDocValues(oldDocValues, docMap);
- }
- }
-
- @Override
- public SortedDocValues getSortedDocValues(String field) throws IOException {
- SortedDocValues sortedDV = in.getSortedDocValues(field);
- if (sortedDV == null) {
- return null;
- } else {
- return new SortingSortedDocValues(sortedDV, docMap);
- }
- }
-
- @Override
- public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
- SortedSetDocValues sortedSetDV = in.getSortedSetDocValues(field);
- if (sortedSetDV == null) {
- return null;
- } else {
- return new SortingSortedSetDocValues(sortedSetDV, docMap);
- }
- }
-
- @Override
- public Bits getDocsWithField(String field) throws IOException {
- Bits bits = in.getDocsWithField(field);
- if (bits == null || bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) {
- return bits;
- } else {
- return new SortingBits(bits, docMap);
- }
- }
-
- @Override
- public Fields getTermVectors(final int docID) throws IOException {
- return in.getTermVectors(docMap.newToOld(docID));
- }
-
- @Override
- public String toString() {
- return "SortingLeafReader(" + in + ")";
- }
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java indexsort/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java
--- trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java 2016-02-16 11:18:34.753021816 -0500
+++ indexsort/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,264 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.MergeTrigger;
-import org.apache.lucene.index.MultiReader;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentInfos;
-import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.InfoStream;
-import org.apache.lucene.util.packed.PackedInts;
-import org.apache.lucene.util.packed.PackedLongValues;
-
-/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
- * before merging them. As a consequence, all segments resulting from a merge
- * will be sorted while segments resulting from a flush will be in the order
- * in which documents have been added.
- * <p><b>NOTE</b>: Never use this policy if you rely on
- * {@link IndexWriter#addDocuments(Iterable) IndexWriter.addDocuments}
- * to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
- * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
- * so that the order of segments is predictable. For example, using
- * {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make
- * the order of documents in a segment depend on the number of times the segment
- * has been merged.
- * @lucene.experimental */
-public final class SortingMergePolicy extends MergePolicyWrapper {
-
- /**
- * Put in the {@link SegmentInfo#getDiagnostics() diagnostics} to denote that
- * this segment is sorted.
- */
- public static final String SORTER_ID_PROP = "sorter";
-
- class SortingOneMerge extends OneMerge {
-
- List<CodecReader> unsortedReaders;
- Sorter.DocMap docMap;
- LeafReader sortedView;
- final InfoStream infoStream;
-
- SortingOneMerge(List<SegmentCommitInfo> segments, InfoStream infoStream) {
- super(segments);
- this.infoStream = infoStream;
- }
-
- @Override
- public List<CodecReader> getMergeReaders() throws IOException {
- if (unsortedReaders == null) {
- unsortedReaders = super.getMergeReaders();
- if (infoStream.isEnabled("SMP")) {
- infoStream.message("SMP", "sorting " + unsortedReaders);
- for (LeafReader leaf : unsortedReaders) {
- String sortDescription = getSortDescription(leaf);
- if (sortDescription == null) {
- sortDescription = "not sorted";
- }
- infoStream.message("SMP", "seg=" + leaf + " " + sortDescription);
- }
- }
- // wrap readers, to be optimal for merge;
- List<LeafReader> wrapped = new ArrayList<>(unsortedReaders.size());
- for (LeafReader leaf : unsortedReaders) {
- if (leaf instanceof SegmentReader) {
- leaf = new MergeReaderWrapper((SegmentReader)leaf);
- }
- wrapped.add(leaf);
- }
- final LeafReader atomicView;
- if (wrapped.size() == 1) {
- atomicView = wrapped.get(0);
- } else {
- final CompositeReader multiReader = new MultiReader(wrapped.toArray(new LeafReader[wrapped.size()]));
- atomicView = new SlowCompositeReaderWrapper(multiReader, true);
- }
- docMap = sorter.sort(atomicView);
- sortedView = SortingLeafReader.wrap(atomicView, docMap);
- }
- // a null doc map means that the readers are already sorted
- if (docMap == null) {
- if (infoStream.isEnabled("SMP")) {
- infoStream.message("SMP", "readers already sorted, omitting sort");
- }
- return unsortedReaders;
- } else {
- if (infoStream.isEnabled("SMP")) {
- infoStream.message("SMP", "sorting readers by " + sort);
- }
- return Collections.singletonList(SlowCodecReaderWrapper.wrap(sortedView));
- }
- }
-
- @Override
- public void setMergeInfo(SegmentCommitInfo info) {
- Map<String,String> diagnostics = info.info.getDiagnostics();
- diagnostics.put(SORTER_ID_PROP, sorter.getID());
- super.setMergeInfo(info);
- }
-
- private PackedLongValues getDeletes(List<CodecReader> readers) {
- PackedLongValues.Builder deletes = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
- int deleteCount = 0;
- for (LeafReader reader : readers) {
- final int maxDoc = reader.maxDoc();
- final Bits liveDocs = reader.getLiveDocs();
- for (int i = 0; i < maxDoc; ++i) {
- if (liveDocs != null && !liveDocs.get(i)) {
- ++deleteCount;
- } else {
- deletes.add(deleteCount);
- }
- }
- }
- return deletes.build();
- }
-
- @Override
- public MergePolicy.DocMap getDocMap(final MergeState mergeState) {
- if (unsortedReaders == null) {
- throw new IllegalStateException();
- }
- if (docMap == null) {
- return super.getDocMap(mergeState);
- }
- assert mergeState.docMaps.length == 1; // we returned a singleton reader
- final PackedLongValues deletes = getDeletes(unsortedReaders);
- return new MergePolicy.DocMap() {
- @Override
- public int map(int old) {
- final int oldWithDeletes = old + (int) deletes.get(old);
- final int newWithDeletes = docMap.oldToNew(oldWithDeletes);
- return mergeState.docMaps[0].get(newWithDeletes);
- }
- };
- }
-
- @Override
- public String toString() {
- return "SortingMergePolicy.SortingOneMerge(segments=" + segString() + " sort=" + sort + ")";
- }
- }
-
- class SortingMergeSpecification extends MergeSpecification {
- final InfoStream infoStream;
-
- SortingMergeSpecification(InfoStream infoStream) {
- this.infoStream = infoStream;
- }
-
- @Override
- public void add(OneMerge merge) {
- super.add(new SortingOneMerge(merge.segments, infoStream));
- }
-
- @Override
- public String segString(Directory dir) {
- return "SortingMergeSpec(" + super.segString(dir) + ", sorter=" + sorter + ")";
- }
-
- }
-
- /** Returns {@code true} if the given {@code reader} is sorted by the
- * {@code sort} given. Typically the given {@code sort} would be the
- * {@link SortingMergePolicy#getSort()} order of a {@link SortingMergePolicy}. */
- public static boolean isSorted(LeafReader reader, Sort sort) {
- String description = getSortDescription(reader);
- if (description != null && description.equals(sort.toString())) {
- return true;
- }
- return false;
- }
-
- private static String getSortDescription(LeafReader reader) {
- if (reader instanceof SegmentReader) {
- final SegmentReader segReader = (SegmentReader) reader;
- final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
- if (diagnostics != null) {
- return diagnostics.get(SORTER_ID_PROP);
- }
- } else if (reader instanceof FilterLeafReader) {
- return getSortDescription(FilterLeafReader.unwrap(reader));
- }
- return null;
- }
-
- private MergeSpecification sortedMergeSpecification(MergeSpecification specification, InfoStream infoStream) {
- if (specification == null) {
- return null;
- }
- MergeSpecification sortingSpec = new SortingMergeSpecification(infoStream);
- for (OneMerge merge : specification.merges) {
- sortingSpec.add(merge);
- }
- return sortingSpec;
- }
-
- final Sorter sorter;
- final Sort sort;
-
- /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
- public SortingMergePolicy(MergePolicy in, Sort sort) {
- super(in);
- this.sorter = new Sorter(sort);
- this.sort = sort;
- }
-
- /** Return the {@link Sort} order that is used to sort segments when merging. */
- public Sort getSort() {
- return sort;
- }
-
- @Override
- public MergeSpecification findMerges(MergeTrigger mergeTrigger,
- SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
- return sortedMergeSpecification(in.findMerges(mergeTrigger, segmentInfos, writer), writer.infoStream);
- }
-
- @Override
- public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
- int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer)
- throws IOException {
- return sortedMergeSpecification(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer), writer.infoStream);
- }
-
- @Override
- public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
- throws IOException {
- return sortedMergeSpecification(in.findForcedDeletesMerges(segmentInfos, writer), writer.infoStream);
- }
-
- @Override
- public String toString() {
- return "SortingMergePolicy(" + in + ", sorter=" + sorter + ")";
- }
-}
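
For context, the SortingMergePolicy removed above wrapped another MergePolicy and rewrote every merged segment in a fixed Sort order, tagging the segment diagnostics with the sorter id. A minimal sketch of how it was typically installed on an IndexWriterConfig before this patch; the "timestamp" field name and the StandardAnalyzer choice are illustrative assumptions, not part of the patch:

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;

public class SortingMergePolicyUsage {
  // Opens a writer whose merges rewrite segments in "timestamp" order (field name is illustrative).
  public static IndexWriter openSortedWriter(Directory dir) throws IOException {
    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    // Wrap whatever merge policy is already configured so merged segments come out sorted.
    iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), sort));
    return new IndexWriter(dir, iwc);
  }
}

The wrapper composes with the existing merge policy, which is why the sketch reuses iwc.getMergePolicy() rather than picking a specific policy.
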
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java indexsort/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java
--- trunk/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java 2016-02-16 11:18:34.753021816 -0500
+++ indexsort/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,224 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.SortingMergePolicy;
-import org.apache.lucene.util.BitSet;
-
-/**
- * Helper class to sort readers that contain blocks of documents.
- * <p>
- * Note that this class is intended to be used with {@link SortingMergePolicy},
- * and for other purposes has some limitations:
- * <ul>
- * <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
- * <li>Filling sort field values is not yet supported.
- * </ul>
- * @lucene.experimental
- */
-// TODO: can/should we clean this thing up (e.g. return a proper sort value)
-// and move to the join/ module?
-public class BlockJoinComparatorSource extends FieldComparatorSource {
- final Query parentsFilter;
- final Sort parentSort;
- final Sort childSort;
-
- /**
- * Create a new BlockJoinComparatorSource, sorting only blocks of documents
- * with {@code parentSort} and not reordering children within a block.
- *
- * @param parentsFilter Filter identifying parent documents
- * @param parentSort Sort for parent documents
- */
- public BlockJoinComparatorSource(Query parentsFilter, Sort parentSort) {
- this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC));
- }
-
- /**
- * Create a new BlockJoinComparatorSource, specifying the sort order for both
- * blocks of documents and children within a block.
- *
- * @param parentsFilter Filter identifying parent documents
- * @param parentSort Sort for parent documents
- * @param childSort Sort for child documents in the same block
- */
- public BlockJoinComparatorSource(Query parentsFilter, Sort parentSort, Sort childSort) {
- this.parentsFilter = parentsFilter;
- this.parentSort = parentSort;
- this.childSort = childSort;
- }
-
- @Override
- @SuppressWarnings({"unchecked", "rawtypes"})
- public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
- // we keep parallel slots: the parent ids and the child ids
- final int parentSlots[] = new int[numHits];
- final int childSlots[] = new int[numHits];
-
- SortField parentFields[] = parentSort.getSort();
- final int parentReverseMul[] = new int[parentFields.length];
- final FieldComparator<?> parentComparators[] = new FieldComparator[parentFields.length];
- for (int i = 0; i < parentFields.length; i++) {
- parentReverseMul[i] = parentFields[i].getReverse() ? -1 : 1;
- parentComparators[i] = parentFields[i].getComparator(1, i);
- }
-
- SortField childFields[] = childSort.getSort();
- final int childReverseMul[] = new int[childFields.length];
- final FieldComparator<?> childComparators[] = new FieldComparator[childFields.length];
- for (int i = 0; i < childFields.length; i++) {
- childReverseMul[i] = childFields[i].getReverse() ? -1 : 1;
- childComparators[i] = childFields[i].getComparator(1, i);
- }
-
- // NOTE: we could return parent ID as value but really our sort "value" is more complex...
- // So we throw UOE for now. At the moment you really should only use this at indexing time.
- return new FieldComparator<Integer>() {
- int bottomParent;
- int bottomChild;
- BitSet parentBits;
- LeafFieldComparator[] parentLeafComparators;
- LeafFieldComparator[] childLeafComparators;
-
- @Override
- public int compare(int slot1, int slot2) {
- try {
- return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public void setTopValue(Integer value) {
- // we don't have enough information (the docid is needed)
- throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
- }
-
- @Override
- public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
- if (parentBits != null) {
- throw new IllegalStateException("This comparator can only be used on a single segment");
- }
- IndexSearcher searcher = new IndexSearcher(ReaderUtil.getTopLevelContext(context));
- searcher.setQueryCache(null);
- final Weight weight = searcher.createNormalizedWeight(parentsFilter, false);
- final Scorer parents = weight.scorer(context);
- if (parents == null) {
- throw new IllegalStateException("LeafReader " + context.reader() + " contains no parents!");
- }
- parentBits = BitSet.of(parents.iterator(), context.reader().maxDoc());
- parentLeafComparators = new LeafFieldComparator[parentComparators.length];
- for (int i = 0; i < parentComparators.length; i++) {
- parentLeafComparators[i] = parentComparators[i].getLeafComparator(context);
- }
- childLeafComparators = new LeafFieldComparator[childComparators.length];
- for (int i = 0; i < childComparators.length; i++) {
- childLeafComparators[i] = childComparators[i].getLeafComparator(context);
- }
-
- return new LeafFieldComparator() {
-
- @Override
- public int compareBottom(int doc) throws IOException {
- return compare(bottomChild, bottomParent, doc, parent(doc));
- }
-
- @Override
- public int compareTop(int doc) throws IOException {
- // we don't have enough information (the docid is needed)
- throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
- }
-
- @Override
- public void copy(int slot, int doc) throws IOException {
- childSlots[slot] = doc;
- parentSlots[slot] = parent(doc);
- }
-
- @Override
- public void setBottom(int slot) {
- bottomParent = parentSlots[slot];
- bottomChild = childSlots[slot];
- }
-
- @Override
- public void setScorer(Scorer scorer) {
- for (LeafFieldComparator comp : parentLeafComparators) {
- comp.setScorer(scorer);
- }
- for (LeafFieldComparator comp : childLeafComparators) {
- comp.setScorer(scorer);
- }
- }
-
- };
- }
-
- @Override
- public Integer value(int slot) {
- // really our sort "value" is more complex...
- throw new UnsupportedOperationException("filling sort field values is not yet supported");
- }
-
- int parent(int doc) {
- return parentBits.nextSetBit(doc);
- }
-
- int compare(int docID1, int parent1, int docID2, int parent2) throws IOException {
- if (parent1 == parent2) { // both are in the same block
- if (docID1 == parent1 || docID2 == parent2) {
- // keep parents at the end of blocks
- return docID1 - docID2;
- } else {
- return compare(docID1, docID2, childLeafComparators, childReverseMul);
- }
- } else {
- int cmp = compare(parent1, parent2, parentLeafComparators, parentReverseMul);
- if (cmp == 0) {
- return parent1 - parent2;
- } else {
- return cmp;
- }
- }
- }
-
- int compare(int docID1, int docID2, LeafFieldComparator comparators[], int reverseMul[]) throws IOException {
- for (int i = 0; i < comparators.length; i++) {
- // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
- // the segments are always the same here...
- comparators[i].copy(0, docID1);
- comparators[i].setBottom(0);
- int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
- if (comp != 0) {
- return comp;
- }
- }
- return 0; // no need to tiebreak by docid
- }
- };
- }
-
- @Override
- public String toString() {
- return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")";
- }
-}
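
The class removed above builds a comparator that keeps each parent/child block contiguous: blocks are ordered by the parent sort and children are optionally reordered within a block by the child sort. A minimal sketch of assembling such a Sort; the field names ("parent", "parent_val", "child_val") mirror TestBlockJoinSorter further below and are assumptions, not fixed API names:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BlockJoinComparatorSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;

public class BlockJoinSortSketch {
  // Builds a Sort that keeps blocks intact, ordering blocks by parent_val and
  // children inside a block by child_val (all field and term names are illustrative).
  public static Sort blockSort() {
    Query parentsFilter = new TermQuery(new Term("parent", "true"));
    Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
    Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
    return new Sort(new SortField("custom",
        new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
  }
}

Per the javadoc above, a Sort built this way was intended to be handed to a SortingMergePolicy so that whole blocks stay together after merges.
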
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java indexsort/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
--- trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java 2016-02-16 11:18:34.753021816 -0500
+++ indexsort/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,146 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.SortingMergePolicy;
-import org.apache.lucene.search.LeafCollector;
-import org.apache.lucene.search.CollectionTerminatedException;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.FilterLeafCollector;
-import org.apache.lucene.search.FilterCollector;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.TopDocsCollector;
-import org.apache.lucene.search.TotalHitCountCollector;
-
-/**
- * A {@link Collector} that early terminates collection of documents on a
- * per-segment basis, if the segment was sorted according to the given
- * {@link Sort}.
- *
- * <p>
- * <b>NOTE:</b> the {@code Collector} detects segments sorted according to a
- * {@link SortingMergePolicy}'s {@link Sort} and so it's best used in conjunction
- * with a {@link SortingMergePolicy}. Also, it collects up to a specified
- * {@code numDocsToCollect} from each segment, and therefore is mostly suitable
- * for use in conjunction with collectors such as {@link TopDocsCollector}, and
- * not e.g. {@link TotalHitCountCollector}.
- * <p>
- * <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
- * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
- * will be correct. However the total {@link TopDocsCollector#getTotalHits()
- * hit count} will be underestimated since not all matching documents will have
- * been collected.
- * <p>
- * <b>NOTE</b>: This {@code Collector} uses {@link Sort#toString()} to detect
- * whether a segment was sorted with the same {@code Sort}. This has
- * two implications:
- * <ul>
- * <li>if a custom comparator is not implemented correctly and returns
- * different identifiers for equivalent instances, this collector will not
- * detect sorted segments,</li>
- * <li>if you suddenly change the {@link IndexWriter}'s
- * {@code SortingMergePolicy} to sort according to another criterion and if both
- * the old and the new {@code Sort}s have the same identifier, this
- * {@code Collector} will incorrectly detect sorted segments.</li>
- * </ul>
- *
- * @lucene.experimental
- */
-public class EarlyTerminatingSortingCollector extends FilterCollector {
-
- /** Returns whether collection can be early-terminated if the search sorts with
- * the provided {@code searchSort} and segments are merged with the provided
- * {@code mergePolicySort}. */
- public static boolean canEarlyTerminate(Sort searchSort, Sort mergePolicySort) {
- final SortField[] fields1 = searchSort.getSort();
- final SortField[] fields2 = mergePolicySort.getSort();
- // early termination is possible if fields1 is a prefix of fields2
- if (fields1.length > fields2.length) {
- return false;
- }
- return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length));
- }
-
- /** Sort used to sort the search results */
- protected final Sort sort;
- /** Number of documents to collect in each segment */
- protected final int numDocsToCollect;
- private final Sort mergePolicySort;
- private final AtomicBoolean terminatedEarly = new AtomicBoolean(false);
-
- /**
- * Create a new {@link EarlyTerminatingSortingCollector} instance.
- *
- * @param in
- * the collector to wrap
- * @param sort
- * the sort you are sorting the search results on
- * @param numDocsToCollect
- * the number of documents to collect on each segment. When wrapping
- * a {@link TopDocsCollector}, this number should be the number of
- * hits.
- * @param mergePolicySort
- * the sort your {@link SortingMergePolicy} uses
- * @throws IllegalArgumentException if the sort order doesn't allow for early
- * termination with the given merge policy.
- */
- public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, Sort mergePolicySort) {
- super(in);
- if (numDocsToCollect <= 0) {
- throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
- }
- if (canEarlyTerminate(sort, mergePolicySort) == false) {
- throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicySort);
- }
- this.sort = sort;
- this.numDocsToCollect = numDocsToCollect;
- this.mergePolicySort = mergePolicySort;
- }
-
- @Override
- public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
- if (SortingMergePolicy.isSorted(context.reader(), mergePolicySort)) {
- // segment is sorted, can early-terminate
- return new FilterLeafCollector(super.getLeafCollector(context)) {
- private int numCollected;
-
- @Override
- public void collect(int doc) throws IOException {
- super.collect(doc);
- if (++numCollected >= numDocsToCollect) {
- terminatedEarly.set(true);
- throw new CollectionTerminatedException();
- }
- }
-
- };
- } else {
- return super.getLeafCollector(context);
- }
- }
-
- public boolean terminatedEarly() {
- return terminatedEarly.get();
- }
-
-}
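
The collector removed above early-terminates per segment once numDocsToCollect hits have been collected, but only on segments that were written in the merge policy's sort order; otherwise the constructor rejects the combination. A minimal sketch of the guard-and-wrap pattern, leaving the wrapped Collector abstract (the helper name maybeWrap is an assumption):

import org.apache.lucene.search.Collector;
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
import org.apache.lucene.search.Sort;

public class EarlyTerminationSketch {
  // Wraps the given collector with early termination when the search sort is a
  // prefix of the sort the SortingMergePolicy used; otherwise returns it unchanged.
  public static Collector maybeWrap(Collector in, Sort searchSort, Sort mergePolicySort, int numHits) {
    if (EarlyTerminatingSortingCollector.canEarlyTerminate(searchSort, mergePolicySort)) {
      return new EarlyTerminatingSortingCollector(in, searchSort, numHits, mergePolicySort);
    }
    return in;
  }
}

canEarlyTerminate requires the search sort to be a prefix of the merge policy sort, so a search sorted by (ndv1) can terminate early on an index merged by (ndv1, ndv2), but not the other way around.
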
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java indexsort/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java
--- trunk/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java 2016-02-16 11:18:34.753021816 -0500
+++ indexsort/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.TestUtil;
-import org.junit.BeforeClass;
-
-public class IndexSortingTest extends SorterTestBase {
-
- private static final Sort[] SORT = new Sort[] {
- new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)),
- new Sort(new SortField(null, SortField.Type.DOC, true))
- };
-
- @BeforeClass
- public static void beforeClassSorterUtilTest() throws Exception {
- // NOTE: index was created by super's @BeforeClass
-
- // only read the values of the undeleted documents, since after addIndexes,
- // the deleted ones will be dropped from the index.
- Bits liveDocs = unsortedReader.getLiveDocs();
- List<Integer> values = new ArrayList<>();
- for (int i = 0; i < unsortedReader.maxDoc(); i++) {
- if (liveDocs == null || liveDocs.get(i)) {
- values.add(Integer.valueOf(unsortedReader.document(i).get(ID_FIELD)));
- }
- }
- int idx = random().nextInt(SORT.length);
- Sort sorter = SORT[idx];
- if (idx == 1) { // reverse doc sort
- Collections.reverse(values);
- } else {
- Collections.sort(values);
- if (random().nextBoolean()) {
- sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending
- Collections.reverse(values);
- }
- }
- sortedValues = values.toArray(new Integer[values.size()]);
- if (VERBOSE) {
- System.out.println("sortedValues: " + sortedValues);
- System.out.println("Sorter: " + sorter);
- }
-
- Directory target = newDirectory();
- IndexWriter writer = new IndexWriter(target, newIndexWriterConfig(null));
- LeafReader reader = SortingLeafReader.wrap(unsortedReader, sorter);
- writer.addIndexes(SlowCodecReaderWrapper.wrap(reader));
- writer.close();
- // NOTE: also closes unsortedReader
- reader.close();
- dir.close();
-
- // CheckIndex the target directory
- dir = target;
- TestUtil.checkIndex(dir);
-
- // set reader for tests
- sortedReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
- assertFalse("index should not have deletions", sortedReader.hasDeletions());
- }
-
-}
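
The test above demonstrates the offline recipe for sorting an existing index: wrap an unsorted view with SortingLeafReader and feed it to IndexWriter.addIndexes. A stand-alone sketch of that recipe under the same assumptions as the test (a null analyzer is passed, as in the test, because addIndexes does not analyze any fields):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SlowCodecReaderWrapper;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortingLeafReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;

public class OfflineIndexSorter {
  // Copies src into target with all documents rewritten in the given sort order.
  public static void sortIndex(Directory src, Directory target, Sort sort) throws IOException {
    LeafReader unsorted = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(src));
    LeafReader sorted = SortingLeafReader.wrap(unsorted, sort);
    try (IndexWriter writer = new IndexWriter(target, new IndexWriterConfig(null))) {
      // addIndexes consumes CodecReaders, so the sorted view is wrapped once more.
      writer.addIndexes(SlowCodecReaderWrapper.wrap(sorted));
    }
    sorted.close(); // also closes the underlying unsorted reader
  }
}
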
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java indexsort/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java
--- trunk/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java 2016-03-08 17:22:26.836938630 -0500
+++ indexsort/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,405 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.document.BinaryDocValuesField;
-import org.apache.lucene.document.BinaryPoint;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.document.SortedNumericDocValuesField;
-import org.apache.lucene.document.SortedSetDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.PointValues.IntersectVisitor;
-import org.apache.lucene.index.PointValues.Relation;
-import org.apache.lucene.index.SortingLeafReader.SortingDocsEnum;
-import org.apache.lucene.index.TermsEnum.SeekStatus;
-import org.apache.lucene.search.CollectionStatistics;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.TermStatistics;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.util.TestUtil;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-
-public abstract class SorterTestBase extends LuceneTestCase {
-
- static final class NormsSimilarity extends Similarity {
-
- private final Similarity in;
-
- public NormsSimilarity(Similarity in) {
- this.in = in;
- }
-
- @Override
- public long computeNorm(FieldInvertState state) {
- if (state.getName().equals(NORMS_FIELD)) {
- return Float.floatToIntBits(state.getBoost());
- } else {
- return in.computeNorm(state);
- }
- }
-
- @Override
- public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
- return in.computeWeight(collectionStats, termStats);
- }
-
- @Override
- public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
- return in.simScorer(weight, context);
- }
-
- }
-
- static final class PositionsTokenStream extends TokenStream {
-
- private final CharTermAttribute term;
- private final PayloadAttribute payload;
- private final OffsetAttribute offset;
-
- private int pos, off;
-
- public PositionsTokenStream() {
- term = addAttribute(CharTermAttribute.class);
- payload = addAttribute(PayloadAttribute.class);
- offset = addAttribute(OffsetAttribute.class);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (pos == 0) {
- return false;
- }
-
- clearAttributes();
- term.append(DOC_POSITIONS_TERM);
- payload.setPayload(new BytesRef(Integer.toString(pos)));
- offset.setOffset(off, off);
- --pos;
- ++off;
- return true;
- }
-
- void setId(int id) {
- pos = id / 10 + 1;
- off = 0;
- }
- }
-
- protected static final String ID_FIELD = "id";
- protected static final String DOCS_ENUM_FIELD = "docs";
- protected static final String DOCS_ENUM_TERM = "$all$";
- protected static final String DOC_POSITIONS_FIELD = "positions";
- protected static final String DOC_POSITIONS_TERM = "$all$";
- protected static final String NUMERIC_DV_FIELD = "numeric";
- protected static final String SORTED_NUMERIC_DV_FIELD = "sorted_numeric";
- protected static final String NORMS_FIELD = "norm";
- protected static final String BINARY_DV_FIELD = "binary";
- protected static final String SORTED_DV_FIELD = "sorted";
- protected static final String SORTED_SET_DV_FIELD = "sorted_set";
- protected static final String TERM_VECTORS_FIELD = "term_vectors";
- protected static final String DIMENSIONAL_FIELD = "numeric1d";
-
- private static final FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
- static {
- TERM_VECTORS_TYPE.setStoreTermVectors(true);
- TERM_VECTORS_TYPE.freeze();
- }
-
- private static final FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
- static {
- POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- POSITIONS_TYPE.freeze();
- }
-
- protected static Directory dir;
- protected static LeafReader unsortedReader;
- protected static LeafReader sortedReader;
- protected static Integer[] sortedValues;
-
- private static Document doc(final int id, PositionsTokenStream positions) {
- final Document doc = new Document();
- doc.add(new StringField(ID_FIELD, Integer.toString(id), Store.YES));
- doc.add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Store.NO));
- positions.setId(id);
- doc.add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE));
- doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id));
- TextField norms = new TextField(NORMS_FIELD, Integer.toString(id), Store.NO);
- norms.setBoost(Float.intBitsToFloat(id));
- doc.add(norms);
- doc.add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1))));
- doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id));
- doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id + 1));
- doc.add(new Field(TERM_VECTORS_FIELD, Integer.toString(id), TERM_VECTORS_TYPE));
- byte[] bytes = new byte[4];
- NumericUtils.intToSortableBytes(id, bytes, 0);
- // TODO: index time sorting doesn't yet support points
- //doc.add(new BinaryPoint(DIMENSIONAL_FIELD, bytes));
- return doc;
- }
-
- /** Creates an unsorted index; subclasses then sort this index and open sortedReader. */
- private static void createIndex(Directory dir, int numDocs, Random random) throws IOException {
- List<Integer> ids = new ArrayList<>();
- for (int i = 0; i < numDocs; i++) {
- ids.add(Integer.valueOf(i * 10));
- }
- // shuffle them for indexing
- Collections.shuffle(ids, random);
- if (VERBOSE) {
- System.out.println("Shuffled IDs for indexing: " + Arrays.toString(ids.toArray()));
- }
-
- PositionsTokenStream positions = new PositionsTokenStream();
- IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
- conf.setMaxBufferedDocs(4); // create some segments
- conf.setSimilarity(new NormsSimilarity(conf.getSimilarity())); // for testing norms field
- RandomIndexWriter writer = new RandomIndexWriter(random, dir, conf);
- writer.setDoRandomForceMerge(false);
- for (int id : ids) {
- writer.addDocument(doc(id, positions));
- }
- // delete some documents
- writer.commit();
- for (Integer id : ids) {
- if (random.nextDouble() < 0.2) {
- if (VERBOSE) {
- System.out.println("delete doc_id " + id);
- }
- writer.deleteDocuments(new Term(ID_FIELD, id.toString()));
- }
- }
- writer.close();
- }
-
- @BeforeClass
- public static void beforeClassSorterTestBase() throws Exception {
- dir = newDirectory();
- int numDocs = atLeast(20);
- createIndex(dir, numDocs, random());
-
- unsortedReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
- }
-
- @AfterClass
- public static void afterClassSorterTestBase() throws Exception {
- unsortedReader.close();
- sortedReader.close();
- dir.close();
- unsortedReader = sortedReader = null;
- dir = null;
- }
-
- public void testBinaryDocValuesField() throws Exception {
- BinaryDocValues dv = sortedReader.getBinaryDocValues(BINARY_DV_FIELD);
- for (int i = 0; i < sortedReader.maxDoc(); i++) {
- final BytesRef bytes = dv.get(i);
- assertEquals("incorrect binary DocValues for doc " + i, sortedValues[i].toString(), bytes.utf8ToString());
- }
- }
-
- public void testDocsAndPositionsEnum() throws Exception {
- TermsEnum termsEnum = sortedReader.terms(DOC_POSITIONS_FIELD).iterator();
- assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM)));
- PostingsEnum sortedPositions = termsEnum.postings(null, PostingsEnum.ALL);
- int doc;
-
- // test nextDoc()
- while ((doc = sortedPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- int freq = sortedPositions.freq();
- assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq);
- for (int i = 0; i < freq; i++) {
- assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition());
- assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset());
- assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset());
- assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString()));
- }
- }
-
- // test advance()
- final PostingsEnum reuse = sortedPositions;
- sortedPositions = termsEnum.postings(reuse, PostingsEnum.ALL);
- if (sortedPositions instanceof SortingDocsEnum) {
- assertTrue(((SortingDocsEnum) sortedPositions).reused(reuse)); // make sure reuse worked
- }
- doc = 0;
- while ((doc = sortedPositions.advance(doc + TestUtil.nextInt(random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS) {
- int freq = sortedPositions.freq();
- assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq);
- for (int i = 0; i < freq; i++) {
- assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition());
- assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset());
- assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset());
- assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString()));
- }
- }
- }
-
- Bits randomLiveDocs(int maxDoc) {
- if (rarely()) {
- if (random().nextBoolean()) {
- return null;
- } else {
- return new Bits.MatchNoBits(maxDoc);
- }
- }
- final FixedBitSet bits = new FixedBitSet(maxDoc);
- final int bitsSet = TestUtil.nextInt(random(), 1, maxDoc - 1);
- for (int i = 0; i < bitsSet; ++i) {
- while (true) {
- final int index = random().nextInt(maxDoc);
- if (!bits.get(index)) {
- bits.set(index);
- break;
- }
- }
- }
- return bits;
- }
-
- public void testDocsEnum() throws Exception {
- TermsEnum termsEnum = sortedReader.terms(DOCS_ENUM_FIELD).iterator();
- assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOCS_ENUM_TERM)));
- PostingsEnum docs = termsEnum.postings(null);
-
- int doc;
- while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(sortedReader.document(doc).get(ID_FIELD)));
- }
-
- PostingsEnum reuse = docs;
- docs = termsEnum.postings(reuse);
- if (docs instanceof SortingDocsEnum) {
- assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked
- }
- doc = -1;
- while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) {
- assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(sortedReader.document(doc).get(ID_FIELD)));
- }
- }
-
- public void testNormValues() throws Exception {
- NumericDocValues dv = sortedReader.getNormValues(NORMS_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- assertEquals("incorrect norm value for doc " + i, sortedValues[i].intValue(), dv.get(i));
- }
- }
-
- public void testNumericDocValuesField() throws Exception {
- NumericDocValues dv = sortedReader.getNumericDocValues(NUMERIC_DV_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- assertEquals("incorrect numeric DocValues for doc " + i, sortedValues[i].intValue(), dv.get(i));
- }
- }
-
- public void testSortedDocValuesField() throws Exception {
- SortedDocValues dv = sortedReader.getSortedDocValues(SORTED_DV_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- final BytesRef bytes = dv.get(i);
- assertEquals("incorrect sorted DocValues for doc " + i, sortedValues[i].toString(), bytes.utf8ToString());
- }
- }
-
- public void testSortedSetDocValuesField() throws Exception {
- SortedSetDocValues dv = sortedReader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- dv.setDocument(i);
- BytesRef bytes = dv.lookupOrd(dv.nextOrd());
- int value = sortedValues[i].intValue();
- assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value).toString(), bytes.utf8ToString());
- bytes = dv.lookupOrd(dv.nextOrd());
- assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value + 1).toString(), bytes.utf8ToString());
- assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
- }
- }
-
- public void testSortedNumericDocValuesField() throws Exception {
- SortedNumericDocValues dv = sortedReader.getSortedNumericDocValues(SORTED_NUMERIC_DV_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- dv.setDocument(i);
- assertEquals(2, dv.count());
- int value = sortedValues[i].intValue();
- assertEquals("incorrect sorted-numeric DocValues for doc " + i, value, dv.valueAt(0));
- assertEquals("incorrect sorted-numeric DocValues for doc " + i, value + 1, dv.valueAt(1));
- }
- }
-
- public void testTermVectors() throws Exception {
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- Terms terms = sortedReader.getTermVector(i, TERM_VECTORS_FIELD);
- assertNotNull("term vectors not found for doc " + i + " field [" + TERM_VECTORS_FIELD + "]", terms);
- assertEquals("incorrect term vector for doc " + i, sortedValues[i].toString(), terms.iterator().next().utf8ToString());
- }
- }
-
- // TODO: index sorting doesn't yet support points
- /*
- public void testPoints() throws Exception {
- PointValues values = sortedReader.getPointValues();
- values.intersect(DIMENSIONAL_FIELD,
- new IntersectVisitor() {
- @Override
- public void visit(int docID) {
- throw new IllegalStateException();
- }
-
- @Override
- public void visit(int docID, byte[] packedValues) {
- assertEquals(sortedValues[docID].intValue(), NumericUtils.bytesToInt(packedValues, 0));
- }
-
- @Override
- public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
- return Relation.CELL_CROSSES_QUERY;
- }
- });
- }
- */
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java indexsort/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java
--- trunk/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java 2016-03-02 04:32:40.451807337 -0500
+++ indexsort/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.util.Arrays;
-
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.TestUtil;
-import org.junit.BeforeClass;
-
-public class SortingLeafReaderTest extends SorterTestBase {
-
- @BeforeClass
- public static void beforeClassSortingLeafReaderTest() throws Exception {
- // NOTE: index was created by super's @BeforeClass
-
- // sort the index by id (as integer, in NUMERIC_DV_FIELD)
- Sort sort = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.INT));
- final Sorter.DocMap docMap = new Sorter(sort).sort(unsortedReader);
-
- // use the doc map to compute the expected values in sorted order
- NumericDocValues dv = unsortedReader.getNumericDocValues(NUMERIC_DV_FIELD);
- sortedValues = new Integer[unsortedReader.maxDoc()];
- for (int i = 0; i < unsortedReader.maxDoc(); ++i) {
- sortedValues[docMap.oldToNew(i)] = (int)dv.get(i);
- }
- if (VERBOSE) {
- System.out.println("docMap: " + docMap);
- System.out.println("sortedValues: " + Arrays.toString(sortedValues));
- }
-
- // sort the index by id (as integer, in NUMERIC_DV_FIELD)
- sortedReader = SortingLeafReader.wrap(unsortedReader, sort);
-
- if (VERBOSE) {
- System.out.print("mapped-deleted-docs: ");
- Bits mappedLiveDocs = sortedReader.getLiveDocs();
- for (int i = 0; i < mappedLiveDocs.length(); i++) {
- if (!mappedLiveDocs.get(i)) {
- System.out.print(i + " ");
- }
- }
- System.out.println();
- }
-
- TestUtil.checkReader(sortedReader);
- }
-
- public void testBadSort() throws Exception {
- IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
- SortingLeafReader.wrap(sortedReader, Sort.RELEVANCE);
- });
- assertEquals("Cannot sort an index with a Sort that refers to the relevance score", expected.getMessage());
- }
-
-}
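
The test above drives Sorter directly: sorting produces a Sorter.DocMap that maps old docIDs to their positions in the sorted view (and back via newToOld), or null when the reader is already sorted. Sorter is referenced only from inside org.apache.lucene.index in this patch, so this sketch is placed in that package as well; the method and field names are placeholders:

package org.apache.lucene.index; // Sorter is only reachable from this package

import java.io.IOException;

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

class DocMapSketch {
  // Returns, for each old docID of the reader, its position in the sorted view,
  // or null if the reader is already in sort order (field name is a placeholder).
  static int[] computeOldToNew(LeafReader reader, String longDocValuesField) throws IOException {
    Sort sort = new Sort(new SortField(longDocValuesField, SortField.Type.LONG));
    Sorter.DocMap docMap = new Sorter(sort).sort(reader);
    if (docMap == null) {
      return null; // already sorted, nothing to remap
    }
    int[] oldToNew = new int[docMap.size()];
    for (int oldDoc = 0; oldDoc < docMap.size(); oldDoc++) {
      oldToNew[oldDoc] = docMap.oldToNew(oldDoc);
    }
    return oldToNew;
  }
}
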
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java indexsort/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java
--- trunk/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java 2016-02-16 11:18:34.753021816 -0500
+++ indexsort/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.search.BlockJoinComparatorSource;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitSet;
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestBlockJoinSorter extends LuceneTestCase {
-
- public void test() throws IOException {
- final int numParents = atLeast(200);
- IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
- cfg.setMergePolicy(newLogMergePolicy());
- final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), cfg);
- final Document parentDoc = new Document();
- final NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
- parentDoc.add(parentVal);
- final StringField parent = new StringField("parent", "true", Store.YES);
- parentDoc.add(parent);
- for (int i = 0; i < numParents; ++i) {
- List<Document> documents = new ArrayList<>();
- final int numChildren = random().nextInt(10);
- for (int j = 0; j < numChildren; ++j) {
- final Document childDoc = new Document();
- childDoc.add(new NumericDocValuesField("child_val", random().nextInt(5)));
- documents.add(childDoc);
- }
- parentVal.setLongValue(random().nextInt(50));
- documents.add(parentDoc);
- writer.addDocuments(documents);
- }
- writer.forceMerge(1);
- IndexReader indexReader = writer.getReader();
- writer.close();
-
- IndexSearcher searcher = newSearcher(indexReader);
- indexReader = searcher.getIndexReader(); // newSearcher may have wrapped it
- assertEquals(1, indexReader.leaves().size());
- final LeafReader reader = indexReader.leaves().get(0).reader();
- final Query parentsFilter = new TermQuery(new Term("parent", "true"));
-
- final Weight weight = searcher.createNormalizedWeight(parentsFilter, false);
- final Scorer parents = weight.scorer(indexReader.leaves().get(0));
- final BitSet parentBits = BitSet.of(parents.iterator(), reader.maxDoc());
- final NumericDocValues parentValues = reader.getNumericDocValues("parent_val");
- final NumericDocValues childValues = reader.getNumericDocValues("child_val");
-
- final Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
- final Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
-
- final Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
- final Sorter sorter = new Sorter(sort);
- final Sorter.DocMap docMap = sorter.sort(reader);
- assertEquals(reader.maxDoc(), docMap.size());
-
- int[] children = new int[1];
- int numChildren = 0;
- int previousParent = -1;
- for (int i = 0; i < docMap.size(); ++i) {
- final int oldID = docMap.newToOld(i);
- if (parentBits.get(oldID)) {
- // check that we have the right children
- for (int j = 0; j < numChildren; ++j) {
- assertEquals(oldID, parentBits.nextSetBit(children[j]));
- }
- // check that children are sorted
- for (int j = 1; j < numChildren; ++j) {
- final int doc1 = children[j-1];
- final int doc2 = children[j];
- if (childValues.get(doc1) == childValues.get(doc2)) {
- assertTrue(doc1 < doc2); // sort is stable
- } else {
- assertTrue(childValues.get(doc1) < childValues.get(doc2));
- }
- }
- // check that parents are sorted
- if (previousParent != -1) {
- if (parentValues.get(previousParent) == parentValues.get(oldID)) {
- assertTrue(previousParent < oldID);
- } else {
- assertTrue(parentValues.get(previousParent) < parentValues.get(oldID));
- }
- }
- // reset
- previousParent = oldID;
- numChildren = 0;
- } else {
- children = ArrayUtil.grow(children, numChildren+1);
- children[numChildren++] = oldID;
- }
- }
- indexReader.close();
- writer.w.getDirectory().close();
- }
-
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java indexsort/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java
--- trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java 2016-03-02 04:32:40.451807337 -0500
+++ indexsort/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
-import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TieredMergePolicy;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-
-public class TestSortingMergePolicy extends BaseMergePolicyTestCase {
-
- private List<String> terms;
- private Directory dir1, dir2;
- private Sort sort;
- private boolean reversedSort;
- private IndexReader reader;
- private IndexReader sortedReader;
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- final Boolean reverse = (random().nextBoolean() ? null : new Boolean(random().nextBoolean()));
- final SortField sort_field = (reverse == null
- ? new SortField("ndv", SortField.Type.LONG)
- : new SortField("ndv", SortField.Type.LONG, reverse.booleanValue()));
- sort = new Sort(sort_field);
- reversedSort = (null != reverse && reverse.booleanValue());
- createRandomIndexes();
- }
-
- private Document randomDocument() {
- final Document doc = new Document();
- doc.add(new NumericDocValuesField("ndv", random().nextLong()));
- doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES));
- return doc;
- }
-
- public MergePolicy mergePolicy() {
- return newSortingMergePolicy(sort);
- }
-
- public static SortingMergePolicy newSortingMergePolicy(Sort sort) {
- // usually create a MP with a low merge factor so that many merges happen
- MergePolicy mp;
- int thingToDo = random().nextInt(3);
- if (thingToDo == 0) {
- TieredMergePolicy tmp = newTieredMergePolicy(random());
- final int numSegs = TestUtil.nextInt(random(), 3, 5);
- tmp.setSegmentsPerTier(numSegs);
- tmp.setMaxMergeAtOnce(TestUtil.nextInt(random(), 2, numSegs));
- mp = tmp;
- } else if (thingToDo == 1) {
- LogMergePolicy lmp = newLogMergePolicy(random());
- lmp.setMergeFactor(TestUtil.nextInt(random(), 3, 5));
- mp = lmp;
- } else {
- // just a regular random one from LTC (could be alcoholic etc)
- mp = newMergePolicy();
- }
- // wrap it with a sorting mp
- if (VERBOSE) {
- System.out.println("TEST: return SortingMergePolicy(mp=" + mp + " sort=" + sort + ")");
- }
- return new SortingMergePolicy(mp, sort);
- }
-
- private void createRandomIndexes() throws IOException {
- dir1 = newDirectory();
- dir2 = newDirectory();
- final int numDocs = atLeast(150);
- final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
- Set<String> randomTerms = new HashSet<>();
- while (randomTerms.size() < numTerms) {
- randomTerms.add(TestUtil.randomSimpleString(random()));
- }
- terms = new ArrayList<>(randomTerms);
- final long seed = random().nextLong();
- final IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
- final IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
- iwc2.setMergePolicy(mergePolicy());
- final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
- final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
- for (int i = 0; i < numDocs; ++i) {
- if (random().nextInt(5) == 0 && i != numDocs - 1) {
- final String term = RandomPicks.randomFrom(random(), terms);
- iw1.deleteDocuments(new Term("s", term));
- iw2.deleteDocuments(new Term("s", term));
- }
- final Document doc = randomDocument();
- iw1.addDocument(doc);
- iw2.addDocument(doc);
- if (random().nextInt(8) == 0) {
- iw1.commit();
- iw2.commit();
- }
- }
- // Make sure we have something to merge
- iw1.commit();
- iw2.commit();
- final Document doc = randomDocument();
- // NOTE: don't use RIW.addDocument directly, since it sometimes commits
- // which may trigger a merge, in which case forceMerge may not do anything.
- // With field updates this is a problem, since the updates can go into the
- // single segment in the index, and therefore the index won't be sorted.
- // This hurts the assumption of the test later on, that the index is sorted
- // by SortingMP.
- iw1.w.addDocument(doc);
- iw2.w.addDocument(doc);
-
- // update NDV of docs belonging to one term (covers many documents)
- final long value = random().nextLong();
- final String term = RandomPicks.randomFrom(random(), terms);
- iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value);
- iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value);
-
- iw1.forceMerge(1);
- iw2.forceMerge(1);
- iw1.close();
- iw2.close();
- reader = DirectoryReader.open(dir1);
- sortedReader = DirectoryReader.open(dir2);
- }
-
- @Override
- public void tearDown() throws Exception {
- reader.close();
- sortedReader.close();
- dir1.close();
- dir2.close();
- super.tearDown();
- }
-
- private static void assertSorted(LeafReader reader, boolean reverse) throws IOException {
- final NumericDocValues ndv = reader.getNumericDocValues("ndv");
- for (int i = 1; i < reader.maxDoc(); ++i) {
- final int lhs = (!reverse ? i-1 : i);
- final int rhs = (!reverse ? i : i-1);
- assertTrue("ndv(" + (i-1) + ")=" + ndv.get(i-1) + ",ndv(" + i + ")=" + ndv.get(i)+",reverse="+reverse, ndv.get(lhs) <= ndv.get(rhs));
- }
- }
-
- public void testSortingMP() throws IOException {
- final LeafReader sortedReader1 = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
- final LeafReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader);
-
- assertSorted(sortedReader1, reversedSort);
- assertSorted(sortedReader2, reversedSort);
-
- assertReaderEquals("", sortedReader1, sortedReader2);
- }
-
- public void testBadSort() throws Exception {
- IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
- new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE);
- });
- assertEquals("Cannot sort an index with a Sort that refers to the relevance score", expected.getMessage());
- }
-
-}
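
The test above checks sortedness by scanning the doc values of the merged segment. At search time the same property could be read from the segment diagnostics through SortingMergePolicy.isSorted, which is how EarlyTerminatingSortingCollector decides whether a segment qualifies. A small sketch; the helper name allSegmentsSorted is an assumption:

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.search.Sort;

public class SortedSegmentCheck {
  // True only if every segment of the reader is recorded (in its diagnostics) as
  // having been written in the given sort order.
  public static boolean allSegmentsSorted(DirectoryReader reader, Sort sort) {
    for (LeafReaderContext ctx : reader.leaves()) {
      if (SortingMergePolicy.isSorted(ctx.reader(), sort) == false) {
        return false;
      }
    }
    return true;
  }
}
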
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java indexsort/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java
--- trunk/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java 2016-02-16 11:18:34.753021816 -0500
+++ indexsort/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java 2016-05-10 05:44:23.752471119 -0400
@@ -32,9 +32,9 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -367,8 +367,7 @@
reader = writer.getReader();
writer.close();
searcher = newSearcher(reader);
- LeafReader ar = SlowCompositeReaderWrapper.wrap(reader);
- artistDocValues = ar.getSortedDocValues("artist");
+ artistDocValues = MultiDocValues.getSortedValues(reader, "artist");
// All searches sort by song popularity
final Similarity base = searcher.getSimilarity(true);
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java indexsort/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
--- trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java 2016-02-16 11:18:34.753021816 -0500
+++ indexsort/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java 1969-12-31 19:00:00.000000000 -0500
@@ -1,305 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.ExitableDirectoryReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.QueryTimeout;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.SortingMergePolicy;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TestSortingMergePolicy;
-import org.apache.lucene.search.LeafCollector;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopFieldCollector;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.uninverting.UninvertingReader;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-
-public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
-
- private int numDocs;
- private List<String> terms;
- private Directory dir;
- private Sort sort;
- private RandomIndexWriter iw;
- private IndexReader reader;
- private SortingMergePolicy mergePolicy;
- private final int forceMergeMaxSegmentCount = 5;
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
- }
-
- private Document randomDocument() {
- final Document doc = new Document();
- doc.add(new NumericDocValuesField("ndv1", random().nextInt(10)));
- doc.add(new NumericDocValuesField("ndv2", random().nextInt(10)));
- doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES));
- return doc;
- }
-
- private void createRandomIndex(boolean singleSortedSegment) throws IOException {
- dir = newDirectory();
- numDocs = atLeast(150);
- final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
- Set<String> randomTerms = new HashSet<>();
- while (randomTerms.size() < numTerms) {
- randomTerms.add(TestUtil.randomSimpleString(random()));
- }
- terms = new ArrayList<>(randomTerms);
- final long seed = random().nextLong();
- final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
- iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
- mergePolicy = TestSortingMergePolicy.newSortingMergePolicy(sort);
- iwc.setMergePolicy(mergePolicy);
- iw = new RandomIndexWriter(new Random(seed), dir, iwc);
- iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
- for (int i = 0; i < numDocs; ++i) {
- final Document doc = randomDocument();
- iw.addDocument(doc);
- if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) {
- iw.commit();
- }
- if (random().nextInt(15) == 0) {
- final String term = RandomPicks.randomFrom(random(), terms);
- iw.deleteDocuments(new Term("s", term));
- }
- }
- if (singleSortedSegment) {
- // because of deletions, there might still be a single flush segment in
- // the index, although we want a sorted segment so it needs to be merged
- iw.getReader().close(); // refresh
- iw.addDocument(new Document());
- iw.commit();
- iw.addDocument(new Document());
- iw.forceMerge(1);
- }
- else if (random().nextBoolean()) {
- iw.forceMerge(forceMergeMaxSegmentCount);
- }
- reader = iw.getReader();
- }
-
- private void closeIndex() throws IOException {
- reader.close();
- iw.close();
- dir.close();
- }
-
- public void testEarlyTermination() throws IOException {
- final int iters = atLeast(8);
- for (int i = 0; i < iters; ++i) {
- createRandomIndex(false);
- for (int j = 0; j < iters; ++j) {
- final IndexSearcher searcher = newSearcher(reader);
- final int numHits = TestUtil.nextInt(random(), 1, numDocs);
- final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG, false));
- final boolean fillFields = random().nextBoolean();
- final boolean trackDocScores = random().nextBoolean();
- final boolean trackMaxScore = random().nextBoolean();
- final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
- final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
-
- final Query query;
- if (random().nextBoolean()) {
- query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
- } else {
- query = new MatchAllDocsQuery();
- }
- searcher.search(query, collector1);
- searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy.getSort()));
- assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
- assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
- }
- closeIndex();
- }
- }
-
- public void testCanEarlyTerminate() {
- assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
- new Sort(new SortField("a", SortField.Type.LONG)),
- new Sort(new SortField("a", SortField.Type.LONG))));
-
- assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
- new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
- new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
-
- assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
- new Sort(new SortField("a", SortField.Type.LONG)),
- new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
-
- assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
- new Sort(new SortField("a", SortField.Type.LONG, true)),
- new Sort(new SortField("a", SortField.Type.LONG, false))));
-
- assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
- new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
- new Sort(new SortField("a", SortField.Type.LONG))));
-
- assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
- new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
- new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
-
- assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
- new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
- new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
- }
-
- public void testEarlyTerminationDifferentSorter() throws IOException {
- createRandomIndex(false);
- final int iters = atLeast(3);
- for (int i = 0; i < iters; ++i) {
- final IndexSearcher searcher = newSearcher(reader);
- // test that the collector works correctly when the index was sorted by a
- // different sorter than the one specified in the ctor.
- final int numHits = TestUtil.nextInt(random(), 1, numDocs);
- final Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false));
- final boolean fillFields = random().nextBoolean();
- final boolean trackDocScores = random().nextBoolean();
- final boolean trackMaxScore = random().nextBoolean();
- final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
- final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
-
- final Query query;
- if (random().nextBoolean()) {
- query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
- } else {
- query = new MatchAllDocsQuery();
- }
- searcher.search(query, collector1);
- Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
-
- searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, different) {
- @Override
- public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
- final LeafCollector ret = super.getLeafCollector(context);
- assertTrue("segment should not be recognized as sorted as different sorter was used", ret.getClass() == in.getLeafCollector(context).getClass());
- return ret;
- }
- });
- assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
- assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
- }
- closeIndex();
- }
-
- private static void assertTopDocsEquals(ScoreDoc[] scoreDocs1, ScoreDoc[] scoreDocs2) {
- assertEquals(scoreDocs1.length, scoreDocs2.length);
- for (int i = 0; i < scoreDocs1.length; ++i) {
- final ScoreDoc scoreDoc1 = scoreDocs1[i];
- final ScoreDoc scoreDoc2 = scoreDocs2[i];
- assertEquals(scoreDoc1.doc, scoreDoc2.doc);
- assertEquals(scoreDoc1.score, scoreDoc2.score, 0.001f);
- }
- }
-
- private class TestTerminatedEarlySimpleCollector extends SimpleCollector {
- private boolean collectedSomething;
- public boolean collectedSomething() {
- return collectedSomething;
- }
- @Override
- public void collect(int doc) throws IOException {
- collectedSomething = true;
- }
- @Override
- public boolean needsScores() {
- return false;
- }
- }
-
- private class TestEarlyTerminatingSortingcollectorQueryTimeout implements QueryTimeout {
- final private boolean shouldExit;
- public TestEarlyTerminatingSortingcollectorQueryTimeout(boolean shouldExit) {
- this.shouldExit = shouldExit;
- }
- public boolean shouldExit() {
- return shouldExit;
- }
- }
-
- private IndexSearcher newSearcherForTestTerminatedEarly(IndexReader r) throws IOException {
- switch(random().nextInt(2)) {
- case 0:
- return new IndexSearcher(r);
- case 1:
- assertTrue(r+" is not a DirectoryReader", (r instanceof DirectoryReader));
- final DirectoryReader directoryReader = ExitableDirectoryReader.wrap(
- UninvertingReader.wrap((DirectoryReader) r, new HashMap<String,Type>()),
- new TestEarlyTerminatingSortingcollectorQueryTimeout(false));
- return new IndexSearcher(directoryReader);
- }
- fail("newSearcherForTestTerminatedEarly("+r+") fell through switch");
- return null;
- }
-
- public void testTerminatedEarly() throws IOException {
- final int iters = atLeast(8);
- for (int i = 0; i < iters; ++i) {
- createRandomIndex(true);
-
- final IndexSearcher searcher = newSearcherForTestTerminatedEarly(reader); // future TODO: use newSearcher(reader);
- final Query query = new MatchAllDocsQuery(); // search for everything/anything
-
- final TestTerminatedEarlySimpleCollector collector1 = new TestTerminatedEarlySimpleCollector();
- searcher.search(query, collector1);
-
- final TestTerminatedEarlySimpleCollector collector2 = new TestTerminatedEarlySimpleCollector();
- final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1, mergePolicy.getSort());
- searcher.search(query, etsCollector);
-
- assertTrue("collector1="+collector1.collectedSomething()+" vs. collector2="+collector2.collectedSomething(), collector1.collectedSomething() == collector2.collectedSomething());
-
- if (collector1.collectedSomething()) {
- // we collected something and since we modestly asked for just one document we should have terminated early
- assertTrue("should have terminated early (searcher.reader="+searcher.reader+")", etsCollector.terminatedEarly());
- }
- closeIndex();
- }
- }
-
-}
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java indexsort/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java
--- trunk/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java 2016-05-03 07:31:51.560971608 -0400
+++ indexsort/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java 2016-05-10 05:44:23.756471119 -0400
@@ -247,7 +247,7 @@
private IndexWriterConfig getIndexWriterConfig() {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(Codec.forName("Lucene60"));
+ iwc.setCodec(Codec.forName("Lucene62"));
return iwc;
}
}
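
The default codec name moves from Lucene60 to Lucene62, the codec introduced alongside this patch whose segment-info format persists the index sort, so tests that pin the codec by name are updated. A short sketch of pinning the new codec on a writer config, as the updated tests do; the helper class name is illustrative:

    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.index.IndexWriterConfig;

    final class PinnedCodecSketch {
      static IndexWriterConfig pinCodec(IndexWriterConfig iwc) {
        // Pin the current default codec by name.
        iwc.setCodec(Codec.forName("Lucene62"));
        return iwc;
      }
    }
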
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java indexsort/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java
--- trunk/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java 2016-05-03 07:31:51.564971608 -0400
+++ indexsort/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java 2016-05-10 05:44:23.756471119 -0400
@@ -85,14 +85,14 @@
public class TestGeo3DPoint extends LuceneTestCase {
private static Codec getCodec() {
- if (Codec.getDefault().getName().equals("Lucene60")) {
+ if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
- return new FilterCodec("Lucene60", Codec.getDefault()) {
+ return new FilterCodec("Lucene62", Codec.getDefault()) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java indexsort/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
--- trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java 2016-02-16 11:18:34.833021818 -0500
+++ indexsort/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java 2016-05-10 05:44:23.756471119 -0400
@@ -56,7 +56,6 @@
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
@@ -232,7 +231,7 @@
// This way all merged segments will be sorted at
// merge time, allowing for per-segment early termination
// when those segments are searched:
- iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT));
+ iwc.setIndexSort(SORT);
return iwc;
}
@@ -586,10 +585,9 @@
// We sorted postings by weight during indexing, so we
// only retrieve the first num hits now:
- final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) writer.getConfig().getMergePolicy();
- Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, sortingMergePolicy.getSort());
- IndexSearcher searcher = searcherMgr.acquire();
+ Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
List<LookupResult> results = null;
+ IndexSearcher searcher = searcherMgr.acquire();
try {
//System.out.println("got searcher=" + searcher);
searcher.search(finalQuery, c2);
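
Two pieces change in the suggester: the sort is declared on IndexWriterConfig rather than through SortingMergePolicy, and EarlyTerminatingSortingCollector no longer needs the merge policy's sort because each segment records its own. A condensed sketch of both call sites, using the three-argument collector constructor shown above; the "weight" field and helper class are illustrative:

    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.EarlyTerminatingSortingCollector;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;

    final class SuggesterSortSketch {
      // Sort by a long "weight" field, descending (illustrative).
      static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));

      static IndexWriterConfig configure(IndexWriterConfig iwc) {
        // The writer sorts segments itself; no SortingMergePolicy wrapper needed.
        iwc.setIndexSort(SORT);
        return iwc;
      }

      static Collector earlyTerminating(Collector delegate, int num) {
        // No merge-policy sort argument: the collector reads the index sort
        // recorded on each segment.
        return new EarlyTerminatingSortingCollector(delegate, SORT, num);
      }
    }
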
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java indexsort/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
--- trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java 2016-03-08 17:22:26.848938631 -0500
+++ indexsort/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java 2016-05-10 05:44:23.756471119 -0400
@@ -66,7 +66,7 @@
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
- assertEquals(new BytesRef("foobaz"), results.get(0).payload);
+ assertEquals("foobaz", results.get(0).payload.utf8ToString());
assertEquals("lend me your ear", results.get(1).key);
assertEquals("lend me your <b>ear</b>", results.get(1).highlightKey);
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java indexsort/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
--- trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java 2016-03-08 17:22:26.848938631 -0500
+++ indexsort/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java 2016-05-10 05:44:23.756471119 -0400
@@ -32,7 +32,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -646,7 +646,7 @@
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
iwc.setMergePolicy(newLogMergePolicy());
- Codec filterCodec = new Lucene60Codec() {
+ Codec filterCodec = new Lucene62Codec() {
PostingsFormat postingsFormat = new Completion50PostingsFormat();
@Override
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java 2016-05-03 07:31:51.564971608 -0400
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java 2016-05-10 05:44:23.756471119 -0400
@@ -1242,7 +1242,7 @@
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
int pointsInLeaf = 2 + random().nextInt(4);
- iwc.setCodec(new FilterCodec("Lucene60", TestUtil.getDefaultCodec()) {
+ iwc.setCodec(new FilterCodec("Lucene62", TestUtil.getDefaultCodec()) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java 2016-03-02 04:32:40.483807337 -0500
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java 2016-05-10 05:44:23.756471119 -0400
@@ -627,7 +627,7 @@
/** Returns a new fake segment */
protected static SegmentInfo newSegmentInfo(Directory dir, String name) {
- return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
}
/** Creates a file of the specified size with random data. */
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java 2016-03-02 04:32:40.483807337 -0500
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java 2016-05-10 05:44:23.756471119 -0400
@@ -347,7 +347,7 @@
/** Returns a new fake segment */
protected static SegmentInfo newSegmentInfo(Directory dir, String name) {
- return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
}
@Override
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java 2016-03-13 05:38:07.395183845 -0400
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java 2016-05-10 05:44:23.756471119 -0400
@@ -303,7 +303,7 @@
Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
Codec codec = getCodec();
- SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(),
proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(),
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java 2016-03-02 04:32:40.483807337 -0500
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java 2016-05-10 05:44:23.756471119 -0400
@@ -26,7 +26,8 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
-import org.apache.lucene.document.TextField;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MockDirectoryWrapper;
@@ -52,7 +53,7 @@
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.<String,String>emptyMap(), id, new HashMap<>());
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -66,7 +67,7 @@
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.<String,String>emptyMap(), id, new HashMap<>());
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), null);
Set<String> originalFiles = Collections.singleton("_123.a");
info.setFiles(originalFiles);
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
@@ -95,7 +96,7 @@
diagnostics.put("key1", "value1");
diagnostics.put("key2", "value2");
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- diagnostics, id, new HashMap<>());
+ diagnostics, id, new HashMap<>(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -118,7 +119,7 @@
attributes.put("key1", "value1");
attributes.put("key2", "value2");
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, attributes);
+ Collections.emptyMap(), id, attributes, null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -138,7 +139,7 @@
Directory dir = newDirectory();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.<String,String>emptyMap(), id, new HashMap<>());
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -153,7 +154,7 @@
Directory dir = newDirectory();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, v, "_123", 1, false, codec,
- Collections.<String,String>emptyMap(), id, new HashMap<>());
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -161,7 +162,57 @@
dir.close();
}
}
-
+
+ protected boolean supportsIndexSort() {
+ return true;
+ }
+
+ /** Test sort */
+ public void testSort() throws IOException {
+ assumeTrue("test requires a codec that can read/write index sort", supportsIndexSort());
+
+ final int iters = atLeast(5);
+ for (int i = 0; i < iters; ++i) {
+ Sort sort;
+ if (i == 0) {
+ sort = null;
+ } else {
+ final int numSortFields = TestUtil.nextInt(random(), 1, 3);
+ SortField[] sortFields = new SortField[numSortFields];
+ for (int j = 0; j < numSortFields; ++j) {
+ sortFields[j] = new SortField(
+ TestUtil.randomSimpleString(random()),
+ random().nextBoolean() ? SortField.Type.LONG : SortField.Type.STRING,
+ random().nextBoolean());
+ if (random().nextBoolean()) {
+ switch (sortFields[j].getType()) {
+ case LONG:
+ sortFields[j].setMissingValue(random().nextLong());
+ break;
+ case STRING:
+ sortFields[j].setMissingValue(random().nextBoolean() ? SortField.STRING_FIRST : SortField.STRING_LAST);
+ break;
+ default:
+ fail();
+ }
+ }
+ }
+ sort = new Sort(sortFields);
+ }
+
+ Directory dir = newDirectory();
+ Codec codec = getCodec();
+ byte id[] = StringHelper.randomId();
+ SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), sort);
+ info.setFiles(Collections.<String>emptySet());
+ codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
+ SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
+ assertEquals(sort, info2.getIndexSort());
+ dir.close();
+ }
+ }
+
/**
* Test segment infos write that hits exception immediately on open.
* make sure we get our exception back, no file handle leaks, etc.
@@ -183,7 +234,7 @@
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.<String,String>emptyMap(), id, new HashMap<>());
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.<String>emptySet());
fail.setDoFail();
@@ -216,7 +267,7 @@
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.<String,String>emptyMap(), id, new HashMap<>());
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.<String>emptySet());
fail.setDoFail();
@@ -249,7 +300,7 @@
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.<String,String>emptyMap(), id, new HashMap<>());
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
@@ -283,7 +334,7 @@
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.<String,String>emptyMap(), id, new HashMap<>());
+ Collections.<String,String>emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
@@ -332,7 +383,7 @@
TestUtil.randomUnicodeString(random()));
}
- SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes);
+ SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes, null);
info.setFiles(files);
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, name, id, IOContext.DEFAULT);
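
The SegmentInfo constructor gains a trailing Sort argument (the index sort, or null for an unsorted segment), which the segment-info format round-trips as the new testSort verifies. A minimal sketch of constructing a sorted SegmentInfo; the "timestamp" field and helper class are illustrative:

    import java.util.Collections;
    import java.util.HashMap;
    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.StringHelper;
    import org.apache.lucene.util.Version;

    final class SegmentInfoSortSketch {
      // The last argument is the index sort; pass null for an unsorted segment.
      static SegmentInfo sortedSegmentInfo(Directory dir, String name, int maxDoc) {
        Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG));
        return new SegmentInfo(dir, Version.LATEST, name, maxDoc, false, Codec.getDefault(),
            Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), indexSort);
      }
    }
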
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java 2016-03-08 17:22:26.848938631 -0500
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java 2016-05-10 05:44:23.756471119 -0400
@@ -138,7 +138,6 @@
static class MockRandomOneMerge extends OneMerge {
final Random r;
- ArrayList<CodecReader> readers;
MockRandomOneMerge(List<SegmentCommitInfo> segments, long seed) {
super(segments);
@@ -146,34 +145,31 @@
}
@Override
- public List<CodecReader> getMergeReaders() throws IOException {
- if (readers == null) {
- readers = new ArrayList<CodecReader>(super.getMergeReaders());
- for (int i = 0; i < readers.size(); i++) {
- // wrap it (e.g. prevent bulk merge etc)
- // TODO: cut this over to FilterCodecReader api, we can explicitly
- // enable/disable bulk merge for portions of the index we want.
- int thingToDo = r.nextInt(7);
- if (thingToDo == 0) {
- // simple no-op FilterReader
- if (LuceneTestCase.VERBOSE) {
- System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + readers.get(i));
- }
- readers.set(i, SlowCodecReaderWrapper.wrap(new FilterLeafReader(readers.get(i)) {}));
- } else if (thingToDo == 1) {
- // renumber fields
- // NOTE: currently this only "blocks" bulk merges just by
- // being a FilterReader. But it might find bugs elsewhere,
- // and maybe the situation can be improved in the future.
- if (LuceneTestCase.VERBOSE) {
- System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + readers.get(i));
- }
- readers.set(i, SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(readers.get(i), r)));
- }
- // otherwise, reader is unchanged
+ public CodecReader wrapForMerge(CodecReader reader) throws IOException {
+
+ // wrap it (e.g. prevent bulk merge etc)
+ // TODO: cut this over to the FilterCodecReader API so we can explicitly
+ // enable/disable bulk merge for portions of the index we want.
+ int thingToDo = r.nextInt(7);
+ if (thingToDo == 0) {
+ // simple no-op FilterReader
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + reader);
+ }
+ return SlowCodecReaderWrapper.wrap(new FilterLeafReader(reader) {});
+ } else if (thingToDo == 1) {
+ // renumber fields
+ // NOTE: currently this only "blocks" bulk merges just by
+ // being a FilterReader. But it might find bugs elsewhere,
+ // and maybe the situation can be improved in the future.
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + reader);
}
+ return SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(reader, r));
+ } else {
+ // otherwise, reader is unchanged
+ return reader;
}
- return readers;
}
}
}
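
OneMerge no longer overrides getMergeReaders to return a pre-wrapped list; each incoming segment reader is instead wrapped on demand through the new wrapForMerge hook. A stripped-down sketch of the pattern the mock policy uses, always wrapping for clarity rather than choosing randomly; the class name is illustrative:

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.index.CodecReader;
    import org.apache.lucene.index.FilterLeafReader;
    import org.apache.lucene.index.MergePolicy.OneMerge;
    import org.apache.lucene.index.SegmentCommitInfo;
    import org.apache.lucene.index.SlowCodecReaderWrapper;

    final class WrappingOneMergeSketch extends OneMerge {
      WrappingOneMergeSketch(List<SegmentCommitInfo> segments) {
        super(segments);
      }

      @Override
      public CodecReader wrapForMerge(CodecReader reader) throws IOException {
        // Wrap each incoming segment reader individually; returning the reader
        // unchanged would keep the default (bulk-merge friendly) behavior.
        return SlowCodecReaderWrapper.wrap(new FilterLeafReader(reader) {});
      }
    }
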
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java indexsort/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java 2016-03-02 04:32:40.483807337 -0500
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java 2016-05-10 05:44:23.756471119 -0400
@@ -611,7 +611,7 @@
// maxAllowed = the "highest" we can index, but we will still
// randomly index at lower IndexOption
public FieldsProducer buildIndex(Codec codec, Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
- SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
if (LuceneTestCase.VERBOSE) {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java indexsort/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java 2016-03-13 05:38:07.399183845 -0400
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java 2016-05-10 05:44:23.756471119 -0400
@@ -283,6 +283,11 @@
@Override
protected void doClose() throws IOException {}
+
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
};
}
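
LeafReader now exposes getIndexSort(), which is why the dummy reader in QueryUtils has to implement it; callers can consult it per segment, for example to decide whether early termination applies. A tiny sketch of the per-leaf lookup, assuming only the accessor added by this patch; the helper class is illustrative:

    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.Sort;

    final class IndexSortLookupSketch {
      // Per-leaf check of how (or whether) a segment was sorted at write time.
      static boolean isSortedLeaf(LeafReaderContext context) {
        Sort indexSort = context.reader().getIndexSort();
        return indexSort != null;
      }
    }
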
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java indexsort/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java 2016-02-16 11:18:34.853021818 -0500
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java 2016-05-10 05:44:23.756471119 -0400
@@ -32,7 +32,7 @@
import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
import org.apache.lucene.codecs.compressing.CompressingCodec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.RandomCodec;
@@ -181,8 +181,8 @@
codec = new AssertingCodec();
} else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
codec = CompressingCodec.randomInstance(random);
- } else if ("Lucene60".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene60"))) {
- codec = new Lucene60Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values()));
+ } else if ("Lucene62".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene62"))) {
+ codec = new Lucene62Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values()));
} else if (!"random".equals(TEST_CODEC)) {
codec = Codec.forName(TEST_CODEC);
} else if ("random".equals(TEST_POSTINGSFORMAT)) {
diff -ruN -x .svn -x .git -x build -x dist -x .caches -x .idea -x idea-build -x eclipse-build -x .settings trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java indexsort/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
--- trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java 2016-03-10 16:23:24.703676109 -0500
+++ indexsort/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java 2016-05-10 05:44:23.756471119 -0400
@@ -54,7 +54,7 @@
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
@@ -911,7 +911,7 @@
* This may be different than {@link Codec#getDefault()} because that is randomized.
*/
public static Codec getDefaultCodec() {
- return new Lucene60Codec();
+ return new Lucene62Codec();
}
/**