| Index: lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java |
| =================================================================== |
| --- lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java (revision 1344053) |
| +++ lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java (working copy) |
| @@ -48,7 +48,7 @@ |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| - boolean canUseDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| + boolean canUseDV = true; |
| boolean useDv = canUseDV && random().nextBoolean(); |
| |
| // 0 |
| @@ -360,7 +360,7 @@ |
| new MockAnalyzer(random) |
| ) |
| ); |
| - boolean canUseDV = !"Lucene3x".equals(writer.w.getConfig().getCodec().getName()); |
| + boolean canUseDV = true; |
| boolean useDv = canUseDV && random.nextBoolean(); |
| |
| Document doc = new Document(); |
| Index: lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java |
| =================================================================== |
| --- lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java (revision 1344053) |
| +++ lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java (working copy) |
| @@ -61,7 +61,7 @@ |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); |
| - boolean canUseDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| + boolean canUseDV = true; |
| DocValues.Type dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.length)] : null; |
| |
| Document doc = new Document(); |
| @@ -417,7 +417,7 @@ |
| new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()) |
| ); |
| |
| - boolean canUseDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| + boolean canUseDV = true; |
| DocValues.Type dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.length)] : null; |
| |
| int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER; |
| Index: lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java |
| =================================================================== |
| --- lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision 1344053) |
| +++ lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (working copy) |
| @@ -73,7 +73,7 @@ |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| - boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| + boolean canUseIDV = true; |
| // 0 |
| Document doc = new Document(); |
| addGroupField(doc, groupField, "author1", canUseIDV); |
| @@ -701,8 +701,7 @@ |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random()))); |
| - final boolean preFlex = "Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| - boolean canUseIDV = !preFlex; |
| + boolean canUseIDV = true; |
| |
| Document doc = new Document(); |
| Document docNoGroup = new Document(); |
| @@ -778,7 +777,7 @@ |
| if (SlowCompositeReaderWrapper.class.isAssignableFrom(s.getIndexReader().getClass())) { |
| canUseIDV = false; |
| } else { |
| - canUseIDV = !preFlex; |
| + canUseIDV = true; |
| } |
| final ShardState shards = new ShardState(s); |
| |
| @@ -953,7 +952,7 @@ |
| |
| ValueHolder<Boolean> idvBasedImplsUsedSharded = new ValueHolder<Boolean>(false); |
| final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, |
| - groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, canUseIDV, preFlex, idvBasedImplsUsedSharded); |
| + groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, canUseIDV, false, idvBasedImplsUsedSharded); |
| final AbstractSecondPassGroupingCollector<?> c2; |
| if (topGroups != null) { |
| |
| Index: lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java |
| =================================================================== |
| --- lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision 1344053) |
| +++ lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (working copy) |
| @@ -50,7 +50,7 @@ |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| - boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| + boolean canUseIDV = true; |
| |
| // 0 |
| Document doc = new Document(); |
| Index: lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java |
| =================================================================== |
| --- lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java (revision 1344053) |
| +++ lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java (working copy) |
| @@ -51,7 +51,7 @@ |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| - boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| + boolean canUseIDV = true; |
| List<Document> documents = new ArrayList<Document>(); |
| // 0 |
| Document doc = new Document(); |
| Index: lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java |
| =================================================================== |
| --- lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java (revision 1344053) |
| +++ lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java (working copy) |
| @@ -54,7 +54,7 @@ |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| - boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| + boolean canUseIDV = true; |
| Type valueType = vts[random().nextInt(vts.length)]; |
| |
| // 0 |
| @@ -202,8 +202,7 @@ |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random()))); |
| - boolean preFlex = "Lucene3x".equals(w.w.getConfig().getCodec().getName()); |
| - boolean canUseIDV = !preFlex; |
| + boolean canUseIDV = true; |
| Type valueType = vts[random().nextInt(vts.length)]; |
| |
| Document doc = new Document(); |
| @@ -301,7 +300,7 @@ |
| if (SlowCompositeReaderWrapper.class.isAssignableFrom(s.getIndexReader().getClass())) { |
| canUseIDV = false; |
| } else { |
| - canUseIDV = !preFlex; |
| + canUseIDV = true; |
| } |
| |
| for (int contentID = 0; contentID < 3; contentID++) { |
| Index: lucene/misc/src/test/org/apache/lucene/index/TestBalancedSegmentMergePolicy.java |
| =================================================================== |
| --- lucene/misc/src/test/org/apache/lucene/index/TestBalancedSegmentMergePolicy.java (revision 1344053) |
| +++ lucene/misc/src/test/org/apache/lucene/index/TestBalancedSegmentMergePolicy.java (working copy) |
| @@ -44,7 +44,7 @@ |
| mp.setMergePolicyParams(newMergePolicyParams(random())); |
| iwc.setMergePolicy(mp); |
| iw = new RandomIndexWriter(random(), dir, iwc); |
| - LineFileDocs docs = new LineFileDocs(random(), !Codec.getDefault().getName().equals("Lucene3x")); |
| + LineFileDocs docs = new LineFileDocs(random(), true); |
| int numDocs = atLeast(200); |
| for (int i = 0; i < numDocs; i++) { |
| iw.addDocument(docs.nextDoc()); |
| Index: lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java (working copy) |
| @@ -206,7 +206,7 @@ |
| DocValues.Type[] values = DocValues.Type.values(); |
| DocValues.Type type = values[r.nextInt(values.length)]; |
| String name = "random_" + type.name() + "" + docValuesFieldPrefix; |
| - if ("Lucene3x".equals(codec.getName()) || doc.getField(name) != null) { |
| + if (doc.getField(name) != null) { |
| return; |
| } |
| final Field f; |
| @@ -403,10 +403,7 @@ |
| if (r.nextInt(20) == 2) { |
| doRandomForceMerge(); |
| } |
| - // If we are writing with PreFlexRW, force a full |
| - // IndexReader.open so terms are sorted in codepoint |
| - // order during searching: |
| - if (!applyDeletions || !codec.getName().equals("Lucene3x") && r.nextBoolean()) { |
| + if (!applyDeletions || r.nextBoolean()) { |
| if (LuceneTestCase.VERBOSE) { |
| System.out.println("RIW.getReader: use NRT reader"); |
| } |
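| |
| The comment deleted in the hunk above referred to pre-flex term ordering: a 3.x index stores terms in UTF-16 code-unit order, while 4.x codecs sort terms in UTF-8 byte (code point) order, and the two orders disagree once supplementary characters are involved. A minimal standalone sketch of that disagreement, in plain Java with no Lucene classes assumed: |
| |
|     public class TermOrderDemo { |
|       public static void main(String[] args) throws Exception { |
|         String a = "\uFFFF";        // U+FFFF, the highest BMP code point |
|         String b = "\uD800\uDC00";  // U+10000, a surrogate pair in UTF-16 |
|         // UTF-16 code-unit order (3.x term order): 0xD800 < 0xFFFF, so b sorts before a. |
|         System.out.println("UTF-16 order: " + Integer.signum(a.compareTo(b)));  // prints 1 |
|         // UTF-8 byte order == code point order (4.x term order): U+10000 > U+FFFF, |
|         // so b sorts after a -- the opposite result. |
|         byte[] ba = a.getBytes("UTF-8"); |
|         byte[] bb = b.getBytes("UTF-8"); |
|         int cmp = 0; |
|         for (int i = 0; i < Math.min(ba.length, bb.length) && cmp == 0; i++) { |
|           cmp = (ba[i] & 0xFF) - (bb[i] & 0xFF); |
|         } |
|         System.out.println("UTF-8 order: " + Integer.signum(cmp));  // prints -1 |
|       } |
|     } |
| |
| The old code forced a full IndexReader.open under PreFlexRW so searches saw terms in code point order; PreFlexRWFieldsWriter further down in this patch returns BytesRef.getUTF8SortedAsUTF16Comparator for the same ordering mismatch. |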
| Index: lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java (working copy) |
| @@ -415,7 +415,7 @@ |
| final long t0 = System.currentTimeMillis(); |
| |
| Random random = new Random(random().nextLong()); |
| - final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues()); |
| + final LineFileDocs docs = new LineFileDocs(random, true); |
| final File tempDir = _TestUtil.getTempDir(testName); |
| dir = newFSDirectory(tempDir); |
| ((MockDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves. |
| @@ -620,12 +620,10 @@ |
| private int runQuery(IndexSearcher s, Query q) throws Exception { |
| s.search(q, 10); |
| int hitCount = s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits; |
| - if (defaultCodecSupportsDocValues()) { |
| - final Sort dvSort = new Sort(new SortField("title", SortField.Type.STRING)); |
| - dvSort.getSort()[0].setUseIndexValues(true); |
| - int hitCount2 = s.search(q, null, 10, dvSort).totalHits; |
| - assertEquals(hitCount, hitCount2); |
| - } |
| + final Sort dvSort = new Sort(new SortField("title", SortField.Type.STRING)); |
| + dvSort.getSort()[0].setUseIndexValues(true); |
| + int hitCount2 = s.search(q, null, 10, dvSort).totalHits; |
| + assertEquals(hitCount, hitCount2); |
| return hitCount; |
| } |
| |
| Index: lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (working copy) |
| @@ -900,10 +900,6 @@ |
| } |
| } |
| |
| - public static boolean defaultCodecSupportsDocValues() { |
| - return !Codec.getDefault().getName().equals("Lucene3x"); |
| - } |
| - |
| private static Directory newFSDirectoryImpl( |
| Class<? extends FSDirectory> clazz, File file) |
| throws IOException { |
| Index: lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java (working copy) |
| @@ -13,7 +13,6 @@ |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.codecs.appending.AppendingCodec; |
| -import org.apache.lucene.codecs.lucene3x.PreFlexRWCodec; |
| import org.apache.lucene.codecs.lucene40.Lucene40Codec; |
| import org.apache.lucene.codecs.simpletext.SimpleTextCodec; |
| import org.apache.lucene.index.RandomCodec; |
| @@ -91,6 +90,7 @@ |
| modifiableServicesField.setAccessible(true); |
| @SuppressWarnings({"unchecked","rawtypes"}) final Map<String,Codec> serviceMap = |
| (Map) modifiableServicesField.get(spiLoader); |
| + /* note: re-enable this if we make a Lucene4x impersonator |
| if (!(Codec.forName("Lucene3x") instanceof PreFlexRWCodec)) { |
| if (Constants.JAVA_VENDOR.startsWith("IBM")) { |
| // definitely a buggy version |
| @@ -103,7 +103,7 @@ |
| " and does not respect classpath order, please report this to the vendor."); |
| } |
| serviceMap.put("Lucene3x", new PreFlexRWCodec()); |
| - } |
| + } */ |
| } catch (Exception e) { |
| throw new RuntimeException("Cannot access internals of Codec and NamedSPILoader classes", e); |
| } |
| @@ -156,11 +156,12 @@ |
| savedCodec = Codec.getDefault(); |
| final Codec codec; |
| int randomVal = random.nextInt(10); |
| - if ("Lucene3x".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal < 2 && !shouldAvoidCodec("Lucene3x"))) { // preflex-only setup |
| + /* note: re-enable this if we make a 4.x impersonator |
| + * if ("Lucene3x".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal < 2 && !shouldAvoidCodec("Lucene3x"))) { // preflex-only setup |
| codec = Codec.forName("Lucene3x"); |
| assert (codec instanceof PreFlexRWCodec) : "fix your classpath to have tests-framework.jar before lucene-core.jar"; |
| PREFLEX_IMPERSONATION_IS_ACTIVE = true; |
| - } else if ("SimpleText".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 9 && !shouldAvoidCodec("SimpleText"))) { |
| + } else */if ("SimpleText".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 9 && !shouldAvoidCodec("SimpleText"))) { |
| codec = new SimpleTextCodec(); |
| } else if ("Appending".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 8 && !shouldAvoidCodec("Appending"))) { |
| codec = new AppendingCodec(); |
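| |
| After this change the random draw still uses nextInt(10), but the 0-1 band that used to select the Lucene3x impersonator now falls through to the default codec; 9 still selects SimpleText and 8 still selects Appending. A minimal sketch of that band-based selection in plain Java, with hypothetical codec-name strings standing in for the real Codec instances: |
| |
|     import java.util.Random; |
| |
|     public class CodecPickDemo { |
|       public static void main(String[] args) { |
|         int randomVal = new Random().nextInt(10); |
|         final String codec;  // hypothetical stand-in for a Codec instance |
|         if (randomVal == 9) { |
|           codec = "SimpleText";  // ~10% of runs |
|         } else if (randomVal == 8) { |
|           codec = "Appending";   // ~10% of runs |
|         } else { |
|           codec = "default";     // remaining ~80%; 0-1 no longer map to Lucene3x |
|         } |
|         System.out.println("picked " + codec); |
|       } |
|     } |
| |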
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSegmentInfoFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSegmentInfoFormat.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSegmentInfoFormat.java (working copy) |
| @@ -1,32 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -import org.apache.lucene.codecs.SegmentInfoWriter; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -/** |
| - * @lucene.experimental |
| - */ |
| -class PreFlexRWSegmentInfoFormat extends Lucene3xSegmentInfoFormat { |
| - private final SegmentInfoWriter writer = new PreFlexRWSegmentInfoWriter(); |
| - |
| - @Override |
| - public SegmentInfoWriter getSegmentInfosWriter() { |
| - return writer; |
| - } |
| -} |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosFormat.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosFormat.java (working copy) |
| @@ -1,41 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.FieldInfosReader; |
| -import org.apache.lucene.codecs.FieldInfosWriter; |
| - |
| -/** |
| - * |
| - * @lucene.internal |
| - * @lucene.experimental |
| - */ |
| -class PreFlexRWFieldInfosFormat extends Lucene3xFieldInfosFormat { |
| - |
| - @Override |
| - public FieldInfosReader getFieldInfosReader() throws IOException { |
| - return new PreFlexRWFieldInfosReader(); |
| - } |
| - |
| - @Override |
| - public FieldInfosWriter getFieldInfosWriter() throws IOException { |
| - return new PreFlexRWFieldInfosWriter(); |
| - } |
| -} |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSkipListWriter.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSkipListWriter.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSkipListWriter.java (working copy) |
| @@ -1,127 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Arrays; |
| - |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.codecs.MultiLevelSkipListWriter; |
| - |
| - |
| -/** |
| - * PreFlexRW skiplist implementation. |
| - * @lucene.experimental |
| - */ |
| -public class PreFlexRWSkipListWriter extends MultiLevelSkipListWriter { |
| - private int[] lastSkipDoc; |
| - private int[] lastSkipPayloadLength; |
| - private long[] lastSkipFreqPointer; |
| - private long[] lastSkipProxPointer; |
| - |
| - private IndexOutput freqOutput; |
| - private IndexOutput proxOutput; |
| - |
| - private int curDoc; |
| - private boolean curStorePayloads; |
| - private int curPayloadLength; |
| - private long curFreqPointer; |
| - private long curProxPointer; |
| - |
| - public PreFlexRWSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) { |
| - super(skipInterval, numberOfSkipLevels, docCount); |
| - this.freqOutput = freqOutput; |
| - this.proxOutput = proxOutput; |
| - |
| - lastSkipDoc = new int[numberOfSkipLevels]; |
| - lastSkipPayloadLength = new int[numberOfSkipLevels]; |
| - lastSkipFreqPointer = new long[numberOfSkipLevels]; |
| - lastSkipProxPointer = new long[numberOfSkipLevels]; |
| - } |
| - |
| - /** |
| - * Sets the values for the current skip data. |
| - */ |
| - public void setSkipData(int doc, boolean storePayloads, int payloadLength) { |
| - this.curDoc = doc; |
| - this.curStorePayloads = storePayloads; |
| - this.curPayloadLength = payloadLength; |
| - this.curFreqPointer = freqOutput.getFilePointer(); |
| - if (proxOutput != null) |
| - this.curProxPointer = proxOutput.getFilePointer(); |
| - } |
| - |
| - @Override |
| - public void resetSkip() { |
| - super.resetSkip(); |
| - Arrays.fill(lastSkipDoc, 0); |
| - Arrays.fill(lastSkipPayloadLength, -1); // we don't have to write the first length in the skip list |
| - Arrays.fill(lastSkipFreqPointer, freqOutput.getFilePointer()); |
| - if (proxOutput != null) |
| - Arrays.fill(lastSkipProxPointer, proxOutput.getFilePointer()); |
| - } |
| - |
| - @Override |
| - protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException { |
| - // To efficiently store payloads in the posting lists we do not store the length of |
| - // every payload. Instead we omit the length for a payload if the previous payload had |
| - // the same length. |
| - // However, in order to support skipping the payload length at every skip point must be known. |
| - // So we use the same length encoding that we use for the posting lists for the skip data as well: |
| - // Case 1: current field does not store payloads |
| - // SkipDatum --> DocSkip, FreqSkip, ProxSkip |
| - // DocSkip,FreqSkip,ProxSkip --> VInt |
| - // DocSkip records the document number before every SkipInterval th document in TermFreqs. |
| - // Document numbers are represented as differences from the previous value in the sequence. |
| - // Case 2: current field stores payloads |
| - // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip |
| - // DocSkip,FreqSkip,ProxSkip --> VInt |
| - // PayloadLength --> VInt |
| - // In this case DocSkip/2 is the difference between |
| - // the current and the previous value. If DocSkip |
| - // is odd, then a PayloadLength encoded as VInt follows, |
| - // if DocSkip is even, then it is assumed that the |
| - // current payload length equals the length at the previous |
| - // skip point |
| - if (curStorePayloads) { |
| - int delta = curDoc - lastSkipDoc[level]; |
| - if (curPayloadLength == lastSkipPayloadLength[level]) { |
| - // the current payload length equals the length at the previous skip point, |
| - // so we don't store the length again |
| - skipBuffer.writeVInt(delta * 2); |
| - } else { |
| - // the payload length is different from the previous one. We shift the DocSkip, |
| - // set the lowest bit and store the current payload length as VInt. |
| - skipBuffer.writeVInt(delta * 2 + 1); |
| - skipBuffer.writeVInt(curPayloadLength); |
| - lastSkipPayloadLength[level] = curPayloadLength; |
| - } |
| - } else { |
| - // current field does not store payloads |
| - skipBuffer.writeVInt(curDoc - lastSkipDoc[level]); |
| - } |
| - |
| - skipBuffer.writeVInt((int) (curFreqPointer - lastSkipFreqPointer[level])); |
| - skipBuffer.writeVInt((int) (curProxPointer - lastSkipProxPointer[level])); |
| - |
| - lastSkipDoc[level] = curDoc; |
| - |
| - lastSkipFreqPointer[level] = curFreqPointer; |
| - lastSkipProxPointer[level] = curProxPointer; |
| - } |
| -} |
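| |
| The block comment in writeSkipData above documents the shifted-delta trick: when a field stores payloads, the doc delta is doubled and the low bit records whether a new payload length follows. A standalone arithmetic sketch of that encoding, in plain Java with an int list standing in for Lucene's VInt-writing IndexOutput: |
| |
|     import java.util.ArrayList; |
|     import java.util.List; |
| |
|     public class SkipEncodingDemo { |
|       public static void main(String[] args) { |
|         List<Integer> out = new ArrayList<Integer>();  // stands in for the skip buffer |
|         int lastDoc = 0, lastPayloadLength = -1; |
|         int[][] skips = {{16, 7}, {32, 7}, {48, 9}};   // {doc, payloadLength} per skip point |
|         for (int[] skip : skips) { |
|           int delta = skip[0] - lastDoc; |
|           if (skip[1] == lastPayloadLength) { |
|             out.add(delta * 2);        // even: payload length unchanged, not rewritten |
|           } else { |
|             out.add(delta * 2 + 1);    // odd: a payload length follows |
|             out.add(skip[1]); |
|             lastPayloadLength = skip[1]; |
|           } |
|           lastDoc = skip[0]; |
|         } |
|         System.out.println(out);       // [33, 7, 32, 33, 9] |
|       } |
|     } |
| |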
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWStoredFieldsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWStoredFieldsFormat.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWStoredFieldsFormat.java (working copy) |
| @@ -1,33 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.StoredFieldsWriter; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| - |
| -class PreFlexRWStoredFieldsFormat extends Lucene3xStoredFieldsFormat { |
| - |
| - @Override |
| - public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException { |
| - return new PreFlexRWStoredFieldsWriter(directory, segmentInfo.name, context); |
| - } |
| -} |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldsWriter.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldsWriter.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldsWriter.java (working copy) |
| @@ -1,224 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Comparator; |
| - |
| -import org.apache.lucene.codecs.FieldsConsumer; |
| -import org.apache.lucene.codecs.PostingsConsumer; |
| -import org.apache.lucene.codecs.TermStats; |
| -import org.apache.lucene.codecs.TermsConsumer; |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfo.IndexOptions; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.SegmentWriteState; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.IOUtils; |
| - |
| -class PreFlexRWFieldsWriter extends FieldsConsumer { |
| - |
| - private final TermInfosWriter termsOut; |
| - private final IndexOutput freqOut; |
| - private final IndexOutput proxOut; |
| - private final PreFlexRWSkipListWriter skipListWriter; |
| - private final int totalNumDocs; |
| - |
| - public PreFlexRWFieldsWriter(SegmentWriteState state) throws IOException { |
| - termsOut = new TermInfosWriter(state.directory, |
| - state.segmentInfo.name, |
| - state.fieldInfos, |
| - state.termIndexInterval); |
| - |
| - boolean success = false; |
| - try { |
| - final String freqFile = IndexFileNames.segmentFileName(state.segmentInfo.name, "", Lucene3xPostingsFormat.FREQ_EXTENSION); |
| - freqOut = state.directory.createOutput(freqFile, state.context); |
| - totalNumDocs = state.segmentInfo.getDocCount(); |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(termsOut); |
| - } |
| - } |
| - |
| - success = false; |
| - try { |
| - if (state.fieldInfos.hasProx()) { |
| - final String proxFile = IndexFileNames.segmentFileName(state.segmentInfo.name, "", Lucene3xPostingsFormat.PROX_EXTENSION); |
| - proxOut = state.directory.createOutput(proxFile, state.context); |
| - } else { |
| - proxOut = null; |
| - } |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(termsOut, freqOut); |
| - } |
| - } |
| - |
| - skipListWriter = new PreFlexRWSkipListWriter(termsOut.skipInterval, |
| - termsOut.maxSkipLevels, |
| - totalNumDocs, |
| - freqOut, |
| - proxOut); |
| - //System.out.println("\nw start seg=" + segment); |
| - } |
| - |
| - @Override |
| - public TermsConsumer addField(FieldInfo field) throws IOException { |
| - assert field.number != -1; |
| - if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) { |
| - throw new UnsupportedOperationException("this codec cannot index offsets"); |
| - } |
| - //System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number); |
| - return new PreFlexTermsWriter(field); |
| - } |
| - |
| - @Override |
| - public void close() throws IOException { |
| - IOUtils.close(termsOut, freqOut, proxOut); |
| - } |
| - |
| - private class PreFlexTermsWriter extends TermsConsumer { |
| - private final FieldInfo fieldInfo; |
| - private final boolean omitTF; |
| - private final boolean storePayloads; |
| - |
| - private final TermInfo termInfo = new TermInfo(); |
| - private final PostingsWriter postingsWriter = new PostingsWriter(); |
| - |
| - public PreFlexTermsWriter(FieldInfo fieldInfo) { |
| - this.fieldInfo = fieldInfo; |
| - omitTF = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY; |
| - storePayloads = fieldInfo.hasPayloads(); |
| - } |
| - |
| - private class PostingsWriter extends PostingsConsumer { |
| - private int lastDocID; |
| - private int lastPayloadLength = -1; |
| - private int lastPosition; |
| - private int df; |
| - |
| - public PostingsWriter reset() { |
| - df = 0; |
| - lastDocID = 0; |
| - lastPayloadLength = -1; |
| - return this; |
| - } |
| - |
| - @Override |
| - public void startDoc(int docID, int termDocFreq) throws IOException { |
| - //System.out.println(" w doc=" + docID); |
| - |
| - final int delta = docID - lastDocID; |
| - if (docID < 0 || (df > 0 && delta <= 0)) { |
| - throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )"); |
| - } |
| - |
| - if ((++df % termsOut.skipInterval) == 0) { |
| - skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength); |
| - skipListWriter.bufferSkip(df); |
| - } |
| - |
| - lastDocID = docID; |
| - |
| - assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs; |
| - |
| - if (omitTF) { |
| - freqOut.writeVInt(delta); |
| - } else { |
| - final int code = delta << 1; |
| - if (termDocFreq == 1) { |
| - freqOut.writeVInt(code|1); |
| - } else { |
| - freqOut.writeVInt(code); |
| - freqOut.writeVInt(termDocFreq); |
| - } |
| - } |
| - lastPosition = 0; |
| - } |
| - |
| - @Override |
| - public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException { |
| - assert proxOut != null; |
| - assert startOffset == -1; |
| - assert endOffset == -1; |
| - //System.out.println(" w pos=" + position + " payl=" + payload); |
| - final int delta = position - lastPosition; |
| - lastPosition = position; |
| - |
| - if (storePayloads) { |
| - final int payloadLength = payload == null ? 0 : payload.length; |
| - if (payloadLength != lastPayloadLength) { |
| - //System.out.println(" write payload len=" + payloadLength); |
| - lastPayloadLength = payloadLength; |
| - proxOut.writeVInt((delta<<1)|1); |
| - proxOut.writeVInt(payloadLength); |
| - } else { |
| - proxOut.writeVInt(delta << 1); |
| - } |
| - if (payloadLength > 0) { |
| - proxOut.writeBytes(payload.bytes, payload.offset, payload.length); |
| - } |
| - } else { |
| - proxOut.writeVInt(delta); |
| - } |
| - } |
| - |
| - @Override |
| - public void finishDoc() throws IOException { |
| - } |
| - } |
| - |
| - @Override |
| - public PostingsConsumer startTerm(BytesRef text) throws IOException { |
| - //System.out.println(" w term=" + text.utf8ToString()); |
| - skipListWriter.resetSkip(); |
| - termInfo.freqPointer = freqOut.getFilePointer(); |
| - if (proxOut != null) { |
| - termInfo.proxPointer = proxOut.getFilePointer(); |
| - } |
| - return postingsWriter.reset(); |
| - } |
| - |
| - @Override |
| - public void finishTerm(BytesRef text, TermStats stats) throws IOException { |
| - if (stats.docFreq > 0) { |
| - long skipPointer = skipListWriter.writeSkip(freqOut); |
| - termInfo.docFreq = stats.docFreq; |
| - termInfo.skipOffset = (int) (skipPointer - termInfo.freqPointer); |
| - //System.out.println(" w finish term=" + text.utf8ToString() + " fnum=" + fieldInfo.number); |
| - termsOut.add(fieldInfo.number, |
| - text, |
| - termInfo); |
| - } |
| - } |
| - |
| - @Override |
| - public void finish(long sumTotalTermCount, long sumDocFreq, int docCount) throws IOException { |
| - } |
| - |
| - @Override |
| - public Comparator<BytesRef> getComparator() throws IOException { |
| - return BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - } |
| - } |
| -} |
| \ No newline at end of file |
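| |
| startDoc above folds the common termDocFreq == 1 case into the doc delta: the delta is shifted left one bit, and the low bit is set when the frequency is exactly one, saving the second VInt. A standalone sketch of encode and decode for that scheme, in plain Java: |
| |
|     public class FreqEncodingDemo { |
|       // Encode one posting as one or two ints (stand-ins for VInts). |
|       static int[] encode(int delta, int tf) { |
|         int code = delta << 1; |
|         return tf == 1 ? new int[] {code | 1}    // low bit set: freq is implicitly 1 |
|                        : new int[] {code, tf};   // low bit clear: freq follows |
|       } |
| |
|       public static void main(String[] args) { |
|         for (int[] enc : new int[][] {encode(5, 1), encode(5, 3)}) { |
|           int delta = enc[0] >>> 1; |
|           int tf = (enc[0] & 1) != 0 ? 1 : enc[1]; |
|           System.out.println("delta=" + delta + " tf=" + tf);  // delta=5 tf=1, then delta=5 tf=3 |
|         } |
|       } |
|     } |
| |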
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/TermInfosWriter.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/TermInfosWriter.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/TermInfosWriter.java (working copy) |
| @@ -1,281 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| - |
| -import java.io.Closeable; |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.CharsRef; |
| -import org.apache.lucene.util.IOUtils; |
| -import org.apache.lucene.util.UnicodeUtil; |
| - |
| - |
| -/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a |
| - Directory. A TermInfos can be written once, in order. */ |
| - |
| -final class TermInfosWriter implements Closeable { |
| - /** The file format version, a negative number. */ |
| - public static final int FORMAT = -3; |
| - |
| - // Changed strings to true utf8 with length-in-bytes not |
| - // length-in-chars |
| - public static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4; |
| - |
| - // NOTE: always change this if you switch to a new format! |
| - public static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; |
| - |
| - private FieldInfos fieldInfos; |
| - private IndexOutput output; |
| - private TermInfo lastTi = new TermInfo(); |
| - private long size; |
| - |
| - // TODO: the default values for these two parameters should be settable from |
| - // IndexWriter. However, once that's done, folks will start setting them to |
| - // ridiculous values and complaining that things don't work well, as with |
| - // mergeFactor. So, let's wait until a number of folks find that alternate |
| - // values work better. Note that both of these values are stored in the |
| - // segment, so that it's safe to change these w/o rebuilding all indexes. |
| - |
| - /** Expert: The fraction of terms in the "dictionary" which should be stored |
| - * in RAM. Smaller values use more memory, but make searching slightly |
| - * faster, while larger values use less memory and make searching slightly |
| - * slower. Searching is typically not dominated by dictionary lookup, so |
| - * tweaking this is rarely useful.*/ |
| - int indexInterval = 128; |
| - |
| - /** Expert: The fraction of {@link TermDocs} entries stored in skip tables, |
| - * used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in |
| - * smaller indexes, greater acceleration, but fewer accelerable cases, while |
| - * smaller values result in bigger indexes, less acceleration and more |
| - * accelerable cases. More detailed experiments would be useful here. */ |
| - int skipInterval = 16; |
| - |
| - /** Expert: The maximum number of skip levels. Smaller values result in |
| - * slightly smaller indexes, but slower skipping in big posting lists. |
| - */ |
| - int maxSkipLevels = 10; |
| - |
| - private long lastIndexPointer; |
| - private boolean isIndex; |
| - private final BytesRef lastTerm = new BytesRef(); |
| - private int lastFieldNumber = -1; |
| - |
| - private TermInfosWriter other; |
| - |
| - TermInfosWriter(Directory directory, String segment, FieldInfos fis, |
| - int interval) |
| - throws IOException { |
| - initialize(directory, segment, fis, interval, false); |
| - boolean success = false; |
| - try { |
| - other = new TermInfosWriter(directory, segment, fis, interval, true); |
| - other.other = this; |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(output); |
| - |
| - try { |
| - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", |
| - (isIndex ? Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION |
| - : Lucene3xPostingsFormat.TERMS_EXTENSION))); |
| - } catch (IOException ignored) { |
| - } |
| - } |
| - } |
| - } |
| - |
| - private TermInfosWriter(Directory directory, String segment, FieldInfos fis, |
| - int interval, boolean isIndex) throws IOException { |
| - initialize(directory, segment, fis, interval, isIndex); |
| - } |
| - |
| - private void initialize(Directory directory, String segment, FieldInfos fis, |
| - int interval, boolean isi) throws IOException { |
| - indexInterval = interval; |
| - fieldInfos = fis; |
| - isIndex = isi; |
| - output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", |
| - (isIndex ? Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION |
| - : Lucene3xPostingsFormat.TERMS_EXTENSION)), IOContext.DEFAULT); |
| - boolean success = false; |
| - try { |
| - output.writeInt(FORMAT_CURRENT); // write format |
| - output.writeLong(0); // leave space for size |
| - output.writeInt(indexInterval); // write indexInterval |
| - output.writeInt(skipInterval); // write skipInterval |
| - output.writeInt(maxSkipLevels); // write maxSkipLevels |
| - assert initUTF16Results(); |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(output); |
| - |
| - try { |
| - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", |
| - (isIndex ? Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION |
| - : Lucene3xPostingsFormat.TERMS_EXTENSION))); |
| - } catch (IOException ignored) { |
| - } |
| - } |
| - } |
| - } |
| - |
| - // Currently used only by assert statements |
| - CharsRef utf16Result1; |
| - CharsRef utf16Result2; |
| - private final BytesRef scratchBytes = new BytesRef(); |
| - |
| - // Currently used only by assert statements |
| - private boolean initUTF16Results() { |
| - utf16Result1 = new CharsRef(10); |
| - utf16Result2 = new CharsRef(10); |
| - return true; |
| - } |
| - |
| - /** note: -1 is the empty field: "" !!!! */ |
| - static String fieldName(FieldInfos infos, int fieldNumber) { |
| - FieldInfo fi = infos.fieldInfo(fieldNumber); |
| - return (fi != null) ? fi.name : ""; |
| - } |
| - |
| - // Currently used only by assert statement |
| - private int compareToLastTerm(int fieldNumber, BytesRef term) { |
| - |
| - if (lastFieldNumber != fieldNumber) { |
| - final int cmp = fieldName(fieldInfos, lastFieldNumber).compareTo(fieldName(fieldInfos, fieldNumber)); |
| - // If there is a field named "" (empty string) then we |
| - // will get 0 on this comparison, yet, it's "OK". But |
| - // it's not OK if two different field numbers map to |
| - // the same name. |
| - if (cmp != 0 || lastFieldNumber != -1) |
| - return cmp; |
| - } |
| - |
| - scratchBytes.copyBytes(term); |
| - assert lastTerm.offset == 0; |
| - UnicodeUtil.UTF8toUTF16(lastTerm.bytes, 0, lastTerm.length, utf16Result1); |
| - |
| - assert scratchBytes.offset == 0; |
| - UnicodeUtil.UTF8toUTF16(scratchBytes.bytes, 0, scratchBytes.length, utf16Result2); |
| - |
| - final int len; |
| - if (utf16Result1.length < utf16Result2.length) |
| - len = utf16Result1.length; |
| - else |
| - len = utf16Result2.length; |
| - |
| - for(int i=0;i<len;i++) { |
| - final char ch1 = utf16Result1.chars[i]; |
| - final char ch2 = utf16Result2.chars[i]; |
| - if (ch1 != ch2) |
| - return ch1-ch2; |
| - } |
| - if (utf16Result1.length == 0 && lastFieldNumber == -1) { |
| - // If there is a field named "" (empty string) with a term text of "" (empty string) then we |
| - // will get 0 on this comparison, yet, it's "OK". |
| - return -1; |
| - } |
| - return utf16Result1.length - utf16Result2.length; |
| - } |
| - |
| - /** Adds a new <<fieldNumber, termBytes>, TermInfo> pair to the set. |
| - Term must be lexicographically greater than all previous Terms added. |
| - TermInfo pointers must be positive and greater than all previous.*/ |
| - public void add(int fieldNumber, BytesRef term, TermInfo ti) |
| - throws IOException { |
| - |
| - assert compareToLastTerm(fieldNumber, term) < 0 || |
| - (isIndex && term.length == 0 && lastTerm.length == 0) : |
| - "Terms are out of order: field=" + fieldName(fieldInfos, fieldNumber) + " (number " + fieldNumber + ")" + |
| - " lastField=" + fieldName(fieldInfos, lastFieldNumber) + " (number " + lastFieldNumber + ")" + |
| - " text=" + term.utf8ToString() + " lastText=" + lastTerm.utf8ToString(); |
| - |
| - assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")"; |
| - assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")"; |
| - |
| - if (!isIndex && size % indexInterval == 0) { |
| - other.add(lastFieldNumber, lastTerm, lastTi); // add an index term |
| - } |
| - writeTerm(fieldNumber, term); // write term |
| - |
| - output.writeVInt(ti.docFreq); // write doc freq |
| - output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers |
| - output.writeVLong(ti.proxPointer - lastTi.proxPointer); |
| - |
| - if (ti.docFreq >= skipInterval) { |
| - output.writeVInt(ti.skipOffset); |
| - } |
| - |
| - if (isIndex) { |
| - output.writeVLong(other.output.getFilePointer() - lastIndexPointer); |
| - lastIndexPointer = other.output.getFilePointer(); // write pointer |
| - } |
| - |
| - lastFieldNumber = fieldNumber; |
| - lastTi.set(ti); |
| - size++; |
| - } |
| - |
| - private void writeTerm(int fieldNumber, BytesRef term) |
| - throws IOException { |
| - |
| - //System.out.println(" tiw.write field=" + fieldNumber + " term=" + term.utf8ToString()); |
| - |
| - // TODO: UTF16toUTF8 could tell us this prefix |
| - // Compute prefix in common with last term: |
| - int start = 0; |
| - final int limit = term.length < lastTerm.length ? term.length : lastTerm.length; |
| - while(start < limit) { |
| - if (term.bytes[start+term.offset] != lastTerm.bytes[start+lastTerm.offset]) |
| - break; |
| - start++; |
| - } |
| - |
| - final int length = term.length - start; |
| - output.writeVInt(start); // write shared prefix length |
| - output.writeVInt(length); // write delta length |
| - output.writeBytes(term.bytes, start+term.offset, length); // write delta bytes |
| - output.writeVInt(fieldNumber); // write field num |
| - lastTerm.copyBytes(term); |
| - } |
| - |
| - /** Called to complete TermInfos creation. */ |
| - public void close() throws IOException { |
| - try { |
| - output.seek(4); // write size after format |
| - output.writeLong(size); |
| - } finally { |
| - try { |
| - output.close(); |
| - } finally { |
| - if (!isIndex) { |
| - other.close(); |
| - } |
| - } |
| - } |
| - } |
| -} |
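| |
| writeTerm above front-codes each term against its predecessor: only the length of the shared prefix, the suffix length, the suffix bytes, and the field number go to disk. A standalone sketch of the prefix computation in plain Java, with byte arrays standing in for BytesRef: |
| |
|     public class FrontCodingDemo { |
|       public static void main(String[] args) throws Exception { |
|         byte[] last = "apple".getBytes("UTF-8"); |
|         byte[] term = "applied".getBytes("UTF-8"); |
|         // Length of the prefix shared with the previous term. |
|         int start = 0; |
|         final int limit = Math.min(term.length, last.length); |
|         while (start < limit && term[start] == last[start]) { |
|           start++; |
|         } |
|         int suffixLength = term.length - start; |
|         // On disk this would be: VInt(start), VInt(suffixLength), suffix bytes, VInt(fieldNumber). |
|         System.out.println("shared=" + start |
|             + " suffix=" + new String(term, start, suffixLength, "UTF-8"));  // shared=4 suffix=ied |
|       } |
|     } |
| |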
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosReader.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosReader.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosReader.java (working copy) |
| @@ -1,112 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -import java.io.IOException; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.codecs.FieldInfosReader; |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.DocValues.Type; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexFormatTooNewException; |
| -import org.apache.lucene.index.IndexFormatTooOldException; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.FieldInfo.IndexOptions; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| - |
| -/** |
| - * @lucene.internal |
| - * @lucene.experimental |
| - */ |
| -class PreFlexRWFieldInfosReader extends FieldInfosReader { |
| - static final int FORMAT_MINIMUM = PreFlexRWFieldInfosWriter.FORMAT_START; |
| - |
| - @Override |
| - public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException { |
| - final String fileName = IndexFileNames.segmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION); |
| - IndexInput input = directory.openInput(fileName, iocontext); |
| - |
| - try { |
| - final int format = input.readVInt(); |
| - |
| - if (format > FORMAT_MINIMUM) { |
| - throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT); |
| - } |
| - if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW) { |
| - throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT); |
| - } |
| - |
| - final int size = input.readVInt(); //read in the size |
| - FieldInfo infos[] = new FieldInfo[size]; |
| - |
| - for (int i = 0; i < size; i++) { |
| - String name = input.readString(); |
| - final int fieldNumber = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.readInt() : i; |
| - byte bits = input.readByte(); |
| - boolean isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0; |
| - boolean storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0; |
| - boolean omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0; |
| - boolean storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0; |
| - final IndexOptions indexOptions; |
| - if (!isIndexed) { |
| - indexOptions = null; |
| - } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { |
| - indexOptions = IndexOptions.DOCS_ONLY; |
| - } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0) { |
| - if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS) { |
| - indexOptions = IndexOptions.DOCS_AND_FREQS; |
| - } else { |
| - throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")"); |
| - } |
| - } else { |
| - indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; |
| - } |
| - |
| - // LUCENE-3027: past indices were able to write |
| - // storePayloads=true when omitTFAP is also true, |
| - // which is invalid. We correct that, here: |
| - if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { |
| - storePayloads = false; |
| - } |
| - |
| - Type normType = isIndexed && !omitNorms ? Type.FIXED_INTS_8 : null; |
| - if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != null) { |
| - // RW can have norms but doesn't write them |
| - normType = input.readByte() != 0 ? Type.FIXED_INTS_8 : null; |
| - } |
| - |
| - infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, |
| - omitNorms, storePayloads, indexOptions, null, normType, null); |
| - } |
| - |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| - } |
| - return new FieldInfos(infos); |
| - } finally { |
| - input.close(); |
| - } |
| - } |
| - |
| - public static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException { |
| - files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION)); |
| - } |
| -} |
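| |
| The reader above unpacks the per-field flags from a single byte with bitwise masks. A standalone sketch of the same unpacking, using hypothetical mask values (the real constants live in PreFlexRWFieldInfosWriter, which this patch does not show): |
| |
|     public class FieldFlagsDemo { |
|       // Hypothetical mask values; the real ones are defined in PreFlexRWFieldInfosWriter. |
|       static final byte IS_INDEXED = 0x01; |
|       static final byte STORE_TERMVECTOR = 0x02; |
|       static final byte OMIT_NORMS = 0x10; |
|       static final byte STORE_PAYLOADS = 0x20; |
| |
|       public static void main(String[] args) { |
|         byte bits = IS_INDEXED | STORE_PAYLOADS;  // as read from the field infos file |
|         System.out.println("indexed=" + ((bits & IS_INDEXED) != 0) |
|             + " vectors=" + ((bits & STORE_TERMVECTOR) != 0) |
|             + " omitNorms=" + ((bits & OMIT_NORMS) != 0) |
|             + " payloads=" + ((bits & STORE_PAYLOADS) != 0)); |
|       } |
|     } |
| |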
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWNormsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWNormsFormat.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWNormsFormat.java (working copy) |
| @@ -1,35 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.PerDocConsumer; |
| -import org.apache.lucene.index.PerDocWriteState; |
| - |
| -/** |
| - * @lucene.internal |
| - * @lucene.experimental |
| - */ |
| -class PreFlexRWNormsFormat extends Lucene3xNormsFormat { |
| - |
| - @Override |
| - public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { |
| - return new PreFlexRWNormsConsumer(state.directory, state.segmentInfo.name, state.context); |
| - } |
| -} |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWNormsConsumer.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWNormsConsumer.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWNormsConsumer.java (working copy) |
| @@ -1,293 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Arrays; |
| - |
| -import org.apache.lucene.codecs.DocValuesConsumer; |
| -import org.apache.lucene.codecs.PerDocConsumer; |
| -import org.apache.lucene.index.DocValues; |
| -import org.apache.lucene.index.DocValues.Source; |
| -import org.apache.lucene.index.DocValues.Type; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexableField; |
| -import org.apache.lucene.index.MergeState; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.util.ArrayUtil; |
| -import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.IOUtils; |
| - |
| -/** |
| - * Writes and Merges Lucene 3.x norms format |
| - * @lucene.experimental |
| - */ |
| -class PreFlexRWNormsConsumer extends PerDocConsumer { |
| - |
| - /** norms header placeholder */ |
| - private static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; |
| - |
| - /** Extension of norms file */ |
| - private static final String NORMS_EXTENSION = "nrm"; |
| - |
| - /** Extension of separate norms file |
| - * @deprecated */ |
| - @Deprecated |
| - private static final String SEPARATE_NORMS_EXTENSION = "s"; |
| - |
| - private final Directory directory; |
| - |
| - private final String segment; |
| - |
| - private final IOContext context; |
| - |
| - private NormsWriter writer; |
| - |
| - public PreFlexRWNormsConsumer(Directory directory, String segment, IOContext context){ |
| - this.directory = directory; |
| - this.segment = segment; |
| - this.context = context; |
| - } |
| - |
| - @Override |
| - public void merge(MergeState mergeState) throws IOException { |
| - getNormsWriter().merge(mergeState); |
| - } |
| - |
| - @Override |
| - public void close() throws IOException { |
| - if (writer != null) { |
| - writer.finish(); |
| - } |
| - } |
| - |
| - @Override |
| - protected boolean canMerge(FieldInfo info) { |
| - return info.hasNorms(); |
| - } |
| - |
| - @Override |
| - protected Type getDocValuesType(FieldInfo info) { |
| - return info.getNormType(); |
| - } |
| - |
| - @Override |
| - public DocValuesConsumer addValuesField(Type type, FieldInfo fieldInfo) |
| - throws IOException { |
| - if (type != Type.FIXED_INTS_8) { |
| - throw new UnsupportedOperationException("Codec only supports single byte norm values. Type given: " + type); |
| - } |
| - return new Lucene3xNormsDocValuesConsumer(fieldInfo); |
| - } |
| - |
| - class Lucene3xNormsDocValuesConsumer extends DocValuesConsumer { |
| - // Holds all docID/norm pairs we've seen |
| - private int[] docIDs = new int[1]; |
| - private byte[] norms = new byte[1]; |
| - private int upto; |
| - private final FieldInfo fi; |
| - |
| - Lucene3xNormsDocValuesConsumer(FieldInfo fieldInfo) { |
| - fi = fieldInfo; |
| - } |
| - |
| - @Override |
| - public void finish(int docCount) throws IOException { |
| - final NormsWriter normsWriter = getNormsWriter(); |
| - boolean success = false; |
| - try { |
| - int uptoDoc = 0; |
| - normsWriter.setNumTotalDocs(docCount); |
| - if (upto > 0) { |
| - normsWriter.startField(fi); |
| - int docID = 0; |
| - for (; docID < docCount; docID++) { |
| - if (uptoDoc < upto && docIDs[uptoDoc] == docID) { |
| - normsWriter.writeNorm(norms[uptoDoc]); |
| - uptoDoc++; |
| - } else { |
| - normsWriter.writeNorm((byte) 0); |
| - } |
| - } |
| - // we should have consumed every norm |
| - assert uptoDoc == upto; |
| - |
| - } else { |
| - // Fill entire field with default norm: |
| - normsWriter.startField(fi); |
| - for (; upto < docCount; upto++) |
| - normsWriter.writeNorm((byte) 0); |
| - } |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - normsWriter.abort(); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public void add(int docID, IndexableField docValue) throws IOException { |
| - add(docID, docValue.numericValue().longValue()); |
| - } |
| - |
| - protected void add(int docID, long value) { |
| - if (docIDs.length <= upto) { |
| - assert docIDs.length == upto; |
| - docIDs = ArrayUtil.grow(docIDs, 1 + upto); |
| - } |
| - if (norms.length <= upto) { |
| - assert norms.length == upto; |
| - norms = ArrayUtil.grow(norms, 1 + upto); |
| - } |
| - norms[upto] = (byte) value; |
| - |
| - docIDs[upto] = docID; |
| - upto++; |
| - } |
| - |
| - @Override |
| - protected Type getType() { |
| - return Type.FIXED_INTS_8; |
| - } |
| - |
| - |
| - } |
| - |
| - public NormsWriter getNormsWriter() throws IOException { |
| - if (writer == null) { |
| - writer = new NormsWriter(directory, segment, context); |
| - } |
| - return writer; |
| - } |
| - |
| - private static class NormsWriter { |
| - |
| - private final IndexOutput output; |
| - private int normCount = 0; |
| - private int numTotalDocs = 0; |
| - |
| - public NormsWriter(Directory directory, String segment, IOContext context) throws IOException { |
| - final String normsFileName = IndexFileNames.segmentFileName(segment, "", NORMS_EXTENSION); |
| - boolean success = false; |
| - IndexOutput out = null; |
| - try { |
| - out = directory.createOutput(normsFileName, context); |
| - output = out; |
| - output.writeBytes(NORMS_HEADER, 0, NORMS_HEADER.length); |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(out); |
| - } |
| - } |
| - |
| - } |
| - |
| - |
| - public void setNumTotalDocs(int numTotalDocs) { |
| - assert this.numTotalDocs == 0 || numTotalDocs == this.numTotalDocs; |
| - this.numTotalDocs = numTotalDocs; |
| - } |
| - |
| - public void startField(FieldInfo info) throws IOException { |
| - assert info.omitsNorms() == false; |
| - normCount++; |
| - } |
| - |
| - public void writeNorm(byte norm) throws IOException { |
| - output.writeByte(norm); |
| - } |
| - |
| - public void abort() throws IOException { |
| - IOUtils.close(output); |
| - } |
| - |
| - public void finish() throws IOException { |
| - IOUtils.close(output); |
| - |
| - if (4+normCount*(long)numTotalDocs != output.getFilePointer()) { |
| - throw new IOException(".nrm file size mismatch: expected=" + (4+normCount*(long)numTotalDocs) + " actual=" + output.getFilePointer()); |
| - } |
| - } |
| - // TODO: we can actually use the default DV merge here and drop this specific stuff entirely |
| - /** we override merge and bulk-merge norms when there are no deletions */ |
| - public void merge(MergeState mergeState) throws IOException { |
| - int numMergedDocs = 0; |
| - for (FieldInfo fi : mergeState.fieldInfos) { |
| - if (fi.hasNorms()) { |
| - startField(fi); |
| - int numMergedDocsForField = 0; |
| - for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) { |
| - final int maxDoc = reader.reader.maxDoc(); |
| - byte[] normBuffer; |
| - DocValues normValues = reader.reader.normValues(fi.name); |
| - if (normValues == null) { |
| - // Can be null if this segment doesn't have |
| - // any docs with this field |
| - normBuffer = new byte[maxDoc]; |
| - Arrays.fill(normBuffer, (byte)0); |
| - } else { |
| - Source directSource = normValues.getDirectSource(); |
| - assert directSource.hasArray(); |
| - normBuffer = (byte[]) directSource.getArray(); |
| - } |
| - if (reader.liveDocs == null) { |
| - //optimized case for segments without deleted docs |
| - output.writeBytes(normBuffer, maxDoc); |
| - numMergedDocsForField += maxDoc; |
| - } else { |
| - // this segment has deleted docs, so we have to |
| - // check for every doc if it is deleted or not |
| - final Bits liveDocs = reader.liveDocs; |
| - for (int k = 0; k < maxDoc; k++) { |
| - if (liveDocs.get(k)) { |
| - numMergedDocsForField++; |
| - output.writeByte(normBuffer[k]); |
| - } |
| - } |
| - } |
| - mergeState.checkAbort.work(maxDoc); |
| - } |
| - assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField; |
| - numMergedDocs = numMergedDocsForField; |
| - } |
| - } |
| - this.numTotalDocs = numMergedDocs; |
| - } |
| - } |
| - |
| - @Override |
| - public void abort() { |
| - try { |
| - try { |
| - if (writer != null) { |
| - writer.abort(); |
| - } |
| - } finally { |
| - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", |
| - NORMS_EXTENSION)); |
| - } |
| - } catch (IOException e) { |
| - // ignore |
| - } |
| - } |
| -} |
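| |
| The size check in NormsWriter.finish() above captures the entire ".nrm" layout: a 4-byte header followed by one norm byte per document for each field that has norms. As a standalone illustration of that invariant (plain Java, not part of the patch): |
| |
|     // Expected ".nrm" file length: 4 header bytes ("NRM" + version byte), then |
|     // one byte per document for each of the normCount fields that carry norms. |
|     static long expectedNrmBytes(int normCount, int numTotalDocs) { |
|       return 4 + normCount * (long) numTotalDocs; // long cast avoids int overflow |
|     } |
| |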
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsWriter.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsWriter.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsWriter.java (working copy) |
| @@ -1,221 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Comparator; |
| - |
| -import org.apache.lucene.codecs.TermVectorsWriter; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.store.DataInput; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.util.ArrayUtil; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.IOUtils; |
| -import org.apache.lucene.util.StringHelper; |
| - |
| -final class PreFlexRWTermVectorsWriter extends TermVectorsWriter { |
| - private final Directory directory; |
| - private final String segment; |
| - private IndexOutput tvx = null, tvd = null, tvf = null; |
| - |
| - public PreFlexRWTermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException { |
| - this.directory = directory; |
| - this.segment = segment; |
| - boolean success = false; |
| - try { |
| - // Open files for TermVector storage |
| - tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION), context); |
| - tvx.writeInt(Lucene3xTermVectorsReader.FORMAT_CURRENT); |
| - tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context); |
| - tvd.writeInt(Lucene3xTermVectorsReader.FORMAT_CURRENT); |
| - tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION), context); |
| - tvf.writeInt(Lucene3xTermVectorsReader.FORMAT_CURRENT); |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - abort(); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public void startDocument(int numVectorFields) throws IOException { |
| - lastFieldName = null; |
| - this.numVectorFields = numVectorFields; |
| - tvx.writeLong(tvd.getFilePointer()); |
| - tvx.writeLong(tvf.getFilePointer()); |
| - tvd.writeVInt(numVectorFields); |
| - fieldCount = 0; |
| - fps = ArrayUtil.grow(fps, numVectorFields); |
| - } |
| - |
| - private long fps[] = new long[10]; // pointers to the tvf before writing each field |
| - private int fieldCount = 0; // number of fields we have written so far for this document |
| - private int numVectorFields = 0; // total number of fields we will write for this document |
| - private String lastFieldName; |
| - |
| - @Override |
| - public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException { |
| - assert lastFieldName == null || info.name.compareTo(lastFieldName) > 0: "fieldName=" + info.name + " lastFieldName=" + lastFieldName; |
| - lastFieldName = info.name; |
| - this.positions = positions; |
| - this.offsets = offsets; |
| - lastTerm.length = 0; |
| - fps[fieldCount++] = tvf.getFilePointer(); |
| - tvd.writeVInt(info.number); |
| - tvf.writeVInt(numTerms); |
| - byte bits = 0x0; |
| - if (positions) |
| - bits |= Lucene3xTermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR; |
| - if (offsets) |
| - bits |= Lucene3xTermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR; |
| - tvf.writeByte(bits); |
| - |
| - assert fieldCount <= numVectorFields; |
| - if (fieldCount == numVectorFields) { |
| - // last field of the document |
| - // this is crazy because the file format is crazy! |
| - for (int i = 1; i < fieldCount; i++) { |
| - tvd.writeVLong(fps[i] - fps[i-1]); |
| - } |
| - } |
| - } |
| - |
| - private final BytesRef lastTerm = new BytesRef(10); |
| - |
| - // NOTE: we override addProx, so we don't need to buffer when indexing. |
| - // we also don't buffer during bulk merges. |
| - private int offsetStartBuffer[] = new int[10]; |
| - private int offsetEndBuffer[] = new int[10]; |
| - private int offsetIndex = 0; |
| - private int offsetFreq = 0; |
| - private boolean positions = false; |
| - private boolean offsets = false; |
| - |
| - @Override |
| - public void startTerm(BytesRef term, int freq) throws IOException { |
| - final int prefix = StringHelper.bytesDifference(lastTerm, term); |
| - final int suffix = term.length - prefix; |
| - tvf.writeVInt(prefix); |
| - tvf.writeVInt(suffix); |
| - tvf.writeBytes(term.bytes, term.offset + prefix, suffix); |
| - tvf.writeVInt(freq); |
| - lastTerm.copyBytes(term); |
| - lastPosition = lastOffset = 0; |
| - |
| - if (offsets && positions) { |
| - // we might need to buffer if its a non-bulk merge |
| - offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq); |
| - offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq); |
| - offsetIndex = 0; |
| - offsetFreq = freq; |
| - } |
| - } |
| - |
| - int lastPosition = 0; |
| - int lastOffset = 0; |
| - |
| - @Override |
| - public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException { |
| - // TODO: technically we could just copy bytes and not re-encode if we knew the length... |
| - if (positions != null) { |
| - for (int i = 0; i < numProx; i++) { |
| - tvf.writeVInt(positions.readVInt()); |
| - } |
| - } |
| - |
| - if (offsets != null) { |
| - for (int i = 0; i < numProx; i++) { |
| - tvf.writeVInt(offsets.readVInt()); |
| - tvf.writeVInt(offsets.readVInt()); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public void addPosition(int position, int startOffset, int endOffset) throws IOException { |
| - if (positions && offsets) { |
| - // write position delta |
| - tvf.writeVInt(position - lastPosition); |
| - lastPosition = position; |
| - |
| - // buffer offsets |
| - offsetStartBuffer[offsetIndex] = startOffset; |
| - offsetEndBuffer[offsetIndex] = endOffset; |
| - offsetIndex++; |
| - |
| - // dump buffer if we are done |
| - if (offsetIndex == offsetFreq) { |
| - for (int i = 0; i < offsetIndex; i++) { |
| - tvf.writeVInt(offsetStartBuffer[i] - lastOffset); |
| - tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]); |
| - lastOffset = offsetEndBuffer[i]; |
| - } |
| - } |
| - } else if (positions) { |
| - // write position delta |
| - tvf.writeVInt(position - lastPosition); |
| - lastPosition = position; |
| - } else if (offsets) { |
| - // write offset deltas |
| - tvf.writeVInt(startOffset - lastOffset); |
| - tvf.writeVInt(endOffset - startOffset); |
| - lastOffset = endOffset; |
| - } |
| - } |
| - |
| - @Override |
| - public void abort() { |
| - try { |
| - close(); |
| - } catch (IOException ignored) {} |
| - IOUtils.deleteFilesIgnoringExceptions(directory, IndexFileNames.segmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION), |
| - IndexFileNames.segmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), |
| - IndexFileNames.segmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); |
| - } |
| - |
| - @Override |
| - public void finish(FieldInfos fis, int numDocs) throws IOException { |
| - if (4+((long) numDocs)*16 != tvx.getFilePointer()) |
| - // This is most likely a bug in Sun JRE 1.6.0_04/_05; |
| - // we detect that the bug has struck, here, and |
| - // throw an exception to prevent the corruption from |
| - // entering the index. See LUCENE-1282 for |
| - // details. |
| - throw new RuntimeException("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + tvx.getFilePointer() + " file=" + tvx.toString() + "; now aborting this merge to prevent index corruption"); |
| - } |
| - |
| - /** Close all streams. */ |
| - @Override |
| - public void close() throws IOException { |
| - // make an effort to close all streams we can but remember and re-throw |
| - // the first exception encountered in this process |
| - IOUtils.close(tvx, tvd, tvf); |
| - tvx = tvd = tvf = null; |
| - } |
| - |
| - @Override |
| - public Comparator<BytesRef> getComparator() throws IOException { |
| - return BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - } |
| -} |
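| |
| startTerm() above writes each term relative to the previous one: the length of the shared prefix, the length of the suffix, then only the suffix bytes. A rough standalone sketch of that delta encoding (plain java.io in place of Lucene's IndexOutput, and writeInt in place of vInts, for brevity): |
| |
|     import java.io.DataOutputStream; |
|     import java.io.IOException; |
| |
|     class TermDeltaWriter { |
|       private byte[] lastTerm = new byte[0]; |
| |
|       // Write term as (prefixLen, suffixLen, suffixBytes) against the previous term. |
|       void writeTermDelta(DataOutputStream out, byte[] term) throws IOException { |
|         int prefix = 0; |
|         int max = Math.min(lastTerm.length, term.length); |
|         while (prefix < max && lastTerm[prefix] == term[prefix]) { |
|           prefix++; |
|         } |
|         out.writeInt(prefix); |
|         out.writeInt(term.length - prefix); |
|         out.write(term, prefix, term.length - prefix); |
|         lastTerm = term.clone(); |
|       } |
|     } |
| |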
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSegmentInfoWriter.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSegmentInfoWriter.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSegmentInfoWriter.java (working copy) |
| @@ -1,45 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.SegmentInfoWriter; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.SegmentInfos; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| - |
| -/** |
| - * PreFlex implementation of {@link SegmentInfoWriter}. |
| - * @lucene.experimental |
| - */ |
| -class PreFlexRWSegmentInfoWriter extends SegmentInfoWriter { |
| - |
| - // NOTE: this is not "really" 3.x format, because we are |
| - // writing each SI to its own file, vs 3.x where the list |
| - // of segments and SI for each segment is written into a |
| - // single segments_N file |
| - |
| - /** Save a single segment's info. */ |
| - @Override |
| - public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException { |
| - SegmentInfos.write3xInfo(dir, si, ioContext); |
| - } |
| -} |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWCodec.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWCodec.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWCodec.java (working copy) |
| @@ -1,106 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.codecs.FieldInfosFormat; |
| -import org.apache.lucene.codecs.LiveDocsFormat; |
| -import org.apache.lucene.codecs.NormsFormat; |
| -import org.apache.lucene.codecs.PostingsFormat; |
| -import org.apache.lucene.codecs.SegmentInfoFormat; |
| -import org.apache.lucene.codecs.StoredFieldsFormat; |
| -import org.apache.lucene.codecs.TermVectorsFormat; |
| -import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; |
| -import org.apache.lucene.util.LuceneTestCase; |
| - |
| -/** |
| - * Writes 3.x-like indexes (not perfect emulation yet) for testing only! |
| - * @lucene.experimental |
| - */ |
| -public class PreFlexRWCodec extends Lucene3xCodec { |
| - private final PostingsFormat postings = new PreFlexRWPostingsFormat(); |
| - private final Lucene3xNormsFormat norms = new PreFlexRWNormsFormat(); |
| - private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat(); |
| - private final TermVectorsFormat termVectors = new PreFlexRWTermVectorsFormat(); |
| - private final SegmentInfoFormat segmentInfos = new PreFlexRWSegmentInfoFormat(); |
| - private final StoredFieldsFormat storedFields = new PreFlexRWStoredFieldsFormat(); |
| - // TODO: this should really be a different impl |
| - private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat(); |
| - |
| - @Override |
| - public PostingsFormat postingsFormat() { |
| - if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { |
| - return postings; |
| - } else { |
| - return super.postingsFormat(); |
| - } |
| - } |
| - |
| - @Override |
| - public NormsFormat normsFormat() { |
| - if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { |
| - return norms; |
| - } else { |
| - return super.normsFormat(); |
| - } |
| - } |
| - |
| - @Override |
| - public SegmentInfoFormat segmentInfoFormat() { |
| - if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { |
| - return segmentInfos ; |
| - } else { |
| - return super.segmentInfoFormat(); |
| - } |
| - } |
| - |
| - @Override |
| - public FieldInfosFormat fieldInfosFormat() { |
| - if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { |
| - return fieldInfos; |
| - } else { |
| - return super.fieldInfosFormat(); |
| - } |
| - } |
| - |
| - @Override |
| - public TermVectorsFormat termVectorsFormat() { |
| - if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { |
| - return termVectors; |
| - } else { |
| - return super.termVectorsFormat(); |
| - } |
| - } |
| - |
| - @Override |
| - public LiveDocsFormat liveDocsFormat() { |
| - if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { |
| - return liveDocs; |
| - } else { |
| - return super.liveDocsFormat(); |
| - } |
| - } |
| - |
| - @Override |
| - public StoredFieldsFormat storedFieldsFormat() { |
| - if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { |
| - return storedFields; |
| - } else { |
| - return super.storedFieldsFormat(); |
| - } |
| - } |
| -} |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosWriter.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosWriter.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosWriter.java (working copy) |
| @@ -1,98 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.FieldInfosWriter; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.FieldInfo.IndexOptions; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexOutput; |
| - |
| -/** |
| - * @lucene.internal |
| - * @lucene.experimental |
| - */ |
| -class PreFlexRWFieldInfosWriter extends FieldInfosWriter { |
| - // TODO move to test-framework preflex RW? |
| - |
| - /** Extension of field infos */ |
| - static final String FIELD_INFOS_EXTENSION = "fnm"; |
| - |
| - // First used in 2.9; prior to 2.9 there was no format header |
| - static final int FORMAT_START = -2; |
| - // First used in 3.4: omit only positional information |
| - static final int FORMAT_OMIT_POSITIONS = -3; |
| - |
| - static final int FORMAT_PREFLEX_RW = Integer.MIN_VALUE; |
| - |
| - // whenever you add a new format, make it 1 smaller (negative version logic)! |
| - static final int FORMAT_CURRENT = FORMAT_OMIT_POSITIONS; |
| - |
| - static final byte IS_INDEXED = 0x1; |
| - static final byte STORE_TERMVECTOR = 0x2; |
| - static final byte OMIT_NORMS = 0x10; |
| - static final byte STORE_PAYLOADS = 0x20; |
| - static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40; |
| - static final byte OMIT_POSITIONS = -128; |
| - |
| - @Override |
| - public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException { |
| - final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION); |
| - IndexOutput output = directory.createOutput(fileName, context); |
| - try { |
| - output.writeVInt(FORMAT_PREFLEX_RW); |
| - output.writeVInt(infos.size()); |
| - for (FieldInfo fi : infos) { |
| - byte bits = 0x0; |
| - if (fi.hasVectors()) bits |= STORE_TERMVECTOR; |
| - if (fi.omitsNorms()) bits |= OMIT_NORMS; |
| - if (fi.hasPayloads()) bits |= STORE_PAYLOADS; |
| - if (fi.isIndexed()) { |
| - bits |= IS_INDEXED; |
| - assert fi.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.hasPayloads(); |
| - if (fi.getIndexOptions() == IndexOptions.DOCS_ONLY) { |
| - bits |= OMIT_TERM_FREQ_AND_POSITIONS; |
| - } else if (fi.getIndexOptions() == IndexOptions.DOCS_AND_FREQS) { |
| - bits |= OMIT_POSITIONS; |
| - } |
| - } |
| - output.writeString(fi.name); |
| - /* |
| - * we need to write the field number since IW tries |
| - * to stabilize the field numbers across segments so the |
| - * FI ordinal is not necessarily equivalent to the field number |
| - */ |
| - output.writeInt(fi.number); |
| - output.writeByte(bits); |
| - if (fi.isIndexed() && !fi.omitsNorms()) { |
| - // to allow null norm types we need to indicate if norms are written |
| - // only in RW case |
| - output.writeByte((byte) (fi.getNormType() == null ? 0 : 1)); |
| - } |
| - assert fi.attributes() == null; // not used or supported |
| - } |
| - } finally { |
| - output.close(); |
| - } |
| - } |
| - |
| -} |
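| |
| The writer above folds several per-field booleans into a single byte via the bit masks declared at the top of the class; a reader reverses it with bitwise tests. The pattern in isolation (illustrative sketch only): |
| |
|     static final byte IS_INDEXED = 0x1; |
|     static final byte STORE_TERMVECTOR = 0x2; |
|     static final byte OMIT_NORMS = 0x10; |
| |
|     // Pack the flags into one byte for the on-disk format. |
|     static byte packBits(boolean indexed, boolean vectors, boolean omitNorms) { |
|       byte bits = 0x0; |
|       if (indexed) bits |= IS_INDEXED; |
|       if (vectors) bits |= STORE_TERMVECTOR; |
|       if (omitNorms) bits |= OMIT_NORMS; |
|       return bits; |
|     } |
| |
|     // Unpack one flag on the read side. |
|     static boolean isIndexed(byte bits) { |
|       return (bits & IS_INDEXED) != 0; |
|     } |
| |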
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/package.html |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/package.html (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/package.html (working copy) |
| @@ -1,30 +0,0 @@ |
| -<!doctype html public "-//w3c//dtd html 4.0 transitional//en"> |
| -<!-- |
| - Licensed to the Apache Software Foundation (ASF) under one or more |
| - contributor license agreements. See the NOTICE file distributed with |
| - this work for additional information regarding copyright ownership. |
| - The ASF licenses this file to You under the Apache License, Version 2.0 |
| - (the "License"); you may not use this file except in compliance with |
| - the License. You may obtain a copy of the License at |
| - |
| - http://www.apache.org/licenses/LICENSE-2.0 |
| - |
| - Unless required by applicable law or agreed to in writing, software |
| - distributed under the License is distributed on an "AS IS" BASIS, |
| - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - See the License for the specific language governing permissions and |
| - limitations under the License. |
| ---> |
| -<html> |
| -<head> |
| - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> |
| -</head> |
| -<body> |
| -Support for generating test indexes in the Lucene 3.x index format. |
| -<p> |
| -NOTE: This is not a perfect simulation of the 3.x format, but it's close. |
| -In particular, indexes generated with this codec cannot actually be read |
| -with Lucene 3.x. |
| -</p> |
| -</body> |
| -</html> |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWPostingsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWPostingsFormat.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWPostingsFormat.java (working copy) |
| @@ -1,74 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.FieldsConsumer; |
| -import org.apache.lucene.codecs.FieldsProducer; |
| -import org.apache.lucene.index.SegmentWriteState; |
| -import org.apache.lucene.index.SegmentReadState; |
| -import org.apache.lucene.util.LuceneTestCase; |
| - |
| -/** Postings format, only for testing, that can write and read the |
| - * pre-flex index format. |
| - * |
| - * @lucene.experimental |
| - */ |
| -class PreFlexRWPostingsFormat extends Lucene3xPostingsFormat { |
| - |
| - public PreFlexRWPostingsFormat() { |
| - // NOTE: we impersonate the PreFlex codec so that it can |
| - // read the segments we write! |
| - } |
| - |
| - @Override |
| - public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { |
| - return new PreFlexRWFieldsWriter(state); |
| - } |
| - |
| - @Override |
| - public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { |
| - |
| - // Whenever IW opens readers, e.g. for merging, we have to |
| - // keep the terms in UTF16 order: |
| - |
| - return new Lucene3xFields(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.termsIndexDivisor) { |
| - @Override |
| - protected boolean sortTermsByUnicode() { |
| - // We carefully peek into the stack trace above us: if |
| - // we are part of a "merge", we must sort by UTF16: |
| - boolean unicodeSortOrder = true; |
| - |
| - StackTraceElement[] trace = new Exception().getStackTrace(); |
| - for (int i = 0; i < trace.length; i++) { |
| - //System.out.println(trace[i].getClassName()); |
| - if ("merge".equals(trace[i].getMethodName())) { |
| - unicodeSortOrder = false; |
| - if (LuceneTestCase.VERBOSE) { |
| - System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order"); |
| - } |
| - break; |
| - } |
| - } |
| - |
| - return unicodeSortOrder; |
| - } |
| - }; |
| - } |
| -} |
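| |
| This class and PreFlexRWTermVectorsFormat below share the same trick: walk the current stack trace, and if any caller is a method named "merge", fall back to the legacy UTF16 sort order. Isolated, the technique is simply (illustrative sketch): |
| |
|     // True if any frame on the current stack is a method named "merge". |
|     // Deliberately fragile -- it couples behavior to caller method names -- |
|     // which is tolerable only because this codec is test-only. |
|     static boolean calledFromMerge() { |
|       for (StackTraceElement frame : new Exception().getStackTrace()) { |
|         if ("merge".equals(frame.getMethodName())) { |
|           return true; |
|         } |
|       } |
|       return false; |
|     } |
| |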
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWStoredFieldsWriter.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWStoredFieldsWriter.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWStoredFieldsWriter.java (working copy) |
| @@ -1,156 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Copyright 2004 The Apache Software Foundation |
| - * |
| - * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| - * use this file except in compliance with the License. You may obtain a copy of |
| - * the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| - * License for the specific language governing permissions and limitations under |
| - * the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.StoredFieldsWriter; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexableField; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.IOUtils; |
| - |
| -/** @lucene.experimental */ |
| -final class PreFlexRWStoredFieldsWriter extends StoredFieldsWriter { |
| - private final Directory directory; |
| - private final String segment; |
| - private IndexOutput fieldsStream; |
| - private IndexOutput indexStream; |
| - |
| - public PreFlexRWStoredFieldsWriter(Directory directory, String segment, IOContext context) throws IOException { |
| - assert directory != null; |
| - this.directory = directory; |
| - this.segment = segment; |
| - |
| - boolean success = false; |
| - try { |
| - fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context); |
| - indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context); |
| - |
| - fieldsStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT); |
| - indexStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT); |
| - |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - abort(); |
| - } |
| - } |
| - } |
| - |
| - // Writes the contents of buffer into the fields stream |
| - // and adds a new entry for this document into the index |
| - // stream. This assumes the buffer was already written |
| - // in the correct fields format. |
| - public void startDocument(int numStoredFields) throws IOException { |
| - indexStream.writeLong(fieldsStream.getFilePointer()); |
| - fieldsStream.writeVInt(numStoredFields); |
| - } |
| - |
| - public void close() throws IOException { |
| - try { |
| - IOUtils.close(fieldsStream, indexStream); |
| - } finally { |
| - fieldsStream = indexStream = null; |
| - } |
| - } |
| - |
| - public void abort() { |
| - try { |
| - close(); |
| - } catch (IOException ignored) {} |
| - IOUtils.deleteFilesIgnoringExceptions(directory, |
| - IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), |
| - IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); |
| - } |
| - |
| - public void writeField(FieldInfo info, IndexableField field) throws IOException { |
| - fieldsStream.writeVInt(info.number); |
| - int bits = 0; |
| - final BytesRef bytes; |
| - final String string; |
| - // TODO: maybe a field should serialize itself? |
| - // this way we don't bake into indexer all these |
| - // specific encodings for different fields? and apps |
| - // can customize... |
| - |
| - Number number = field.numericValue(); |
| - if (number != null) { |
| - if (number instanceof Byte || number instanceof Short || number instanceof Integer) { |
| - bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT; |
| - } else if (number instanceof Long) { |
| - bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG; |
| - } else if (number instanceof Float) { |
| - bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT; |
| - } else if (number instanceof Double) { |
| - bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE; |
| - } else { |
| - throw new IllegalArgumentException("cannot store numeric type " + number.getClass()); |
| - } |
| - string = null; |
| - bytes = null; |
| - } else { |
| - bytes = field.binaryValue(); |
| - if (bytes != null) { |
| - bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY; |
| - string = null; |
| - } else { |
| - string = field.stringValue(); |
| - if (string == null) { |
| - throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue"); |
| - } |
| - } |
| - } |
| - |
| - fieldsStream.writeByte((byte) bits); |
| - |
| - if (bytes != null) { |
| - fieldsStream.writeVInt(bytes.length); |
| - fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length); |
| - } else if (string != null) { |
| - fieldsStream.writeString(field.stringValue()); |
| - } else { |
| - if (number instanceof Byte || number instanceof Short || number instanceof Integer) { |
| - fieldsStream.writeInt(number.intValue()); |
| - } else if (number instanceof Long) { |
| - fieldsStream.writeLong(number.longValue()); |
| - } else if (number instanceof Float) { |
| - fieldsStream.writeInt(Float.floatToIntBits(number.floatValue())); |
| - } else if (number instanceof Double) { |
| - fieldsStream.writeLong(Double.doubleToLongBits(number.doubleValue())); |
| - } else { |
| - assert false; |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public void finish(FieldInfos fis, int numDocs) throws IOException { |
| - if (4+((long) numDocs)*8 != indexStream.getFilePointer()) |
| - // This is most likely a bug in Sun JRE 1.6.0_04/_05; |
| - // we detect that the bug has struck, here, and |
| - // throw an exception to prevent the corruption from |
| - // entering the index. See LUCENE-1282 for |
| - // details. |
| - throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption"); |
| - } |
| -} |
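| |
| writeField() above prefixes every stored value with a type bit so the reader knows whether the payload is an int, long, float, double, string, or raw bytes. A reduced sketch of the numeric branch (plain java.io; the constant values are illustrative stand-ins, not the real Lucene3xStoredFieldsReader constants): |
| |
|     import java.io.DataOutputStream; |
|     import java.io.IOException; |
| |
|     static final byte NUMERIC_INT = 0x08;   // stand-in constants |
|     static final byte NUMERIC_LONG = 0x10; |
| |
|     static void writeNumber(DataOutputStream out, Number number) throws IOException { |
|       if (number instanceof Byte || number instanceof Short || number instanceof Integer) { |
|         out.writeByte(NUMERIC_INT);       // tag first ... |
|         out.writeInt(number.intValue());  // ... then the payload |
|       } else if (number instanceof Long) { |
|         out.writeByte(NUMERIC_LONG); |
|         out.writeLong(number.longValue()); |
|       } else { |
|         throw new IllegalArgumentException("cannot store numeric type " + number.getClass()); |
|       } |
|     } |
| |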
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsFormat.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsFormat.java (working copy) |
| @@ -1,62 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.TermVectorsReader; |
| -import org.apache.lucene.codecs.TermVectorsWriter; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.util.LuceneTestCase; |
| - |
| -class PreFlexRWTermVectorsFormat extends Lucene3xTermVectorsFormat { |
| - |
| - @Override |
| - public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException { |
| - return new PreFlexRWTermVectorsWriter(directory, segmentInfo.name, context); |
| - } |
| - |
| - @Override |
| - public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException { |
| - return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context) { |
| - @Override |
| - protected boolean sortTermsByUnicode() { |
| - // We carefully peek into the stack trace above us: if |
| - // we are part of a "merge", we must sort by UTF16: |
| - boolean unicodeSortOrder = true; |
| - |
| - StackTraceElement[] trace = new Exception().getStackTrace(); |
| - for (int i = 0; i < trace.length; i++) { |
| - //System.out.println(trace[i].getClassName()); |
| - if ("merge".equals(trace[i].getMethodName())) { |
| - unicodeSortOrder = false; |
| - if (LuceneTestCase.VERBOSE) { |
| - System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 vector term sort order"); |
| - } |
| - break; |
| - } |
| - } |
| - |
| - return unicodeSortOrder; |
| - } |
| - }; |
| - } |
| -} |
| Index: lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java (revision 1344053) |
| +++ lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java (working copy) |
| @@ -518,7 +518,7 @@ |
| @Override |
| public void run() { |
| try { |
| - final LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues()); |
| + final LineFileDocs docs = new LineFileDocs(random(), true); |
| int numDocs = 0; |
| while (System.nanoTime() < endTimeNanos) { |
| final int what = random().nextInt(3); |
| Index: lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec |
| =================================================================== |
| --- lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (revision 1344053) |
| +++ lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (working copy) |
| @@ -13,4 +13,3 @@ |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| -org.apache.lucene.codecs.lucene3x.PreFlexRWCodec |
| Index: lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java |
| =================================================================== |
| --- lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java (revision 1344053) |
| +++ lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java (working copy) |
| @@ -311,13 +311,8 @@ |
| } |
| |
| public void testSumTotalTermFreq() throws Exception { |
| - if (Codec.getDefault().getName().equals("Lucene3x")) { |
| - assertHits(new FunctionQuery(new SumTotalTermFreqValueSource("text")), |
| - new float[] { -1f, -1f }); |
| - } else { |
| - assertHits(new FunctionQuery(new SumTotalTermFreqValueSource("text")), |
| + assertHits(new FunctionQuery(new SumTotalTermFreqValueSource("text")), |
| new float[] { 8f, 8f }); |
| - } |
| } |
| |
| public void testTermFreq() throws Exception { |
| @@ -346,15 +341,9 @@ |
| } |
| |
| public void testTotalTermFreq() throws Exception { |
| - if (Codec.getDefault().getName().equals("Lucene3x")) { |
| - assertHits(new FunctionQuery( |
| - new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), |
| - new float[] { -1f, -1f }); |
| - } else { |
| - assertHits(new FunctionQuery( |
| - new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), |
| - new float[] { 4f, 4f }); |
| - } |
| + assertHits(new FunctionQuery( |
| + new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), |
| + new float[] { 4f, 4f }); |
| } |
| |
| void assertHits(Query q, float scores[]) throws Exception { |
| Index: lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (working copy) |
| @@ -1097,7 +1097,7 @@ |
| Codec.setDefault(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())); |
| } |
| |
| - final LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues()); |
| + final LineFileDocs docs = new LineFileDocs(random(), true); |
| final int RUN_TIME_MSEC = atLeast(500); |
| final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64); |
| final File tempDir = _TestUtil.getTempDir("fstlines"); |
| Index: lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java (working copy) |
| @@ -48,8 +48,7 @@ |
| NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0); |
| IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf); |
| - final LineFileDocs docs = new LineFileDocs(random(), |
| - defaultCodecSupportsDocValues()); |
| + final LineFileDocs docs = new LineFileDocs(random(), true); |
| final int numDocs = _TestUtil.nextInt(random(), 100, 400); |
| |
| if (VERBOSE) { |
| Index: lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestImpersonation.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestImpersonation.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestImpersonation.java (working copy) |
| @@ -1,34 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.codecs.lucene3x.PreFlexRWCodec; |
| -import org.apache.lucene.util.LuceneTestCase; |
| - |
| -/** |
| - * Test that the SPI magic is returning "PreFlexRWCodec" for Lucene3x |
| - * |
| - * @lucene.experimental |
| - */ |
| -public class TestImpersonation extends LuceneTestCase { |
| - public void test() throws Exception { |
| - Codec codec = Codec.forName("Lucene3x"); |
| - assertTrue(codec instanceof PreFlexRWCodec); |
| - } |
| -} |
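| |
| The deleted test verified name-based codec lookup: the META-INF/services registration removed earlier in this patch is what let Codec.forName("Lucene3x") resolve to PreFlexRWCodec. The underlying pattern, sketched with plain java.util.ServiceLoader (Lucene uses its own named-SPI loader, so this is only an approximation): |
| |
|     import java.util.ServiceLoader; |
| |
|     interface NamedService { |
|       String getName(); |
|     } |
| |
|     // Scan implementations registered under META-INF/services/<interface> |
|     // on the classpath and return the one with the requested name. |
|     static <T extends NamedService> T forName(Class<T> clazz, String name) { |
|       for (T impl : ServiceLoader.load(clazz)) { |
|         if (name.equals(impl.getName())) { |
|           return impl; |
|         } |
|       } |
|       throw new IllegalArgumentException("no service named " + name); |
|     } |
| |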
| Index: lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestSurrogates.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestSurrogates.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestSurrogates.java (working copy) |
| @@ -1,356 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.store.*; |
| -import org.apache.lucene.codecs.lucene3x.PreFlexRWCodec; |
| -import org.apache.lucene.document.*; |
| -import org.apache.lucene.analysis.*; |
| -import org.apache.lucene.index.*; |
| -import org.apache.lucene.util.*; |
| - |
| -import java.util.*; |
| -import java.io.IOException; |
| - |
| -import org.junit.BeforeClass; |
| -import org.junit.Test; |
| - |
| -public class TestSurrogates extends LuceneTestCase { |
| - /** we will manually instantiate preflex-rw here */ |
| - @BeforeClass |
| - public static void beforeClass() { |
| - LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true; |
| - } |
| - |
| - private static String makeDifficultRandomUnicodeString(Random r) { |
| - final int end = r.nextInt(20); |
| - if (end == 0) { |
| - // allow 0 length |
| - return ""; |
| - } |
| - final char[] buffer = new char[end]; |
| - for (int i = 0; i < end; i++) { |
| - int t = r.nextInt(5); |
| - |
| - if (0 == t && i < end - 1) { |
| - // hi |
| - buffer[i++] = (char) (0xd800 + r.nextInt(2)); |
| - // lo |
| - buffer[i] = (char) (0xdc00 + r.nextInt(2)); |
| - } else if (t <= 3) { |
| - buffer[i] = (char) ('a' + r.nextInt(2)); |
| - } else if (4 == t) { |
| - buffer[i] = (char) (0xe000 + r.nextInt(2)); |
| - } |
| - } |
| - |
| - return new String(buffer, 0, end); |
| - } |
| - |
| - private String toHexString(Term t) { |
| - return t.field() + ":" + UnicodeUtil.toHexString(t.text()); |
| - } |
| - |
| - private String getRandomString(Random r) { |
| - String s; |
| - if (r.nextInt(5) == 1) { |
| - if (r.nextInt(3) == 1) { |
| - s = makeDifficultRandomUnicodeString(r); |
| - } else { |
| - s = _TestUtil.randomUnicodeString(r); |
| - } |
| - } else { |
| - s = _TestUtil.randomRealisticUnicodeString(r); |
| - } |
| - return s; |
| - } |
| - |
| - private static class SortTermAsUTF16Comparator implements Comparator<Term> { |
| - private static final Comparator<BytesRef> legacyComparator = |
| - BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - |
| - public int compare(Term term1, Term term2) { |
| - if (term1.field().equals(term2.field())) { |
| - return legacyComparator.compare(term1.bytes(), term2.bytes()); |
| - } else { |
| - return term1.field().compareTo(term2.field()); |
| - } |
| - } |
| - } |
| - |
| - private static final SortTermAsUTF16Comparator termAsUTF16Comparator = new SortTermAsUTF16Comparator(); |
| - |
| - // single straight enum |
| - private void doTestStraightEnum(List<Term> fieldTerms, IndexReader reader, int uniqueTermCount) throws IOException { |
| - |
| - if (VERBOSE) { |
| - System.out.println("\nTEST: top now enum reader=" + reader); |
| - } |
| - FieldsEnum fieldsEnum = MultiFields.getFields(reader).iterator(); |
| - |
| - { |
| - // Test straight enum: |
| - String field; |
| - int termCount = 0; |
| - while((field = fieldsEnum.next()) != null) { |
| - Terms terms = fieldsEnum.terms(); |
| - assertNotNull(terms); |
| - TermsEnum termsEnum = terms.iterator(null); |
| - BytesRef text; |
| - BytesRef lastText = null; |
| - while((text = termsEnum.next()) != null) { |
| - Term exp = fieldTerms.get(termCount); |
| - if (VERBOSE) { |
| - System.out.println(" got term=" + field + ":" + UnicodeUtil.toHexString(text.utf8ToString())); |
| - System.out.println(" exp=" + exp.field() + ":" + UnicodeUtil.toHexString(exp.text().toString())); |
| - System.out.println(); |
| - } |
| - if (lastText == null) { |
| - lastText = BytesRef.deepCopyOf(text); |
| - } else { |
| - assertTrue(lastText.compareTo(text) < 0); |
| - lastText.copyBytes(text); |
| - } |
| - assertEquals(exp.field(), field); |
| - assertEquals(exp.bytes(), text); |
| - termCount++; |
| - } |
| - if (VERBOSE) { |
| - System.out.println(" no more terms for field=" + field); |
| - } |
| - } |
| - assertEquals(uniqueTermCount, termCount); |
| - } |
| - } |
| - |
| - // randomly seeks to term that we know exists, then next's |
| - // from there |
| - private void doTestSeekExists(Random r, List<Term> fieldTerms, IndexReader reader) throws IOException { |
| - |
| - final Map<String,TermsEnum> tes = new HashMap<String,TermsEnum>(); |
| - |
| - // Test random seek to existing term, then enum: |
| - if (VERBOSE) { |
| - System.out.println("\nTEST: top now seek"); |
| - } |
| - |
| - int num = atLeast(100); |
| - for (int iter = 0; iter < num; iter++) { |
| - |
| - // pick random field+term |
| - int spot = r.nextInt(fieldTerms.size()); |
| - Term term = fieldTerms.get(spot); |
| - String field = term.field(); |
| - |
| - if (VERBOSE) { |
| - System.out.println("TEST: exist seek field=" + field + " term=" + UnicodeUtil.toHexString(term.text())); |
| - } |
| - |
| - // seek to it |
| - TermsEnum te = tes.get(field); |
| - if (te == null) { |
| - te = MultiFields.getTerms(reader, field).iterator(null); |
| - tes.put(field, te); |
| - } |
| - |
| - if (VERBOSE) { |
| - System.out.println(" done get enum"); |
| - } |
| - |
| - // seek should find the term |
| - assertEquals(TermsEnum.SeekStatus.FOUND, |
| - te.seekCeil(term.bytes())); |
| - |
| - // now .next() this many times: |
| - int ct = _TestUtil.nextInt(r, 5, 100); |
| - for(int i=0;i<ct;i++) { |
| - if (VERBOSE) { |
| - System.out.println("TEST: now next()"); |
| - } |
| - if (1+spot+i >= fieldTerms.size()) { |
| - break; |
| - } |
| - term = fieldTerms.get(1+spot+i); |
| - if (!term.field().equals(field)) { |
| - assertNull(te.next()); |
| - break; |
| - } else { |
| - BytesRef t = te.next(); |
| - |
| - if (VERBOSE) { |
| - System.out.println(" got term=" + (t == null ? null : UnicodeUtil.toHexString(t.utf8ToString()))); |
| - System.out.println(" exp=" + UnicodeUtil.toHexString(term.text().toString())); |
| - } |
| - |
| - assertEquals(term.bytes(), t); |
| - } |
| - } |
| - } |
| - } |
| - |
| - private void doTestSeekDoesNotExist(Random r, int numField, List<Term> fieldTerms, Term[] fieldTermsArray, IndexReader reader) throws IOException { |
| - |
| - final Map<String,TermsEnum> tes = new HashMap<String,TermsEnum>(); |
| - |
| - if (VERBOSE) { |
| - System.out.println("TEST: top random seeks"); |
| - } |
| - |
| - { |
| - int num = atLeast(100); |
| - for (int iter = 0; iter < num; iter++) { |
| - |
| - // seek to random spot |
| - String field = ("f" + r.nextInt(numField)).intern(); |
| - Term tx = new Term(field, getRandomString(r)); |
| - |
| - int spot = Arrays.binarySearch(fieldTermsArray, tx); |
| - |
| - if (spot < 0) { |
| - if (VERBOSE) { |
| - System.out.println("TEST: non-exist seek to " + field + ":" + UnicodeUtil.toHexString(tx.text())); |
| - } |
| - |
| - // term does not exist: |
| - TermsEnum te = tes.get(field); |
| - if (te == null) { |
| - te = MultiFields.getTerms(reader, field).iterator(null); |
| - tes.put(field, te); |
| - } |
| - |
| - if (VERBOSE) { |
| - System.out.println(" got enum"); |
| - } |
| - |
| - spot = -spot - 1; |
| - |
| - if (spot == fieldTerms.size() || !fieldTerms.get(spot).field().equals(field)) { |
| - assertEquals(TermsEnum.SeekStatus.END, te.seekCeil(tx.bytes())); |
| - } else { |
| - assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(tx.bytes())); |
| - |
| - if (VERBOSE) { |
| - System.out.println(" got term=" + UnicodeUtil.toHexString(te.term().utf8ToString())); |
| - System.out.println(" exp term=" + UnicodeUtil.toHexString(fieldTerms.get(spot).text())); |
| - } |
| - |
| - assertEquals(fieldTerms.get(spot).bytes(), |
| - te.term()); |
| - |
| - // now .next() this many times: |
| - int ct = _TestUtil.nextInt(r, 5, 100); |
| - for(int i=0;i<ct;i++) { |
| - if (VERBOSE) { |
| - System.out.println("TEST: now next()"); |
| - } |
| - if (1+spot+i >= fieldTerms.size()) { |
| - break; |
| - } |
| - Term term = fieldTerms.get(1+spot+i); |
| - if (!term.field().equals(field)) { |
| - assertNull(te.next()); |
| - break; |
| - } else { |
| - BytesRef t = te.next(); |
| - |
| - if (VERBOSE) { |
| - System.out.println(" got term=" + (t == null ? null : UnicodeUtil.toHexString(t.utf8ToString()))); |
| - System.out.println(" exp=" + UnicodeUtil.toHexString(term.text().toString())); |
| - } |
| - |
| - assertEquals(term.bytes(), t); |
| - } |
| - } |
| - |
| - } |
| - } |
| - } |
| - } |
| - } |
| - |
| - |
| - @Test |
| - public void testSurrogatesOrder() throws Exception { |
| - Directory dir = newDirectory(); |
| - RandomIndexWriter w = new RandomIndexWriter(random(), |
| - dir, |
| - newIndexWriterConfig( TEST_VERSION_CURRENT, |
| - new MockAnalyzer(random())).setCodec(new PreFlexRWCodec())); |
| - |
| - final int numField = _TestUtil.nextInt(random(), 2, 5); |
| - |
| - int uniqueTermCount = 0; |
| - |
| - int tc = 0; |
| - |
| - List<Term> fieldTerms = new ArrayList<Term>(); |
| - |
| - for(int f=0;f<numField;f++) { |
| - String field = "f" + f; |
| - final int numTerms = atLeast(200); |
| - |
| - final Set<String> uniqueTerms = new HashSet<String>(); |
| - |
| - for(int i=0;i<numTerms;i++) { |
| - String term = getRandomString(random()) + "_ " + (tc++); |
| - uniqueTerms.add(term); |
| - fieldTerms.add(new Term(field, term)); |
| - Document doc = new Document(); |
| - doc.add(newField(field, term, StringField.TYPE_UNSTORED)); |
| - w.addDocument(doc); |
| - } |
| - uniqueTermCount += uniqueTerms.size(); |
| - } |
| - |
| - IndexReader reader = w.getReader(); |
| - |
| - if (VERBOSE) { |
| - Collections.sort(fieldTerms, termAsUTF16Comparator); |
| - |
| - System.out.println("\nTEST: UTF16 order"); |
| - for(Term t: fieldTerms) { |
| - System.out.println(" " + toHexString(t)); |
| - } |
| - } |
| - |
| - // sorts in code point order: |
| - Collections.sort(fieldTerms); |
| - |
| - if (VERBOSE) { |
| - System.out.println("\nTEST: codepoint order"); |
| - for(Term t: fieldTerms) { |
| - System.out.println(" " + toHexString(t)); |
| - } |
| - } |
| - |
| - Term[] fieldTermsArray = fieldTerms.toArray(new Term[fieldTerms.size()]); |
| - |
| - //SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms); |
| - |
| - //FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1)); |
| - //assertNotNull(fields); |
| - |
| - doTestStraightEnum(fieldTerms, reader, uniqueTermCount); |
| - doTestSeekExists(random(), fieldTerms, reader); |
| - doTestSeekDoesNotExist(random(), numField, fieldTerms, fieldTermsArray, reader); |
| - |
| - reader.close(); |
| - w.close(); |
| - dir.close(); |
| - } |
| -} |
| Index: lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestTermInfosReaderIndex.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestTermInfosReaderIndex.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestTermInfosReaderIndex.java (working copy) |
| @@ -1,208 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Collections; |
| -import java.util.List; |
| -import java.util.Random; |
| - |
| -import org.apache.lucene.analysis.MockAnalyzer; |
| -import org.apache.lucene.analysis.MockTokenizer; |
| -import org.apache.lucene.codecs.FieldInfosReader; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.StringField; |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.DirectoryReader; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.FieldsEnum; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.IndexWriterConfig; |
| -import org.apache.lucene.index.LogMergePolicy; |
| -import org.apache.lucene.index.MultiFields; |
| -import org.apache.lucene.index.RandomIndexWriter; |
| -import org.apache.lucene.index.SegmentReader; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.Terms; |
| -import org.apache.lucene.index.TermsEnum; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.TermQuery; |
| -import org.apache.lucene.search.TopDocs; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.store.LockObtainFailedException; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.apache.lucene.util._TestUtil; |
| -import org.junit.AfterClass; |
| -import org.junit.BeforeClass; |
| - |
| -public class TestTermInfosReaderIndex extends LuceneTestCase { |
| - |
| - private static int NUMBER_OF_DOCUMENTS; |
| - private static int NUMBER_OF_FIELDS; |
| - private static TermInfosReaderIndex index; |
| - private static Directory directory; |
| - private static SegmentTermEnum termEnum; |
| - private static int indexDivisor; |
| - private static int termIndexInterval; |
| - private static IndexReader reader; |
| - private static List<Term> sampleTerms; |
| - |
| - /** we will manually instantiate preflex-rw here */ |
| - @BeforeClass |
| - public static void beforeClass() throws Exception { |
| - LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true; |
| - IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, |
| - new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); |
| - |
| - termIndexInterval = config.getTermIndexInterval(); |
| - indexDivisor = _TestUtil.nextInt(random(), 1, 10); |
| - NUMBER_OF_DOCUMENTS = atLeast(100); |
| - NUMBER_OF_FIELDS = atLeast(Math.max(10, 3*termIndexInterval*indexDivisor/NUMBER_OF_DOCUMENTS)); |
| - |
| - directory = newDirectory(); |
| - |
| - config.setCodec(new PreFlexRWCodec()); |
| - LogMergePolicy mp = newLogMergePolicy(); |
| - // turn off compound file, this test will open some index files directly. |
| - mp.setUseCompoundFile(false); |
| - config.setMergePolicy(mp); |
| - |
| - |
| - populate(directory, config); |
| - |
| - DirectoryReader r0 = IndexReader.open(directory); |
| - SegmentReader r = LuceneTestCase.getOnlySegmentReader(r0); |
| - String segment = r.getSegmentName(); |
| - r.close(); |
| - |
| - FieldInfosReader infosReader = new PreFlexRWCodec().fieldInfosFormat().getFieldInfosReader(); |
| - FieldInfos fieldInfos = infosReader.read(directory, segment, IOContext.READONCE); |
| - String segmentFileName = IndexFileNames.segmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION); |
| - long tiiFileLength = directory.fileLength(segmentFileName); |
| - IndexInput input = directory.openInput(segmentFileName, newIOContext(random())); |
| - termEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), newIOContext(random())), fieldInfos, false); |
| - int totalIndexInterval = termEnum.indexInterval * indexDivisor; |
| - |
| - SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true); |
| - index = new TermInfosReaderIndex(indexEnum, indexDivisor, tiiFileLength, totalIndexInterval); |
| - indexEnum.close(); |
| - input.close(); |
| - |
| - reader = IndexReader.open(directory); |
| - sampleTerms = sample(reader,1000); |
| - } |
| - |
| - @AfterClass |
| - public static void afterClass() throws Exception { |
| - termEnum.close(); |
| - reader.close(); |
| - directory.close(); |
| - termEnum = null; |
| - reader = null; |
| - directory = null; |
| - index = null; |
| - sampleTerms = null; |
| - } |
| - |
| - public void testSeekEnum() throws CorruptIndexException, IOException { |
| - int indexPosition = 3; |
| - SegmentTermEnum clone = termEnum.clone(); |
| - Term term = findTermThatWouldBeAtIndex(clone, indexPosition); |
| - SegmentTermEnum enumerator = clone; |
| - index.seekEnum(enumerator, indexPosition); |
| - assertEquals(term, enumerator.term()); |
| - clone.close(); |
| - } |
| - |
| - public void testCompareTo() throws IOException { |
| - Term term = new Term("field" + random().nextInt(NUMBER_OF_FIELDS) ,getText()); |
| - for (int i = 0; i < index.length(); i++) { |
| - Term t = index.getTerm(i); |
| - int compareTo = term.compareTo(t); |
| - assertEquals(compareTo, index.compareTo(term, i)); |
| - } |
| - } |
| - |
| - public void testRandomSearchPerformance() throws CorruptIndexException, IOException { |
| - IndexSearcher searcher = new IndexSearcher(reader); |
| - for (Term t : sampleTerms) { |
| - TermQuery query = new TermQuery(t); |
| - TopDocs topDocs = searcher.search(query, 10); |
| - assertTrue(topDocs.totalHits > 0); |
| - } |
| - } |
| - |
| - private static List<Term> sample(IndexReader reader, int size) throws IOException { |
| - List<Term> sample = new ArrayList<Term>(); |
| - Random random = new Random(); |
| - FieldsEnum fieldsEnum = MultiFields.getFields(reader).iterator(); |
| - String field; |
| - while((field = fieldsEnum.next()) != null) { |
| - Terms terms = fieldsEnum.terms(); |
| - assertNotNull(terms); |
| - TermsEnum termsEnum = terms.iterator(null); |
| - while (termsEnum.next() != null) { |
| - if (sample.size() >= size) { |
| - int pos = random.nextInt(size); |
| - sample.set(pos, new Term(field, termsEnum.term())); |
| - } else { |
| - sample.add(new Term(field, termsEnum.term())); |
| - } |
| - } |
| - } |
| - Collections.shuffle(sample); |
| - return sample; |
| - } |
| - |
| - private Term findTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index) throws IOException { |
| - int termPosition = index * termIndexInterval * indexDivisor; |
| - for (int i = 0; i < termPosition; i++) { |
| - // TODO: this test just uses random terms, so this is always possible |
| - assumeTrue("ran out of terms", termEnum.next()); |
| - } |
| - final Term term = termEnum.term(); |
| - // An indexed term is only written when the term after |
| - // it exists, so, if the number of terms is 0 mod |
| - // termIndexInterval, the last index term will not be |
| - // written; so we require a term after this term |
| - // as well: |
| - assumeTrue("ran out of terms", termEnum.next()); |
| - return term; |
| - } |
| - |
| - private static void populate(Directory directory, IndexWriterConfig config) throws CorruptIndexException, LockObtainFailedException, IOException { |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config); |
| - for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) { |
| - Document document = new Document(); |
| - for (int f = 0; f < NUMBER_OF_FIELDS; f++) { |
| - document.add(newField("field" + f, getText(), StringField.TYPE_UNSTORED)); |
| - } |
| - writer.addDocument(document); |
| - } |
| - writer.forceMerge(1); |
| - writer.close(); |
| - } |
| - |
| - private static String getText() { |
| - return Long.toString(random().nextLong(),Character.MAX_RADIX); |
| - } |
| -} |
| Index: lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java (working copy) |
| @@ -559,9 +559,6 @@ |
| |
| /** Test whether all similarities return document 3 before documents 7 and 8. */ |
| public void testHeartRanking() throws IOException { |
| - assumeFalse("PreFlex codec does not support the stats necessary for this test!", |
| - "Lucene3x".equals(Codec.getDefault().getName())); |
| - |
| Query q = new TermQuery(new Term(FIELD_BODY, "heart")); |
| |
| for (SimilarityBase sim : sims) { |
| Index: lucene/core/src/test/org/apache/lucene/search/TestSort.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestSort.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestSort.java (working copy) |
| @@ -74,8 +74,6 @@ |
| */ |
| |
| public class TestSort extends LuceneTestCase { |
| - // true if our codec supports docvalues: true unless codec is preflex (3.x) |
| - boolean supportsDocValues = Codec.getDefault().getName().equals("Lucene3x") == false; |
| private static int NUM_STRINGS; |
| private IndexSearcher full; |
| private IndexSearcher searchX; |
| @@ -158,20 +156,15 @@ |
| doc.add (new TextField ("contents", data[i][1])); |
| if (data[i][2] != null) { |
| doc.add(new StringField ("int", data[i][2])); |
| - if (supportsDocValues) { |
| - doc.add(new PackedLongDocValuesField("int", Integer.parseInt(data[i][2]))); |
| - } |
| + doc.add(new PackedLongDocValuesField("int", Integer.parseInt(data[i][2]))); |
| } |
| if (data[i][3] != null) { |
| doc.add(new StringField ("float", data[i][3])); |
| - if (supportsDocValues) { |
| - doc.add(new FloatDocValuesField("float", Float.parseFloat(data[i][3]))); |
| - } |
| + doc.add(new FloatDocValuesField("float", Float.parseFloat(data[i][3]))); |
| } |
| if (data[i][4] != null) { |
| doc.add(new StringField ("string", data[i][4])); |
| - if (supportsDocValues) { |
| - switch(stringDVType) { |
| + switch(stringDVType) { |
| case BYTES_FIXED_SORTED: |
| doc.add(new SortedBytesDocValuesField("string", new BytesRef(data[i][4]), true)); |
| break; |
| @@ -192,7 +185,6 @@ |
| break; |
| default: |
| throw new IllegalStateException("unknown type " + stringDVType); |
| - } |
| } |
| } |
| if (data[i][5] != null) doc.add (new StringField ("custom", data[i][5])); |
| @@ -200,9 +192,7 @@ |
| if (data[i][7] != null) doc.add (new StringField ("long", data[i][7])); |
| if (data[i][8] != null) { |
| doc.add(new StringField ("double", data[i][8])); |
| - if (supportsDocValues) { |
| - doc.add(new DoubleDocValuesField("double", Double.parseDouble(data[i][8]))); |
| - } |
| + doc.add(new DoubleDocValuesField("double", Double.parseDouble(data[i][8]))); |
| } |
| if (data[i][9] != null) doc.add (new StringField ("short", data[i][9])); |
| if (data[i][10] != null) doc.add (new StringField ("byte", data[i][10])); |
| @@ -246,14 +236,10 @@ |
| doc.add (new Field ("tracer", num, onlyStored)); |
| //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED)); |
| doc.add(new StringField("string", num)); |
| - if (supportsDocValues) { |
| - doc.add(new SortedBytesDocValuesField("string", new BytesRef(num))); |
| - } |
| + doc.add(new SortedBytesDocValuesField("string", new BytesRef(num))); |
| String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50); |
| doc.add(new StringField ("string2", num2)); |
| - if (supportsDocValues) { |
| - doc.add(new SortedBytesDocValuesField("string2", new BytesRef(num2))); |
| - } |
| + doc.add(new SortedBytesDocValuesField("string2", new BytesRef(num2))); |
| doc.add (new Field ("tracer2", num2, onlyStored)); |
| for(IndexableField f2 : doc.getFields()) { |
| if (!f2.fieldType().omitNorms()) { |
| @@ -265,14 +251,10 @@ |
| doc.add (new Field ("fixed_tracer", numFixed, onlyStored)); |
| //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED)); |
| doc.add(new StringField("string_fixed", numFixed)); |
| - if (supportsDocValues) { |
| - doc.add(new SortedBytesDocValuesField("string_fixed", new BytesRef(numFixed), true)); |
| - } |
| + doc.add(new SortedBytesDocValuesField("string_fixed", new BytesRef(numFixed), true)); |
| String num2Fixed = getRandomCharString(fixedLen2, 48, 52); |
| doc.add(new StringField ("string2_fixed", num2Fixed)); |
| - if (supportsDocValues) { |
| - doc.add(new SortedBytesDocValuesField("string2_fixed", new BytesRef(num2Fixed), true)); |
| - } |
| + doc.add(new SortedBytesDocValuesField("string2_fixed", new BytesRef(num2Fixed), true)); |
| doc.add (new Field ("tracer2_fixed", num2Fixed, onlyStored)); |
| |
| for(IndexableField f2 : doc.getFields()) { |
| @@ -412,23 +394,21 @@ |
| assertMatches (full, queryX, sort, "AIGEC"); |
| assertMatches (full, queryY, sort, "DJHFB"); |
| |
| - if (supportsDocValues) { |
| - sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), SortField.FIELD_DOC ); |
| - assertMatches (full, queryX, sort, "IGAEC"); |
| - assertMatches (full, queryY, sort, "DHFJB"); |
| + sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), SortField.FIELD_DOC ); |
| + assertMatches (full, queryX, sort, "IGAEC"); |
| + assertMatches (full, queryY, sort, "DHFJB"); |
| |
| - sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), SortField.FIELD_DOC ); |
| - assertMatches (full, queryX, sort, "GCIEA"); |
| - assertMatches (full, queryY, sort, "DHJFB"); |
| + sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), SortField.FIELD_DOC ); |
| + assertMatches (full, queryX, sort, "GCIEA"); |
| + assertMatches (full, queryY, sort, "DHJFB"); |
| |
| - sort.setSort (useDocValues(new SortField ("double", SortField.Type.DOUBLE)), SortField.FIELD_DOC ); |
| - assertMatches (full, queryX, sort, "AGICE"); |
| - assertMatches (full, queryY, sort, "DJHBF"); |
| + sort.setSort (useDocValues(new SortField ("double", SortField.Type.DOUBLE)), SortField.FIELD_DOC ); |
| + assertMatches (full, queryX, sort, "AGICE"); |
| + assertMatches (full, queryY, sort, "DJHBF"); |
| |
| - sort.setSort (useDocValues(new SortField ("string", getDVStringSortType())), SortField.FIELD_DOC ); |
| - assertMatches (full, queryX, sort, "AIGEC"); |
| - assertMatches (full, queryY, sort, "DJHFB"); |
| - } |
| + sort.setSort (useDocValues(new SortField ("string", getDVStringSortType())), SortField.FIELD_DOC ); |
| + assertMatches (full, queryX, sort, "AIGEC"); |
| + assertMatches (full, queryY, sort, "DJHFB"); |
| } |
| |
| private SortField.Type getDVStringSortType() { |
| @@ -520,8 +500,6 @@ |
| verifyStringSort(sort); |
| |
| // Doc values field, var length |
| - assumeFalse("cannot work with preflex codec", |
| - "Lucene3x".equals(Codec.getDefault().getName())); |
| sort.setSort( |
| useDocValues(new SortField("string", getDVStringSortType())), |
| useDocValues(new SortField("string2", getDVStringSortType(), true)), |
| @@ -781,19 +759,17 @@ |
| assertMatches (full, queryX, sort, "CEGIA"); |
| assertMatches (full, queryY, sort, "BFHJD"); |
| |
| - if (supportsDocValues) { |
| - sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)) ); |
| - assertMatches (full, queryX, sort, "CAEGI"); |
| - assertMatches (full, queryY, sort, "BJFHD"); |
| + sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)) ); |
| + assertMatches (full, queryX, sort, "CAEGI"); |
| + assertMatches (full, queryY, sort, "BJFHD"); |
| |
| - sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT, true)) ); |
| - assertMatches (full, queryX, sort, "AECIG"); |
| - assertMatches (full, queryY, sort, "BFJHD"); |
| + sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT, true)) ); |
| + assertMatches (full, queryX, sort, "AECIG"); |
| + assertMatches (full, queryY, sort, "BFJHD"); |
| |
| - sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)) ); |
| - assertMatches (full, queryX, sort, "CEGIA"); |
| - assertMatches (full, queryY, sort, "BFHJD"); |
| - } |
| + sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)) ); |
| + assertMatches (full, queryX, sort, "CEGIA"); |
| + assertMatches (full, queryY, sort, "BFHJD"); |
| } |
| |
| // test sorting when the sort field is empty (undefined) for some of the documents |
| @@ -864,19 +840,17 @@ |
| sort.setSort (new SortField ("float", SortField.Type.FLOAT), new SortField ("string", SortField.Type.STRING) ); |
| assertMatches (full, queryX, sort, "GICEA"); |
| |
| - if (supportsDocValues) { |
| - sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), |
| - useDocValues(new SortField ("float", SortField.Type.FLOAT))); |
| - assertMatches (full, queryX, sort, "IGEAC"); |
| + sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), |
| + useDocValues(new SortField ("float", SortField.Type.FLOAT))); |
| + assertMatches (full, queryX, sort, "IGEAC"); |
| |
| - sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)), |
| - useDocValues(new SortField (null, SortField.Type.DOC, true))); |
| - assertMatches (full, queryX, sort, "CEAGI"); |
| + sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)), |
| + useDocValues(new SortField (null, SortField.Type.DOC, true))); |
| + assertMatches (full, queryX, sort, "CEAGI"); |
| |
| - sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), |
| - useDocValues(new SortField ("string", getDVStringSortType()))); |
| - assertMatches (full, queryX, sort, "GICEA"); |
| - } |
| + sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), |
| + useDocValues(new SortField ("string", getDVStringSortType()))); |
| + assertMatches (full, queryX, sort, "GICEA"); |
| } |
| |
| // test a variety of sorts using a parallel multisearcher |
| @@ -1189,53 +1163,51 @@ |
| sort.setSort(new SortField ("string", SortField.Type.STRING, true)); |
| assertMatches(multi, queryF, sort, "IJZ"); |
| |
| - if (supportsDocValues) { |
| - sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT))); |
| - expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; |
| - assertMatches(multi, queryA, sort, expected); |
| + sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT))); |
| + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; |
| + assertMatches(multi, queryA, sort, expected); |
| |
| - sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT)), SortField.FIELD_DOC); |
| - expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; |
| - assertMatches(multi, queryA, sort, expected); |
| + sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT)), SortField.FIELD_DOC); |
| + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; |
| + assertMatches(multi, queryA, sort, expected); |
| |
| - sort.setSort(useDocValues(new SortField("int", SortField.Type.INT))); |
| - expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; |
| - assertMatches(multi, queryA, sort, expected); |
| + sort.setSort(useDocValues(new SortField("int", SortField.Type.INT))); |
| + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; |
| + assertMatches(multi, queryA, sort, expected); |
| |
| - sort.setSort(useDocValues(new SortField ("float", SortField.Type.FLOAT)), SortField.FIELD_DOC); |
| - assertMatches(multi, queryA, sort, "GDHJCIEFAB"); |
| + sort.setSort(useDocValues(new SortField ("float", SortField.Type.FLOAT)), SortField.FIELD_DOC); |
| + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); |
| |
| - sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT))); |
| - assertMatches(multi, queryA, sort, "GDHJCIEFAB"); |
| + sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT))); |
| + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); |
| |
| - sort.setSort(useDocValues(new SortField("int", SortField.Type.INT, true))); |
| - expected = isFull ? "CABEJGFHDI" : "CAEBJGFHDI"; |
| - assertMatches(multi, queryA, sort, expected); |
| + sort.setSort(useDocValues(new SortField("int", SortField.Type.INT, true))); |
| + expected = isFull ? "CABEJGFHDI" : "CAEBJGFHDI"; |
| + assertMatches(multi, queryA, sort, expected); |
| |
| - sort.setSort(useDocValues(new SortField("int", SortField.Type.INT)), useDocValues(new SortField("float", SortField.Type.FLOAT))); |
| - assertMatches(multi, queryA, sort, "IDHFGJEABC"); |
| + sort.setSort(useDocValues(new SortField("int", SortField.Type.INT)), useDocValues(new SortField("float", SortField.Type.FLOAT))); |
| + assertMatches(multi, queryA, sort, "IDHFGJEABC"); |
| |
| - sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT))); |
| - assertMatches(multi, queryF, sort, "IZJ"); |
| + sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT))); |
| + assertMatches(multi, queryF, sort, "IZJ"); |
| |
| - sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT, true))); |
| - assertMatches(multi, queryF, sort, "JZI"); |
| + sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT, true))); |
| + assertMatches(multi, queryF, sort, "JZI"); |
| |
| - sort.setSort(useDocValues(new SortField("string", getDVStringSortType()))); |
| - assertMatches(multi, queryA, sort, "DJAIHGFEBC"); |
| + sort.setSort(useDocValues(new SortField("string", getDVStringSortType()))); |
| + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); |
| |
| - sort.setSort(useDocValues(new SortField("string", getDVStringSortType(), true))); |
| - assertMatches(multi, queryA, sort, "CBEFGHIAJD"); |
| + sort.setSort(useDocValues(new SortField("string", getDVStringSortType(), true))); |
| + assertMatches(multi, queryA, sort, "CBEFGHIAJD"); |
| |
| - sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT)),useDocValues(new SortField("string", getDVStringSortType()))); |
| - assertMatches(multi, queryA, sort, "GDHJICEFAB"); |
| + sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT)),useDocValues(new SortField("string", getDVStringSortType()))); |
| + assertMatches(multi, queryA, sort, "GDHJICEFAB"); |
| |
| - sort.setSort(useDocValues(new SortField ("string", getDVStringSortType()))); |
| - assertMatches(multi, queryF, sort, "ZJI"); |
| + sort.setSort(useDocValues(new SortField ("string", getDVStringSortType()))); |
| + assertMatches(multi, queryF, sort, "ZJI"); |
| |
| - sort.setSort(useDocValues(new SortField ("string", getDVStringSortType(), true))); |
| - assertMatches(multi, queryF, sort, "IJZ"); |
| - } |
| + sort.setSort(useDocValues(new SortField ("string", getDVStringSortType(), true))); |
| + assertMatches(multi, queryF, sort, "IJZ"); |
| |
| // up to this point, all of the searches should have "sane" |
| // FieldCache behavior, and should have reused the cache in several cases |
| @@ -1370,8 +1342,6 @@ |
| |
| public void testRandomStringSort() throws Exception { |
| Random random = new Random(random().nextLong()); |
| - assumeTrue("cannot work with Lucene3x codec", |
| - defaultCodecSupportsDocValues()); |
| |
| final int NUM_DOCS = atLeast(100); |
| final Directory dir = newDirectory(); |
| Index: lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java (working copy) |
| @@ -59,10 +59,7 @@ |
| Field field = newField("field", "", StringField.TYPE_UNSTORED); |
| doc.add(field); |
| |
| - // we generate aweful prefixes: good for testing. |
| - // but for preflex codec, the test can be very slow, so use less iterations. |
| - final String codec = Codec.getDefault().getName(); |
| - int num = codec.equals("Lucene3x") ? 200 * RANDOM_MULTIPLIER : atLeast(1000); |
| + int num = atLeast(1000); |
| for (int i = 0; i < num; i++) { |
| field.setStringValue(_TestUtil.randomUnicodeString(random(), 10)); |
| writer.addDocument(doc); |
| Index: lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java (working copy) |
| @@ -43,7 +43,6 @@ |
| * In the example, a docvalues field is used as a per-document boost (separate from the norm) |
| * @lucene.experimental |
| */ |
| -@SuppressCodecs("Lucene3x") |
| public class TestDocValuesScoring extends LuceneTestCase { |
| private static final float SCORE_EPSILON = 0.001f; /* for comparing floats */ |
| |
| Index: lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java (working copy) |
| @@ -139,9 +139,7 @@ |
| |
| /** test a bunch of random regular expressions */ |
| public void testRegexps() throws Exception { |
| - // we generate aweful regexps: good for testing. |
| - // but for preflex codec, the test can be very slow, so use less iterations. |
| - int num = Codec.getDefault().getName().equals("Lucene3x") ? 100 * RANDOM_MULTIPLIER : atLeast(1000); |
| + int num = atLeast(1000); |
| for (int i = 0; i < num; i++) { |
| String reg = AutomatonTestUtil.randomRegexp(random()); |
| if (VERBOSE) { |
| Index: lucene/core/src/test/org/apache/lucene/search/TestSearchAfter.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestSearchAfter.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestSearchAfter.java (working copy) |
| @@ -48,8 +48,6 @@ |
| private Directory dir; |
| private IndexReader reader; |
| private IndexSearcher searcher; |
| - |
| - boolean supportsDocValues = Codec.getDefault().getName().equals("Lucene3x") == false; |
| |
| private static SortField useDocValues(SortField field) { |
| field.setUseIndexValues(true); |
| @@ -77,13 +75,11 @@ |
| document.add(newField("bytesval", _TestUtil.randomRealisticUnicodeString(random()), StringField.TYPE_UNSTORED)); |
| document.add(new DoubleField("double", random().nextDouble())); |
| |
| - if (supportsDocValues) { |
| - document.add(new IntDocValuesField("intdocvalues", random().nextInt())); |
| - document.add(new FloatDocValuesField("floatdocvalues", random().nextFloat())); |
| - document.add(new SortedBytesDocValuesField("sortedbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random())))); |
| - document.add(new SortedBytesDocValuesField("sortedbytesdocvaluesval", new BytesRef(_TestUtil.randomRealisticUnicodeString(random())))); |
| - document.add(new StraightBytesDocValuesField("straightbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random())))); |
| - } |
| + document.add(new IntDocValuesField("intdocvalues", random().nextInt())); |
| + document.add(new FloatDocValuesField("floatdocvalues", random().nextFloat())); |
| + document.add(new SortedBytesDocValuesField("sortedbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random())))); |
| + document.add(new SortedBytesDocValuesField("sortedbytesdocvaluesval", new BytesRef(_TestUtil.randomRealisticUnicodeString(random())))); |
| + document.add(new StraightBytesDocValuesField("straightbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random())))); |
| |
| iw.addDocument(document); |
| } |
| @@ -131,13 +127,11 @@ |
| assertQuery(query, filter, new Sort(new SortField[] {new SortField("double", SortField.Type.DOUBLE, reversed)})); |
| assertQuery(query, filter, new Sort(new SortField[] {new SortField("bytes", SortField.Type.STRING, reversed)})); |
| assertQuery(query, filter, new Sort(new SortField[] {new SortField("bytesval", SortField.Type.STRING_VAL, reversed)})); |
| - if (supportsDocValues) { |
| - assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("intdocvalues", SortField.Type.INT, reversed))})); |
| - assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("floatdocvalues", SortField.Type.FLOAT, reversed))})); |
| - assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("sortedbytesdocvalues", SortField.Type.STRING, reversed))})); |
| - assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("sortedbytesdocvaluesval", SortField.Type.STRING_VAL, reversed))})); |
| - assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("straightbytesdocvalues", SortField.Type.STRING_VAL, reversed))})); |
| - } |
| + assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("intdocvalues", SortField.Type.INT, reversed))})); |
| + assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("floatdocvalues", SortField.Type.FLOAT, reversed))})); |
| + assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("sortedbytesdocvalues", SortField.Type.STRING, reversed))})); |
| + assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("sortedbytesdocvaluesval", SortField.Type.STRING_VAL, reversed))})); |
| + assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("straightbytesdocvalues", SortField.Type.STRING_VAL, reversed))})); |
| } |
| } |
| |
| Index: lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java (working copy) |
| @@ -34,13 +34,9 @@ |
| import java.util.Random; |
| import org.junit.Ignore; |
| |
| -// NOTE: this test will fail w/ PreFlexRW codec! (Because |
| -// this test uses full binary term space, but PreFlex cannot |
| -// handle this since it requires the terms are UTF8 bytes). |
| -// |
| -// Also, SimpleText codec will consume very large amounts of |
| +// NOTE: SimpleText codec will consume very large amounts of |
| // disk (but, should run successfully). Best to run w/ |
| -// -Dtests.codec=Standard, and w/ plenty of RAM, eg: |
| +// -Dtests.codec=<current codec>, and w/ plenty of RAM, eg: |
| // |
| // ant test -Dtest.slow=true -Dtests.heapsize=8g |
| // |
| @@ -144,9 +140,6 @@ |
| @Slow |
| public void test2BTerms() throws IOException { |
| |
| - if ("Lucene3x".equals(Codec.getDefault().getName())) { |
| - throw new RuntimeException("this test cannot run with PreFlex codec"); |
| - } |
| System.out.println("Starting Test2B"); |
| final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000; |
| |
| Index: lucene/core/src/test/org/apache/lucene/index/index.31.nocfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip (working copy) |
| |
| Property changes on: lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip |
| ___________________________________________________________________ |
| Added: svn:mime-type |
| ## -0,0 +1 ## |
| +application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java (working copy) |
| @@ -46,7 +46,7 @@ |
| // TODO: we really need to test indexing offsets, but then getting only docs / docs + freqs. |
| // not all codecs store prx separate... |
| // TODO: fix sep codec to index offsets so we can greatly reduce this list! |
| -@SuppressCodecs({"Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) |
| +@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) |
| public class TestPostingsOffsets extends LuceneTestCase { |
| IndexWriterConfig iwc; |
| |
| Index: lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip (working copy) |
| |
| Property changes on: lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip |
| ___________________________________________________________________ |
| Added: svn:mime-type |
| ## -0,0 +1 ## |
| +application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestTypePromotion.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestTypePromotion.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestTypePromotion.java (working copy) |
| @@ -43,7 +43,6 @@ |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| |
| -@SuppressCodecs("Lucene3x") |
| public class TestTypePromotion extends LuceneTestCase { |
| |
| private static EnumSet<Type> INTEGERS = EnumSet.of(Type.VAR_INTS, |
| Index: lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (working copy) |
| @@ -37,10 +37,10 @@ |
| Random random = new Random(random().nextLong()); |
| final MockDirectoryWrapper dir = newDirectory(); |
| dir.setCheckIndexOnClose(false); // we use a custom codec provider |
| - final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues()); |
| + final LineFileDocs docs = new LineFileDocs(random, true); |
| |
| //provider.register(new MemoryCodec()); |
| - if ( (!"Lucene3x".equals(Codec.getDefault().getName())) && random().nextBoolean()) { |
| + if (random().nextBoolean()) { |
| Codec.setDefault(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean()))); |
| } |
| |
| Index: lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java (working copy) |
| @@ -59,7 +59,7 @@ |
| // Try to make an index that requires merging: |
| w.getConfig().setMaxBufferedDocs(_TestUtil.nextInt(random(), 2, 11)); |
| final int numStartDocs = atLeast(20); |
| - final LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues()); |
| + final LineFileDocs docs = new LineFileDocs(random(), true); |
| for(int docIDX=0;docIDX<numStartDocs;docIDX++) { |
| w.addDocument(docs.nextDoc()); |
| } |
| Index: lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java (working copy) |
| @@ -22,7 +22,6 @@ |
| import java.util.Map.Entry; |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| -import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.document.ByteDocValuesField; |
| import org.apache.lucene.document.DerefBytesDocValuesField; |
| import org.apache.lucene.document.Document; |
| @@ -56,7 +55,6 @@ |
| * Tests DocValues integration into IndexWriter & Codecs |
| * |
| */ |
| -@SuppressCodecs("Lucene3x") |
| public class TestDocValuesIndexing extends LuceneTestCase { |
| /* |
| * - add test for multi segment case with deletes |
| Index: lucene/core/src/test/org/apache/lucene/index/index.30.cfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java (working copy) |
| @@ -65,9 +65,12 @@ |
| return; |
| } |
| } else { |
| + // note: re-enable this if we create a 4.x impersonator, |
| + // and if its format is actually different from the real 4.x (unlikely) |
| // TODO: the non-fork code could simply enable impersonation? |
| - assumeFalse("does not support PreFlex, see LUCENE-3992", |
| - Codec.getDefault().getName().equals("Lucene3x")); |
| + // assumeFalse("does not support PreFlex, see LUCENE-3992", |
| + // Codec.getDefault().getName().equals("Lucene4x")); |
| + |
| // we are the fork, setup a crashing thread |
| final int crashTime = _TestUtil.nextInt(random(), 3000, 4000); |
| Thread t = new Thread() { |
| Index: lucene/core/src/test/org/apache/lucene/index/index.32.cfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.34.cfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestCodecs.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (working copy) |
| @@ -28,7 +28,6 @@ |
| import org.apache.lucene.codecs.PostingsConsumer; |
| import org.apache.lucene.codecs.TermStats; |
| import org.apache.lucene.codecs.TermsConsumer; |
| -import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; |
| import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.FieldType; |
| @@ -254,7 +253,7 @@ |
| final FieldData[] fields = new FieldData[] {field}; |
| final FieldInfos fieldInfos = builder.finish(); |
| final Directory dir = newDirectory(); |
| - this.write(fieldInfos, dir, fields, true); |
| + this.write(fieldInfos, dir, fields); |
| Codec codec = Codec.getDefault(); |
| final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null); |
| |
| @@ -310,7 +309,7 @@ |
| System.out.println("TEST: now write postings"); |
| } |
| |
| - this.write(fieldInfos, dir, fields, false); |
| + this.write(fieldInfos, dir, fields); |
| Codec codec = Codec.getDefault(); |
| final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, |
| false, codec, null, null); |
| @@ -454,10 +453,6 @@ |
| for(int iter=0;iter<NUM_TEST_ITER;iter++) { |
| final FieldData field = fields[random().nextInt(fields.length)]; |
| final TermsEnum termsEnum = termsDict.terms(field.fieldInfo.name).iterator(null); |
| - if (si.getCodec() instanceof Lucene3xCodec) { |
| - // code below expects unicode sort order |
| - continue; |
| - } |
| |
| int upto = 0; |
| // Test straight enum of the terms: |
| @@ -613,7 +608,7 @@ |
| } |
| } |
| |
| - private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable { |
| + private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields) throws Throwable { |
| |
| final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27); |
| final Codec codec = Codec.getDefault(); |
| @@ -623,10 +618,6 @@ |
| final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state); |
| Arrays.sort(fields); |
| for (final FieldData field : fields) { |
| - if (!allowPreFlex && codec instanceof Lucene3xCodec) { |
| - // code below expects unicode sort order |
| - continue; |
| - } |
| field.write(consumer); |
| } |
| consumer.close(); |
| Index: lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java (working copy) |
| @@ -55,9 +55,7 @@ |
| |
| public void setUp() throws Exception { |
| super.setUp(); |
| - // we generate aweful regexps: good for testing. |
| - // but for preflex codec, the test can be very slow, so use less iterations. |
| - numIterations = Codec.getDefault().getName().equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : atLeast(50); |
| + numIterations = atLeast(50); |
| dir = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| Index: lucene/core/src/test/org/apache/lucene/index/index.30.nocfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestDocCount.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestDocCount.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestDocCount.java (working copy) |
| @@ -24,12 +24,10 @@ |
| import org.apache.lucene.util.FixedBitSet; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util._TestUtil; |
| -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| |
| /** |
| * Tests the Terms.docCount statistic |
| */ |
| -@SuppressCodecs("Lucene3x") |
| public class TestDocCount extends LuceneTestCase { |
| public void testSimple() throws Exception { |
| Directory dir = newDirectory(); |
| Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java (working copy) |
| @@ -971,9 +971,6 @@ |
| public void testNoTermsIndex() throws Exception { |
| // Some Codecs don't honor the ReaderTermsIndexDivisor, so skip the test if |
| // they're picked. |
| - assumeFalse("PreFlex codec does not support ReaderTermsIndexDivisor!", |
| - "Lucene3x".equals(Codec.getDefault().getName())); |
| - |
| IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random())).setReaderTermsIndexDivisor(-1); |
| |
| Index: lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java (working copy) |
| @@ -44,7 +44,7 @@ |
| |
| public void test() throws Exception { |
| Random random = new Random(random().nextLong()); |
| - final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues()); |
| + final LineFileDocs docs = new LineFileDocs(random, true); |
| final Directory d = newDirectory(); |
| final RandomIndexWriter w = new RandomIndexWriter(random(), d); |
| final int numDocs = atLeast(10); |
| Index: lucene/core/src/test/org/apache/lucene/index/TestNorms.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestNorms.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestNorms.java (working copy) |
| @@ -183,7 +183,7 @@ |
| Similarity provider = new MySimProvider(writeNorms); |
| config.setSimilarity(provider); |
| RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); |
| - final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues()); |
| + final LineFileDocs docs = new LineFileDocs(random, true); |
| int num = atLeast(100); |
| for (int i = 0; i < num; i++) { |
| Document doc = docs.nextDoc(); |
| Index: lucene/core/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java (working copy) |
| @@ -38,7 +38,7 @@ |
| * |
| */ |
| // TODO: what is the problem with SimpleText |
| -@SuppressCodecs({ "SimpleText", "Lucene3x" }) |
| +@SuppressCodecs("SimpleText") |
| public class TestCustomNorms extends LuceneTestCase { |
| final String floatTestField = "normsTestFloat"; |
| final String exceptionTestField = "normsTestExcp"; |
| Index: lucene/core/src/test/org/apache/lucene/index/index.36.surrogates.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.32.nocfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip (working copy) |
| |
| Property changes on: lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip |
| ___________________________________________________________________ |
| Added: svn:mime-type |
| ## -0,0 +1 ## |
| +application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) |
| @@ -57,12 +57,13 @@ |
| import org.junit.BeforeClass; |
| |
| /* |
| - Verify we can read the pre-4.0 file format, do searches |
| + Verify we can read the pre-5.0 file format, do searches |
| against it, and add documents to it. |
| */ |
| -// don't use 3.x codec, its unrealistic since it means |
| +// note: add this if we make a 4.x impersonator |
| +// TODO: don't use 4.x codec, it's unrealistic since it means |
| // we won't even be running the actual code, only the impostor |
| -@SuppressCodecs("Lucene3x") |
| +// @SuppressCodecs("Lucene4x") |
| public class TestBackwardsCompatibility extends LuceneTestCase { |
| |
| // Uncomment these cases & run them on an older Lucene |
| @@ -87,7 +88,7 @@ |
| /* |
| // These are only needed for the special upgrade test to verify |
| // that also single-segment indexes are correctly upgraded by IndexUpgrader. |
| - // You don't need them to be build for non-3.1 (the test is happy with just one |
| + // You don't need them to be built for non-4.0 (the test is happy with just one |
| // "old" segment format, version is unimportant: |
| |
| public void testCreateSingleSegmentCFS() throws IOException { |
| @@ -99,14 +100,8 @@ |
| } |
| |
| */ |
| - final static String[] oldNames = {"30.cfs", |
| - "30.nocfs", |
| - "31.cfs", |
| - "31.nocfs", |
| - "32.cfs", |
| - "32.nocfs", |
| - "34.cfs", |
| - "34.nocfs", |
| + final static String[] oldNames = {"40.cfs", |
| + "40.nocfs", |
| }; |
| |
| final String[] unsupportedNames = {"19.cfs", |
| @@ -123,10 +118,18 @@ |
| "24.nocfs", |
| "29.cfs", |
| "29.nocfs", |
| + "30.cfs", |
| + "30.nocfs", |
| + "31.cfs", |
| + "31.nocfs", |
| + "32.cfs", |
| + "32.nocfs", |
| + "34.cfs", |
| + "34.nocfs" |
| }; |
| |
| - final static String[] oldSingleSegmentNames = {"31.optimized.cfs", |
| - "31.optimized.nocfs", |
| + final static String[] oldSingleSegmentNames = {"40.optimized.cfs", |
| + "40.optimized.nocfs", |
| }; |
| |
| static Map<String,Directory> oldIndexDirs; |
| @@ -341,16 +344,15 @@ |
| |
| ScoreDoc[] hits = searcher.search(new TermQuery(new Term(new String("content"), "aaa")), null, 1000).scoreDocs; |
| |
| - // First document should be #21 since it's norm was |
| - // increased: |
| + // First document should be #0 |
| Document d = searcher.getIndexReader().document(hits[0].doc); |
| - assertEquals("didn't get the right document first", "21", d.get("id")); |
| + assertEquals("didn't get the right document first", "0", d.get("id")); |
| |
| doTestHits(hits, 34, searcher.getIndexReader()); |
| |
| hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).scoreDocs; |
| assertEquals(34, hits.length); |
| - hits = searcher.search(new TermQuery(new Term(new String("utf8"), "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).scoreDocs; |
| + hits = searcher.search(new TermQuery(new Term(new String("utf8"), "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).scoreDocs; |
| assertEquals(34, hits.length); |
| hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).scoreDocs; |
| assertEquals(34, hits.length); |
| @@ -366,7 +368,7 @@ |
| |
| public void changeIndexWithAdds(Random random, Directory dir, String origOldName) throws IOException { |
| // open writer |
| - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); |
| + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); |
| // add 10 docs |
| for(int i=0;i<10;i++) { |
| addDoc(writer, 35+i); |
| @@ -387,12 +389,12 @@ |
| IndexSearcher searcher = new IndexSearcher(reader); |
| ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| Document d = searcher.getIndexReader().document(hits[0].doc); |
| - assertEquals("wrong first document", "21", d.get("id")); |
| + assertEquals("wrong first document", "0", d.get("id")); |
| doTestHits(hits, 44, searcher.getIndexReader()); |
| reader.close(); |
| |
| // fully merge |
| - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); |
| + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); |
| writer.forceMerge(1); |
| writer.close(); |
| |
| @@ -402,7 +404,7 @@ |
| assertEquals("wrong number of hits", 44, hits.length); |
| d = searcher.doc(hits[0].doc); |
| doTestHits(hits, 44, searcher.getIndexReader()); |
| - assertEquals("wrong first document", "21", d.get("id")); |
| + assertEquals("wrong first document", "0", d.get("id")); |
| reader.close(); |
| } |
| |
| @@ -413,7 +415,7 @@ |
| ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| assertEquals("wrong number of hits", 34, hits.length); |
| Document d = searcher.doc(hits[0].doc); |
| - assertEquals("wrong first document", "21", d.get("id")); |
| + assertEquals("wrong first document", "0", d.get("id")); |
| reader.close(); |
| |
| // fully merge |
| @@ -760,16 +762,4 @@ |
| dir.close(); |
| } |
| } |
| - |
| - public static final String surrogatesIndexName = "index.36.surrogates.zip"; |
| - |
| - public void testSurrogates() throws Exception { |
| - File oldIndexDir = _TestUtil.getTempDir("surrogates"); |
| - _TestUtil.unzip(getDataFile(surrogatesIndexName), oldIndexDir); |
| - Directory dir = newFSDirectory(oldIndexDir); |
| - // TODO: more tests |
| - _TestUtil.checkIndex(dir); |
| - dir.close(); |
| - } |
| - |
| } |
| Index: lucene/core/src/test/org/apache/lucene/index/TestBinaryTerms.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestBinaryTerms.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestBinaryTerms.java (working copy) |
| @@ -29,12 +29,10 @@ |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| |
| /** |
| * Test indexing and searching some byte[] terms |
| */ |
| -@SuppressCodecs("Lucene3x") |
| public class TestBinaryTerms extends LuceneTestCase { |
| public void testBinary() throws IOException { |
| Directory dir = newDirectory(); |
| Index: lucene/core/src/test/org/apache/lucene/index/index.31.cfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip (working copy) |
| |
| Property changes on: lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip |
| ___________________________________________________________________ |
| Added: svn:mime-type |
| ## -0,0 +1 ## |
| +application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/index.34.nocfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java (working copy) |
| @@ -39,7 +39,7 @@ |
| |
| @BeforeClass |
| public static void beforeClass() throws Exception { |
| - lineDocFile = new LineFileDocs(random(), defaultCodecSupportsDocValues()); |
| + lineDocFile = new LineFileDocs(random(), true); |
| } |
| |
| @AfterClass |
| Index: lucene/core/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip |
| =================================================================== |
| Cannot display: file marked as a binary type. |
| svn:mime-type = application/octet-stream |
| Index: lucene/core/src/test/org/apache/lucene/index/TestMixedCodecs.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestMixedCodecs.java (revision 1344053) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestMixedCodecs.java (working copy) |
| @@ -27,9 +27,7 @@ |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util._TestUtil; |
| -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| |
| -@SuppressCodecs("Lucene3x") |
| public class TestMixedCodecs extends LuceneTestCase { |
| |
| public void test() throws Exception { |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSkipListReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSkipListReader.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSkipListReader.java (working copy) |
| @@ -1,117 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Arrays; |
| - |
| -import org.apache.lucene.codecs.MultiLevelSkipListReader; |
| -import org.apache.lucene.store.IndexInput; |
| - |
| -/** |
| - * @deprecated (4.0) This is only used to read indexes created |
| - * before 4.0. |
| - */ |
| -@Deprecated |
| -final class Lucene3xSkipListReader extends MultiLevelSkipListReader { |
| - private boolean currentFieldStoresPayloads; |
| - private long freqPointer[]; |
| - private long proxPointer[]; |
| - private int payloadLength[]; |
| - |
| - private long lastFreqPointer; |
| - private long lastProxPointer; |
| - private int lastPayloadLength; |
| - |
| - public Lucene3xSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) { |
| - super(skipStream, maxSkipLevels, skipInterval); |
| - freqPointer = new long[maxSkipLevels]; |
| - proxPointer = new long[maxSkipLevels]; |
| - payloadLength = new int[maxSkipLevels]; |
| - } |
| - |
| - public void init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, boolean storesPayloads) { |
| - super.init(skipPointer, df); |
| - this.currentFieldStoresPayloads = storesPayloads; |
| - lastFreqPointer = freqBasePointer; |
| - lastProxPointer = proxBasePointer; |
| - |
| - Arrays.fill(freqPointer, freqBasePointer); |
| - Arrays.fill(proxPointer, proxBasePointer); |
| - Arrays.fill(payloadLength, 0); |
| - } |
| - |
| - /** Returns the freq pointer of the doc to which the last call of |
| - * {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */ |
| - public long getFreqPointer() { |
| - return lastFreqPointer; |
| - } |
| - |
| - /** Returns the prox pointer of the doc to which the last call of |
| - * {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */ |
| - public long getProxPointer() { |
| - return lastProxPointer; |
| - } |
| - |
| - /** Returns the payload length of the payload stored just before |
| - * the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} |
| - * has skipped. */ |
| - public int getPayloadLength() { |
| - return lastPayloadLength; |
| - } |
| - |
| - @Override |
| - protected void seekChild(int level) throws IOException { |
| - super.seekChild(level); |
| - freqPointer[level] = lastFreqPointer; |
| - proxPointer[level] = lastProxPointer; |
| - payloadLength[level] = lastPayloadLength; |
| - } |
| - |
| - @Override |
| - protected void setLastSkipData(int level) { |
| - super.setLastSkipData(level); |
| - lastFreqPointer = freqPointer[level]; |
| - lastProxPointer = proxPointer[level]; |
| - lastPayloadLength = payloadLength[level]; |
| - } |
| - |
| - @Override |
| - protected int readSkipData(int level, IndexInput skipStream) throws IOException { |
| - int delta; |
| - if (currentFieldStoresPayloads) { |
| - // the current field stores payloads. |
| - // if the doc delta is odd then we have |
| - // to read the current payload length |
| - // because it differs from the length of the |
| - // previous payload |
| - delta = skipStream.readVInt(); |
| - if ((delta & 1) != 0) { |
| - payloadLength[level] = skipStream.readVInt(); |
| - } |
| - delta >>>= 1; |
| - } else { |
| - delta = skipStream.readVInt(); |
| - } |
| - |
| - freqPointer[level] += skipStream.readVInt(); |
| - proxPointer[level] += skipStream.readVInt(); |
| - |
| - return delta; |
| - } |
| -} |
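| 
| The skip-list reader deleted above decodes 3.x skip entries in which, for
| payload-storing fields, the doc delta is left-shifted one bit and the low bit
| flags a changed payload length; two more VInts then advance the freq and prox
| file pointers. A minimal sketch of that decoding step, assuming a plain
| DataInput in place of IndexInput (all names here are illustrative, not part
| of any current Lucene API):
| 
|     import java.io.DataInput;
|     import java.io.IOException;
| 
|     final class SkipEntryDecoder {
|       int payloadLength;              // carried forward until re-flagged
|       long freqPointer, proxPointer;  // running file-pointer positions
| 
|       /** Decodes one skip entry and returns the doc delta. */
|       int readSkipEntry(DataInput in, boolean storesPayloads) throws IOException {
|         int delta = readVInt(in);
|         if (storesPayloads) {
|           if ((delta & 1) != 0) {     // low bit set: a new payload length follows
|             payloadLength = readVInt(in);
|           }
|           delta >>>= 1;               // shift off the flag bit
|         }
|         freqPointer += readVInt(in);  // deltas into the freq and prox streams
|         proxPointer += readVInt(in);
|         return delta;
|       }
| 
|       // Plain VInt decoding: 7 bits per byte, high bit = continuation.
|       private static int readVInt(DataInput in) throws IOException {
|         int b = in.readByte();
|         int value = b & 0x7F;
|         for (int shift = 7; (b & 0x80) != 0; shift += 7) {
|           b = in.readByte();
|           value |= (b & 0x7F) << shift;
|         }
|         return value;
|       }
|     }
| 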
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java (working copy) |
| @@ -1,80 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.TermVectorsFormat; |
| -import org.apache.lucene.codecs.TermVectorsReader; |
| -import org.apache.lucene.codecs.TermVectorsWriter; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.store.CompoundFileDirectory; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| - |
| -/** |
| - * Lucene3x ReadOnly TermVectorsFormat implementation |
| - * @deprecated (4.0) This is only used to read indexes created |
| - * before 4.0. |
| - * @lucene.experimental |
| - */ |
| -@Deprecated |
| -class Lucene3xTermVectorsFormat extends TermVectorsFormat { |
| - |
| - @Override |
| - public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException { |
| - final String fileName = IndexFileNames.segmentFileName(Lucene3xSegmentInfoFormat.getDocStoreSegment(segmentInfo), "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION); |
| - |
| - // Unfortunately, for 3.x indices, each segment's |
| - // FieldInfos can lie about hasVectors (claim it's true |
| - // when really it's false).... so we have to carefully |
| - // check if the files really exist before trying to open |
| - // them (4.x has fixed this): |
| - final boolean exists; |
| - if (Lucene3xSegmentInfoFormat.getDocStoreOffset(segmentInfo) != -1 && Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(segmentInfo)) { |
| - String cfxFileName = IndexFileNames.segmentFileName(Lucene3xSegmentInfoFormat.getDocStoreSegment(segmentInfo), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION); |
| - if (segmentInfo.dir.fileExists(cfxFileName)) { |
| - Directory cfsDir = new CompoundFileDirectory(segmentInfo.dir, cfxFileName, context, false); |
| - try { |
| - exists = cfsDir.fileExists(fileName); |
| - } finally { |
| - cfsDir.close(); |
| - } |
| - } else { |
| - exists = false; |
| - } |
| - } else { |
| - exists = directory.fileExists(fileName); |
| - } |
| - |
| - if (!exists) { |
| - // 3x's FieldInfos sometimes lies and claims a segment |
| - // has vectors when it doesn't: |
| - return null; |
| - } else { |
| - return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context); |
| - } |
| - } |
| - |
| - @Override |
| - public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException { |
| - throw new UnsupportedOperationException("this codec can only be used for reading"); |
| - } |
| -} |
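| 
| Because a 3.x segment's FieldInfos could claim hasVectors when no vectors
| file was ever written, the format deleted above probes for the file before
| opening it. A condensed sketch of that guard, using plain java.nio in place
| of Lucene's Directory API (names illustrative):
| 
|     import java.io.IOException;
|     import java.nio.channels.FileChannel;
|     import java.nio.file.Files;
|     import java.nio.file.Path;
|     import java.nio.file.StandardOpenOption;
| 
|     final class VectorsGuard {
|       static FileChannel openVectorsIfPresent(Path segmentDir, String fileName) throws IOException {
|         Path p = segmentDir.resolve(fileName);
|         if (!Files.exists(p)) {
|           // segment metadata claimed vectors that were never written
|           return null;
|         }
|         return FileChannel.open(p, StandardOpenOption.READ);
|       }
|     }
| 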
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoFormat.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoFormat.java (working copy) |
| @@ -1,89 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.codecs.SegmentInfoFormat; |
| -import org.apache.lucene.codecs.SegmentInfoReader; |
| -import org.apache.lucene.codecs.SegmentInfoWriter; |
| -import org.apache.lucene.index.SegmentInfo; |
| - |
| -/** |
| - * Lucene3x ReadOnly SegmentInfoFormat implementation |
| - * @deprecated (4.0) This is only used to read indexes created |
| - * before 4.0. |
| - * @lucene.experimental |
| - */ |
| -@Deprecated |
| -public class Lucene3xSegmentInfoFormat extends SegmentInfoFormat { |
| - private final SegmentInfoReader reader = new Lucene3xSegmentInfoReader(); |
| - |
| - /** This format adds optional per-segment String |
| - * diagnostics storage, and switches userData to Map */ |
| - public static final int FORMAT_DIAGNOSTICS = -9; |
| - |
| - /** Each segment records whether it has term vectors */ |
| - public static final int FORMAT_HAS_VECTORS = -10; |
| - |
| - /** Each segment records the Lucene version that created it. */ |
| - public static final int FORMAT_3_1 = -11; |
| - |
| - /** Extension used for saving each SegmentInfo, once a 3.x |
| - * index is first committed to with 4.0. */ |
| - public static final String UPGRADED_SI_EXTENSION = "si"; |
| - public static final String UPGRADED_SI_CODEC_NAME = "Lucene3xSegmentInfo"; |
| - public static final int UPGRADED_SI_VERSION_START = 0; |
| - public static final int UPGRADED_SI_VERSION_CURRENT = UPGRADED_SI_VERSION_START; |
| - |
| - @Override |
| - public SegmentInfoReader getSegmentInfosReader() { |
| - return reader; |
| - } |
| - |
| - @Override |
| - public SegmentInfoWriter getSegmentInfosWriter() { |
| - throw new UnsupportedOperationException("this codec can only be used for reading"); |
| - } |
| - |
| - // only for backwards compat |
| - public static final String DS_OFFSET_KEY = Lucene3xSegmentInfoFormat.class.getSimpleName() + ".dsoffset"; |
| - public static final String DS_NAME_KEY = Lucene3xSegmentInfoFormat.class.getSimpleName() + ".dsname"; |
| - public static final String DS_COMPOUND_KEY = Lucene3xSegmentInfoFormat.class.getSimpleName() + ".dscompound"; |
| - public static final String NORMGEN_KEY = Lucene3xSegmentInfoFormat.class.getSimpleName() + ".normgen"; |
| - public static final String NORMGEN_PREFIX = Lucene3xSegmentInfoFormat.class.getSimpleName() + ".normfield"; |
| - |
| - /** |
| - * @return if this segment shares stored fields & vectors, this |
| - * offset is where in that file this segment's docs begin |
| - */ |
| - public static int getDocStoreOffset(SegmentInfo si) { |
| - String v = si.getAttribute(DS_OFFSET_KEY); |
| - return v == null ? -1 : Integer.parseInt(v); |
| - } |
| - |
| - /** @return name used to derive fields/vectors file we share with other segments */ |
| - public static String getDocStoreSegment(SegmentInfo si) { |
| - String v = si.getAttribute(DS_NAME_KEY); |
| - return v == null ? si.name : v; |
| - } |
| - |
| - /** @return whether doc store files are stored in compound file (*.cfx) */ |
| - public static boolean getDocStoreIsCompoundFile(SegmentInfo si) { |
| - String v = si.getAttribute(DS_COMPOUND_KEY); |
| - return v == null ? false : Boolean.parseBoolean(v); |
| - } |
| -} |
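| 
| The removed getters recover doc-store metadata from per-segment string
| attributes, falling back to a default when a key is absent. A small sketch of
| that pattern, with a plain Map standing in for SegmentInfo's attributes:
| 
|     import java.util.Map;
| 
|     final class SegmentAttrs {
|       static int docStoreOffset(Map<String,String> attrs, String key) {
|         String v = attrs.get(key);
|         return v == null ? -1 : Integer.parseInt(v);  // -1: private doc store
|       }
| 
|       static boolean docStoreIsCompound(Map<String,String> attrs, String key) {
|         String v = attrs.get(key);
|         return v != null && Boolean.parseBoolean(v);
|       }
|     }
| 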
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermBuffer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermBuffer.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermBuffer.java (working copy) |
| @@ -1,128 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Comparator; |
| - |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.FieldInfos; |
| - |
| -/** |
| - * @lucene.experimental |
| - * @deprecated (4.0) |
| - */ |
| -@Deprecated |
| -final class TermBuffer implements Cloneable { |
| - |
| - private String field; |
| - private Term term; // cached |
| - |
| - private BytesRef bytes = new BytesRef(10); |
| - |
| - // Cannot be -1 since (strangely) we write that |
| - // fieldNumber into index for first indexed term: |
| - private int currentFieldNumber = -2; |
| - |
| - private static final Comparator<BytesRef> utf8AsUTF16Comparator = BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - |
| - int newSuffixStart; // only valid right after .read is called |
| - |
| - public int compareTo(TermBuffer other) { |
| - if (field == other.field) // fields are interned |
| - // (only by PreFlex codec) |
| - return utf8AsUTF16Comparator.compare(bytes, other.bytes); |
| - else |
| - return field.compareTo(other.field); |
| - } |
| - |
| - public void read(IndexInput input, FieldInfos fieldInfos) |
| - throws IOException { |
| - this.term = null; // invalidate cache |
| - newSuffixStart = input.readVInt(); |
| - int length = input.readVInt(); |
| - int totalLength = newSuffixStart + length; |
| - if (bytes.bytes.length < totalLength) { |
| - bytes.grow(totalLength); |
| - } |
| - bytes.length = totalLength; |
| - input.readBytes(bytes.bytes, newSuffixStart, length); |
| - final int fieldNumber = input.readVInt(); |
| - if (fieldNumber != currentFieldNumber) { |
| - currentFieldNumber = fieldNumber; |
| - // NOTE: too much sneakiness here, seriously this is a negative vint?! |
| - if (currentFieldNumber == -1) { |
| - field = ""; |
| - } else { |
| - assert fieldInfos.fieldInfo(currentFieldNumber) != null : currentFieldNumber; |
| - field = fieldInfos.fieldInfo(currentFieldNumber).name.intern(); |
| - } |
| - } else { |
| - assert field.equals(fieldInfos.fieldInfo(fieldNumber).name) : "currentFieldNumber=" + currentFieldNumber + " field=" + field + " vs " + fieldInfos.fieldInfo(fieldNumber) == null ? "null" : fieldInfos.fieldInfo(fieldNumber).name; |
| - } |
| - } |
| - |
| - public void set(Term term) { |
| - if (term == null) { |
| - reset(); |
| - return; |
| - } |
| - bytes.copyBytes(term.bytes()); |
| - field = term.field().intern(); |
| - currentFieldNumber = -1; |
| - this.term = term; |
| - } |
| - |
| - public void set(TermBuffer other) { |
| - field = other.field; |
| - currentFieldNumber = other.currentFieldNumber; |
| - // dangerous to copy Term over, since the underlying |
| - // BytesRef could subsequently be modified: |
| - term = null; |
| - bytes.copyBytes(other.bytes); |
| - } |
| - |
| - public void reset() { |
| - field = null; |
| - term = null; |
| - currentFieldNumber= -1; |
| - } |
| - |
| - public Term toTerm() { |
| - if (field == null) // unset |
| - return null; |
| - |
| - if (term == null) { |
| - term = new Term(field, BytesRef.deepCopyOf(bytes)); |
| - } |
| - |
| - return term; |
| - } |
| - |
| - @Override |
| - protected TermBuffer clone() { |
| - TermBuffer clone = null; |
| - try { |
| - clone = (TermBuffer)super.clone(); |
| - } catch (CloneNotSupportedException e) {} |
| - clone.bytes = BytesRef.deepCopyOf(bytes); |
| - return clone; |
| - } |
| -} |
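| 
| TermBuffer, deleted above, rebuilds each term from a shared-prefix delta: an
| entry records how many leading bytes carry over from the previous term
| (newSuffixStart), the suffix length, and the suffix bytes. A self-contained
| sketch of that scheme, writing plain ints where the real format used VInts
| (names illustrative):
| 
|     import java.io.*;
|     import java.util.*;
| 
|     final class PrefixDeltaTerms {
|       static void write(DataOutputStream out, List<byte[]> sortedTerms) throws IOException {
|         byte[] prev = new byte[0];
|         for (byte[] term : sortedTerms) {
|           int prefix = 0;
|           while (prefix < prev.length && prefix < term.length && prev[prefix] == term[prefix]) prefix++;
|           out.writeInt(prefix);                   // bytes shared with previous term
|           out.writeInt(term.length - prefix);     // suffix length
|           out.write(term, prefix, term.length - prefix);
|           prev = term;
|         }
|       }
| 
|       static byte[] readNext(DataInputStream in, byte[] prev) throws IOException {
|         int prefix = in.readInt();
|         int suffixLen = in.readInt();
|         byte[] term = Arrays.copyOf(prev, prefix + suffixLen);  // keep shared prefix
|         in.readFully(term, prefix, suffixLen);                  // append new suffix
|         return term;
|       }
|     }
| 
| The encoding pays off on sorted term dictionaries, where adjacent terms tend
| to share long prefixes.
| 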
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java (working copy) |
| @@ -1,219 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.FieldInfo.IndexOptions; |
| -import org.apache.lucene.store.IndexInput; |
| - |
| -/** |
| - * @lucene.experimental |
| - * @deprecated (4.0) |
| - */ |
| -@Deprecated |
| -final class SegmentTermPositions |
| -extends SegmentTermDocs { |
| - private IndexInput proxStream; |
| - private IndexInput proxStreamOrig; |
| - private int proxCount; |
| - private int position; |
| - |
| - // the current payload length |
| - private int payloadLength; |
| - // indicates whether the payload of the current position has |
| - // been read from the proxStream yet |
| - private boolean needToLoadPayload; |
| - |
| - // these variables are being used to remember information |
| - // for a lazy skip |
| - private long lazySkipPointer = -1; |
| - private int lazySkipProxCount = 0; |
| - |
| - /* |
| - SegmentTermPositions(SegmentReader p) { |
| - super(p); |
| - this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time |
| - } |
| - */ |
| - |
| - public SegmentTermPositions(IndexInput freqStream, IndexInput proxStream, TermInfosReader tis, FieldInfos fieldInfos) { |
| - super(freqStream, tis, fieldInfos); |
| - this.proxStreamOrig = proxStream; // the proxStream will be cloned lazily when nextPosition() is called for the first time |
| - } |
| - |
| - @Override |
| - final void seek(TermInfo ti, Term term) throws IOException { |
| - super.seek(ti, term); |
| - if (ti != null) |
| - lazySkipPointer = ti.proxPointer; |
| - |
| - lazySkipProxCount = 0; |
| - proxCount = 0; |
| - payloadLength = 0; |
| - needToLoadPayload = false; |
| - } |
| - |
| - @Override |
| - public final void close() throws IOException { |
| - super.close(); |
| - if (proxStream != null) proxStream.close(); |
| - } |
| - |
| - public final int nextPosition() throws IOException { |
| - if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) |
| - // This field does not store positions, payloads |
| - return 0; |
| - // perform lazy skips if necessary |
| - lazySkip(); |
| - proxCount--; |
| - return position += readDeltaPosition(); |
| - } |
| - |
| - private final int readDeltaPosition() throws IOException { |
| - int delta = proxStream.readVInt(); |
| - if (currentFieldStoresPayloads) { |
| - // if the current field stores payloads then |
| - // the position delta is shifted one bit to the left. |
| - // if the LSB is set, then we have to read the current |
| - // payload length |
| - if ((delta & 1) != 0) { |
| - payloadLength = proxStream.readVInt(); |
| - } |
| - delta >>>= 1; |
| - needToLoadPayload = true; |
| - } |
| - return delta; |
| - } |
| - |
| - @Override |
| - protected final void skippingDoc() throws IOException { |
| - // we remember to skip a document lazily |
| - lazySkipProxCount += freq; |
| - } |
| - |
| - @Override |
| - public final boolean next() throws IOException { |
| - // we remember to skip the remaining positions of the current |
| - // document lazily |
| - lazySkipProxCount += proxCount; |
| - |
| - if (super.next()) { // run super |
| - proxCount = freq; // note frequency |
| - position = 0; // reset position |
| - return true; |
| - } |
| - return false; |
| - } |
| - |
| - @Override |
| - public final int read(final int[] docs, final int[] freqs) { |
| - throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead."); |
| - } |
| - |
| - |
| - /** Called by super.skipTo(). */ |
| - @Override |
| - protected void skipProx(long proxPointer, int payloadLength) throws IOException { |
| - // we save the pointer, we might have to skip there lazily |
| - lazySkipPointer = proxPointer; |
| - lazySkipProxCount = 0; |
| - proxCount = 0; |
| - this.payloadLength = payloadLength; |
| - needToLoadPayload = false; |
| - } |
| - |
| - private void skipPositions(int n) throws IOException { |
| - assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; |
| - for (int f = n; f > 0; f--) { // skip unread positions |
| - readDeltaPosition(); |
| - skipPayload(); |
| - } |
| - } |
| - |
| - private void skipPayload() throws IOException { |
| - if (needToLoadPayload && payloadLength > 0) { |
| - proxStream.seek(proxStream.getFilePointer() + payloadLength); |
| - } |
| - needToLoadPayload = false; |
| - } |
| - |
| - // It is not always necessary to move the prox pointer |
| - // to a new document after the freq pointer has been moved. |
| - // Consider for example a phrase query with two terms: |
| - // the freq pointer for term 1 has to move to document x |
| - // to answer the question if the term occurs in that document. But |
| - // only if term 2 also matches document x, the positions have to be |
| - // read to figure out if term 1 and term 2 appear next |
| - // to each other in document x and thus satisfy the query. |
| - // So we move the prox pointer lazily to the document |
| - // as soon as positions are requested. |
| - private void lazySkip() throws IOException { |
| - if (proxStream == null) { |
| - // clone lazily |
| - proxStream = (IndexInput)proxStreamOrig.clone(); |
| - } |
| - |
| - // we might have to skip the current payload |
| - // if it was not read yet |
| - skipPayload(); |
| - |
| - if (lazySkipPointer != -1) { |
| - proxStream.seek(lazySkipPointer); |
| - lazySkipPointer = -1; |
| - } |
| - |
| - if (lazySkipProxCount != 0) { |
| - skipPositions(lazySkipProxCount); |
| - lazySkipProxCount = 0; |
| - } |
| - } |
| - |
| - public int getPayloadLength() { |
| - return payloadLength; |
| - } |
| - |
| - public byte[] getPayload(byte[] data, int offset) throws IOException { |
| - if (!needToLoadPayload) { |
| - throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once."); |
| - } |
| - |
| - // read payloads lazily |
| - byte[] retArray; |
| - int retOffset; |
| - if (data == null || data.length - offset < payloadLength) { |
| - // the array is too small to store the payload data, |
| - // so we allocate a new one |
| - retArray = new byte[payloadLength]; |
| - retOffset = 0; |
| - } else { |
| - retArray = data; |
| - retOffset = offset; |
| - } |
| - proxStream.readBytes(retArray, retOffset, payloadLength); |
| - needToLoadPayload = false; |
| - return retArray; |
| - } |
| - |
| - public boolean isPayloadAvailable() { |
| - return needToLoadPayload && payloadLength > 0; |
| - } |
| - |
| -} |
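| 
| readDeltaPosition() above shows the 3.x prox encoding: position deltas,
| left-shifted when the field stores payloads so the low bit can flag a changed
| payload length, with the payload bytes following each position. A sketch of
| decoding one document's positions under that scheme, with plain ints standing
| in for VInts (names illustrative):
| 
|     import java.io.DataInput;
|     import java.io.IOException;
| 
|     final class ProxDecoder {
|       static int[] readPositions(DataInput prox, int freq, boolean storesPayloads) throws IOException {
|         int[] positions = new int[freq];
|         int pos = 0;
|         int payloadLength = 0;
|         for (int i = 0; i < freq; i++) {
|           int delta = prox.readInt();
|           if (storesPayloads) {
|             if ((delta & 1) != 0) {
|               payloadLength = prox.readInt();  // length changed at this position
|             }
|             delta >>>= 1;                      // shift off the flag bit
|             prox.skipBytes(payloadLength);     // payload bytes follow; skip them
|           }
|           pos += delta;                        // positions are stored as deltas
|           positions[i] = pos;
|         }
|         return positions;
|       }
|     }
| 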
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java (working copy) |
| @@ -1,45 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.FieldInfosFormat; |
| -import org.apache.lucene.codecs.FieldInfosReader; |
| -import org.apache.lucene.codecs.FieldInfosWriter; |
| - |
| -/** |
| - * Lucene3x ReadOnly FieldInfosFromat implementation |
| - * @deprecated (4.0) This is only used to read indexes created |
| - * before 4.0. |
| - * @lucene.experimental |
| - */ |
| -@Deprecated |
| -class Lucene3xFieldInfosFormat extends FieldInfosFormat { |
| - private final FieldInfosReader reader = new Lucene3xFieldInfosReader(); |
| - |
| - @Override |
| - public FieldInfosReader getFieldInfosReader() throws IOException { |
| - return reader; |
| - } |
| - |
| - @Override |
| - public FieldInfosWriter getFieldInfosWriter() throws IOException { |
| - throw new UnsupportedOperationException("this codec can only be used for reading"); |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java (working copy) |
| @@ -1,229 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfo.IndexOptions; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.util.Bits; |
| - |
| -/** @deprecated (4.0) |
| - * @lucene.experimental */ |
| -@Deprecated |
| -class SegmentTermDocs { |
| - //protected SegmentReader parent; |
| - private final FieldInfos fieldInfos; |
| - private final TermInfosReader tis; |
| - protected Bits liveDocs; |
| - protected IndexInput freqStream; |
| - protected int count; |
| - protected int df; |
| - int doc = 0; |
| - int freq; |
| - |
| - private int skipInterval; |
| - private int maxSkipLevels; |
| - private Lucene3xSkipListReader skipListReader; |
| - |
| - private long freqBasePointer; |
| - private long proxBasePointer; |
| - |
| - private long skipPointer; |
| - private boolean haveSkipped; |
| - |
| - protected boolean currentFieldStoresPayloads; |
| - protected IndexOptions indexOptions; |
| - |
| - public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) { |
| - this.freqStream = (IndexInput) freqStream.clone(); |
| - this.tis = tis; |
| - this.fieldInfos = fieldInfos; |
| - skipInterval = tis.getSkipInterval(); |
| - maxSkipLevels = tis.getMaxSkipLevels(); |
| - } |
| - |
| - public void seek(Term term) throws IOException { |
| - TermInfo ti = tis.get(term); |
| - seek(ti, term); |
| - } |
| - |
| - public void setLiveDocs(Bits liveDocs) { |
| - this.liveDocs = liveDocs; |
| - } |
| - |
| - public void seek(SegmentTermEnum segmentTermEnum) throws IOException { |
| - TermInfo ti; |
| - Term term; |
| - |
| - // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs |
| - if (segmentTermEnum.fieldInfos == fieldInfos) { // optimized case |
| - term = segmentTermEnum.term(); |
| - ti = segmentTermEnum.termInfo(); |
| - } else { // punt case |
| - term = segmentTermEnum.term(); |
| - ti = tis.get(term); |
| - } |
| - |
| - seek(ti, term); |
| - } |
| - |
| - void seek(TermInfo ti, Term term) throws IOException { |
| - count = 0; |
| - FieldInfo fi = fieldInfos.fieldInfo(term.field()); |
| - this.indexOptions = (fi != null) ? fi.getIndexOptions() : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; |
| - currentFieldStoresPayloads = (fi != null) ? fi.hasPayloads() : false; |
| - if (ti == null) { |
| - df = 0; |
| - } else { |
| - df = ti.docFreq; |
| - doc = 0; |
| - freqBasePointer = ti.freqPointer; |
| - proxBasePointer = ti.proxPointer; |
| - skipPointer = freqBasePointer + ti.skipOffset; |
| - freqStream.seek(freqBasePointer); |
| - haveSkipped = false; |
| - } |
| - } |
| - |
| - public void close() throws IOException { |
| - freqStream.close(); |
| - if (skipListReader != null) |
| - skipListReader.close(); |
| - } |
| - |
| - public final int doc() { return doc; } |
| - public final int freq() { |
| - assert indexOptions != IndexOptions.DOCS_ONLY; |
| - return freq; |
| - } |
| - |
| - protected void skippingDoc() throws IOException { |
| - } |
| - |
| - public boolean next() throws IOException { |
| - while (true) { |
| - if (count == df) |
| - return false; |
| - final int docCode = freqStream.readVInt(); |
| - |
| - if (indexOptions == IndexOptions.DOCS_ONLY) { |
| - doc += docCode; |
| - } else { |
| - doc += docCode >>> 1; // shift off low bit |
| - if ((docCode & 1) != 0) // if low bit is set |
| - freq = 1; // freq is one |
| - else { |
| - freq = freqStream.readVInt(); // else read freq |
| - assert freq != 1; |
| - } |
| - } |
| - |
| - count++; |
| - |
| - if (liveDocs == null || liveDocs.get(doc)) { |
| - break; |
| - } |
| - skippingDoc(); |
| - } |
| - return true; |
| - } |
| - |
| - /** Optimized implementation. */ |
| - public int read(final int[] docs, final int[] freqs) |
| - throws IOException { |
| - final int length = docs.length; |
| - if (indexOptions == IndexOptions.DOCS_ONLY) { |
| - return readNoTf(docs, freqs, length); |
| - } else { |
| - int i = 0; |
| - while (i < length && count < df) { |
| - // manually inlined call to next() for speed |
| - final int docCode = freqStream.readVInt(); |
| - doc += docCode >>> 1; // shift off low bit |
| - if ((docCode & 1) != 0) // if low bit is set |
| - freq = 1; // freq is one |
| - else |
| - freq = freqStream.readVInt(); // else read freq |
| - count++; |
| - |
| - if (liveDocs == null || liveDocs.get(doc)) { |
| - docs[i] = doc; |
| - freqs[i] = freq; |
| - ++i; |
| - } |
| - } |
| - return i; |
| - } |
| - } |
| - |
| - private final int readNoTf(final int[] docs, final int[] freqs, final int length) throws IOException { |
| - int i = 0; |
| - while (i < length && count < df) { |
| - // manually inlined call to next() for speed |
| - doc += freqStream.readVInt(); |
| - count++; |
| - |
| - if (liveDocs == null || liveDocs.get(doc)) { |
| - docs[i] = doc; |
| - // Hardware freq to 1 when term freqs were not |
| - // stored in the index |
| - freqs[i] = 1; |
| - ++i; |
| - } |
| - } |
| - return i; |
| - } |
| - |
| - |
| - /** Overridden by SegmentTermPositions to skip in prox stream. */ |
| - protected void skipProx(long proxPointer, int payloadLength) throws IOException {} |
| - |
| - /** Optimized implementation. */ |
| - public boolean skipTo(int target) throws IOException { |
| - // don't skip if the target is close (within skipInterval docs away) |
| - if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case |
| - if (skipListReader == null) |
| - skipListReader = new Lucene3xSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone |
| - |
| - if (!haveSkipped) { // lazily initialize skip stream |
| - skipListReader.init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads); |
| - haveSkipped = true; |
| - } |
| - |
| - int newCount = skipListReader.skipTo(target); |
| - if (newCount > count) { |
| - freqStream.seek(skipListReader.getFreqPointer()); |
| - skipProx(skipListReader.getProxPointer(), skipListReader.getPayloadLength()); |
| - |
| - doc = skipListReader.getDoc(); |
| - count = newCount; |
| - } |
| - } |
| - |
| - // done skipping, now just scan |
| - do { |
| - if (!next()) |
| - return false; |
| - } while (target > doc); |
| - return true; |
| - } |
| -} |
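| 
| next() above relies on the 3.x docCode trick: each posting stores
| (docDelta << 1), and when the low bit is set, freq is implicitly 1 and the
| explicit freq value is omitted. A round-trip sketch of that encoding, again
| with plain ints in place of the VInts the real postings format used (names
| illustrative):
| 
|     import java.io.DataInput;
|     import java.io.DataOutput;
|     import java.io.IOException;
| 
|     final class DocFreqCodec {
|       static void writeEntry(DataOutput out, int docDelta, int freq) throws IOException {
|         if (freq == 1) {
|           out.writeInt((docDelta << 1) | 1);  // low bit set: freq implicitly 1
|         } else {
|           out.writeInt(docDelta << 1);
|           out.writeInt(freq);                 // only written when freq != 1
|         }
|       }
| 
|       /** Returns {docDelta, freq} for the next entry. */
|       static int[] readEntry(DataInput in) throws IOException {
|         int docCode = in.readInt();
|         int docDelta = docCode >>> 1;         // shift off the freq flag
|         int freq = (docCode & 1) != 0 ? 1 : in.readInt();
|         return new int[] { docDelta, freq };
|       }
|     }
| 
| Since freq == 1 dominates most postings lists, folding it into the doc delta
| saves one varint per posting in the common case.
| 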
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java (working copy) |
| @@ -1,701 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Arrays; |
| -import java.util.Comparator; |
| -import java.util.HashMap; |
| -import java.util.Map; |
| - |
| -import org.apache.lucene.codecs.TermVectorsReader; |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.DocsAndPositionsEnum; |
| -import org.apache.lucene.index.DocsEnum; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.Fields; |
| -import org.apache.lucene.index.FieldsEnum; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexFormatTooNewException; |
| -import org.apache.lucene.index.IndexFormatTooOldException; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.Terms; |
| -import org.apache.lucene.index.TermsEnum; |
| -import org.apache.lucene.store.CompoundFileDirectory; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.IOUtils; |
| - |
| -/** @deprecated */ |
| -@Deprecated |
| -class Lucene3xTermVectorsReader extends TermVectorsReader { |
| - |
| - // NOTE: if you make a new format, it must be larger than |
| - // the current format |
| - |
| - // Changed strings to UTF8 with length-in-bytes not length-in-chars |
| - static final int FORMAT_UTF8_LENGTH_IN_BYTES = 4; |
| - |
| - // NOTE: always change this if you switch to a new format! |
| - // whenever you add a new format, make it 1 larger (positive version logic)! |
| - public static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES; |
| - |
| - // when removing support for old versions, leave the last supported version here |
| - public static final int FORMAT_MINIMUM = FORMAT_UTF8_LENGTH_IN_BYTES; |
| - |
| - //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file |
| - static final int FORMAT_SIZE = 4; |
| - |
| - public static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1; |
| - |
| - public static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2; |
| - |
| - /** Extension of vectors fields file */ |
| - public static final String VECTORS_FIELDS_EXTENSION = "tvf"; |
| - |
| - /** Extension of vectors documents file */ |
| - public static final String VECTORS_DOCUMENTS_EXTENSION = "tvd"; |
| - |
| - /** Extension of vectors index file */ |
| - public static final String VECTORS_INDEX_EXTENSION = "tvx"; |
| - |
| - private FieldInfos fieldInfos; |
| - |
| - private IndexInput tvx; |
| - private IndexInput tvd; |
| - private IndexInput tvf; |
| - private int size; |
| - private int numTotalDocs; |
| - |
| - // The docID offset where our docs begin in the index |
| - // file. This will be 0 if we have our own private file. |
| - private int docStoreOffset; |
| - |
| - // when we are inside a compound share doc store (CFX), |
| - // (lucene 3.0 indexes only), we privately open our own fd. |
| - // TODO: if we are worried, maybe we could eliminate the |
| - // extra fd somehow when you also have vectors... |
| - private final CompoundFileDirectory storeCFSReader; |
| - |
| - private final int format; |
| - |
| - // used by clone |
| - Lucene3xTermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs, int docStoreOffset, int format) { |
| - this.fieldInfos = fieldInfos; |
| - this.tvx = tvx; |
| - this.tvd = tvd; |
| - this.tvf = tvf; |
| - this.size = size; |
| - this.numTotalDocs = numTotalDocs; |
| - this.docStoreOffset = docStoreOffset; |
| - this.format = format; |
| - this.storeCFSReader = null; |
| - } |
| - |
| - public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context) |
| - throws CorruptIndexException, IOException { |
| - final String segment = Lucene3xSegmentInfoFormat.getDocStoreSegment(si); |
| - final int docStoreOffset = Lucene3xSegmentInfoFormat.getDocStoreOffset(si); |
| - final int size = si.getDocCount(); |
| - |
| - boolean success = false; |
| - |
| - try { |
| - if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(si)) { |
| - d = storeCFSReader = new CompoundFileDirectory(si.dir, |
| - IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); |
| - } else { |
| - storeCFSReader = null; |
| - } |
| - String idxName = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION); |
| - tvx = d.openInput(idxName, context); |
| - format = checkValidFormat(tvx); |
| - String fn = IndexFileNames.segmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION); |
| - tvd = d.openInput(fn, context); |
| - final int tvdFormat = checkValidFormat(tvd); |
| - fn = IndexFileNames.segmentFileName(segment, "", VECTORS_FIELDS_EXTENSION); |
| - tvf = d.openInput(fn, context); |
| - final int tvfFormat = checkValidFormat(tvf); |
| - |
| - assert format == tvdFormat; |
| - assert format == tvfFormat; |
| - |
| - numTotalDocs = (int) (tvx.length() >> 4); |
| - |
| - if (-1 == docStoreOffset) { |
| - this.docStoreOffset = 0; |
| - this.size = numTotalDocs; |
| - assert size == 0 || numTotalDocs == size; |
| - } else { |
| - this.docStoreOffset = docStoreOffset; |
| - this.size = size; |
| - // Verify the file is long enough to hold all of our |
| - // docs |
| - assert numTotalDocs >= size + docStoreOffset: "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset; |
| - } |
| - |
| - this.fieldInfos = fieldInfos; |
| - success = true; |
| - } finally { |
| - // With lock-less commits, it's entirely possible (and |
| - // fine) to hit a FileNotFound exception above. In |
| - // this case, we want to explicitly close any subset |
| - // of things that were opened so that we don't have to |
| - // wait for a GC to do so. |
| - if (!success) { |
| - close(); |
| - } |
| - } |
| - } |
| - |
| - // Not private to avoid synthetic access$NNN methods |
| - void seekTvx(final int docNum) throws IOException { |
| - tvx.seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE); |
| - } |
| - |
| - private int checkValidFormat(IndexInput in) throws CorruptIndexException, IOException |
| - { |
| - int format = in.readInt(); |
| - if (format < FORMAT_MINIMUM) |
| - throw new IndexFormatTooOldException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - if (format > FORMAT_CURRENT) |
| - throw new IndexFormatTooNewException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - return format; |
| - } |
| - |
| - public void close() throws IOException { |
| - IOUtils.close(tvx, tvd, tvf, storeCFSReader); |
| - } |
| - |
| - /** |
| - * |
| - * @return The number of documents in the reader |
| - */ |
| - int size() { |
| - return size; |
| - } |
| - |
| - private class TVFields extends Fields { |
| - private final int[] fieldNumbers; |
| - private final long[] fieldFPs; |
| - private final Map<Integer,Integer> fieldNumberToIndex = new HashMap<Integer,Integer>(); |
| - |
| - public TVFields(int docID) throws IOException { |
| - seekTvx(docID); |
| - tvd.seek(tvx.readLong()); |
| - |
| - final int fieldCount = tvd.readVInt(); |
| - assert fieldCount >= 0; |
| - if (fieldCount != 0) { |
| - fieldNumbers = new int[fieldCount]; |
| - fieldFPs = new long[fieldCount]; |
| - for(int fieldUpto=0;fieldUpto<fieldCount;fieldUpto++) { |
| - final int fieldNumber = tvd.readVInt(); |
| - fieldNumbers[fieldUpto] = fieldNumber; |
| - fieldNumberToIndex.put(fieldNumber, fieldUpto); |
| - } |
| - |
| - long position = tvx.readLong(); |
| - fieldFPs[0] = position; |
| - for(int fieldUpto=1;fieldUpto<fieldCount;fieldUpto++) { |
| - position += tvd.readVLong(); |
| - fieldFPs[fieldUpto] = position; |
| - } |
| - } else { |
| - // TODO: we can improve writer here, eg write 0 into |
| - // tvx file, so we know on first read from tvx that |
| - // this doc has no TVs |
| - fieldNumbers = null; |
| - fieldFPs = null; |
| - } |
| - } |
| - |
| - @Override |
| - public FieldsEnum iterator() throws IOException { |
| - |
| - return new FieldsEnum() { |
| - private int fieldUpto; |
| - |
| - @Override |
| - public String next() throws IOException { |
| - if (fieldNumbers != null && fieldUpto < fieldNumbers.length) { |
| - return fieldInfos.fieldInfo(fieldNumbers[fieldUpto++]).name; |
| - } else { |
| - return null; |
| - } |
| - } |
| - |
| - @Override |
| - public Terms terms() throws IOException { |
| - return TVFields.this.terms(fieldInfos.fieldInfo(fieldNumbers[fieldUpto-1]).name); |
| - } |
| - }; |
| - } |
| - |
| - @Override |
| - public Terms terms(String field) throws IOException { |
| - final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); |
| - if (fieldInfo == null) { |
| - // No such field |
| - return null; |
| - } |
| - |
| - final Integer fieldIndex = fieldNumberToIndex.get(fieldInfo.number); |
| - if (fieldIndex == null) { |
| - // Term vectors were not indexed for this field |
| - return null; |
| - } |
| - |
| - return new TVTerms(fieldFPs[fieldIndex]); |
| - } |
| - |
| - @Override |
| - public int size() { |
| - if (fieldNumbers == null) { |
| - return 0; |
| - } else { |
| - return fieldNumbers.length; |
| - } |
| - } |
| - } |
| - |
| - private class TVTerms extends Terms { |
| - private final int numTerms; |
| - private final long tvfFPStart; |
| - private final boolean unicodeSortOrder; |
| - |
| - public TVTerms(long tvfFP) throws IOException { |
| - tvf.seek(tvfFP); |
| - numTerms = tvf.readVInt(); |
| - tvfFPStart = tvf.getFilePointer(); |
| - unicodeSortOrder = sortTermsByUnicode(); |
| - } |
| - |
| - @Override |
| - public TermsEnum iterator(TermsEnum reuse) throws IOException { |
| - TVTermsEnum termsEnum; |
| - if (reuse instanceof TVTermsEnum) { |
| - termsEnum = (TVTermsEnum) reuse; |
| - if (!termsEnum.canReuse(tvf)) { |
| - termsEnum = new TVTermsEnum(); |
| - } |
| - } else { |
| - termsEnum = new TVTermsEnum(); |
| - } |
| - termsEnum.reset(numTerms, tvfFPStart, unicodeSortOrder); |
| - return termsEnum; |
| - } |
| - |
| - @Override |
| - public long size() { |
| - return numTerms; |
| - } |
| - |
| - @Override |
| - public long getSumTotalTermFreq() { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public long getSumDocFreq() { |
| - // Every term occurs in just one doc: |
| - return numTerms; |
| - } |
| - |
| - @Override |
| - public int getDocCount() { |
| - return 1; |
| - } |
| - |
| - @Override |
| - public Comparator<BytesRef> getComparator() { |
| - if (unicodeSortOrder) { |
| - return BytesRef.getUTF8SortedAsUnicodeComparator(); |
| - } else { |
| - return BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - } |
| - } |
| - } |
| - |
| - static class TermAndPostings { |
| - BytesRef term; |
| - int freq; |
| - int[] positions; |
| - int[] startOffsets; |
| - int[] endOffsets; |
| - } |
| - |
| - private class TVTermsEnum extends TermsEnum { |
| - private boolean unicodeSortOrder; |
| - private final IndexInput origTVF; |
| - private final IndexInput tvf; |
| - private int numTerms; |
| - private int currentTerm; |
| - private boolean storePositions; |
| - private boolean storeOffsets; |
| - |
| - private TermAndPostings[] termAndPostings; |
| - |
| - // NOTE: tvf is pre-positioned by caller |
| - public TVTermsEnum() throws IOException { |
| - this.origTVF = Lucene3xTermVectorsReader.this.tvf; |
| - tvf = (IndexInput) origTVF.clone(); |
| - } |
| - |
| - public boolean canReuse(IndexInput tvf) { |
| - return tvf == origTVF; |
| - } |
| - |
| - public void reset(int numTerms, long tvfFPStart, boolean unicodeSortOrder) throws IOException { |
| - this.numTerms = numTerms; |
| - currentTerm = -1; |
| - tvf.seek(tvfFPStart); |
| - final byte bits = tvf.readByte(); |
| - storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; |
| - storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; |
| - this.unicodeSortOrder = unicodeSortOrder; |
| - readVectors(); |
| - if (unicodeSortOrder) { |
| - Arrays.sort(termAndPostings, new Comparator<TermAndPostings>() { |
| - public int compare(TermAndPostings left, TermAndPostings right) { |
| - return left.term.compareTo(right.term); |
| - } |
| - }); |
| - } |
| - } |
| - |
| - private void readVectors() throws IOException { |
| - termAndPostings = new TermAndPostings[numTerms]; |
| - BytesRef lastTerm = new BytesRef(); |
| - for (int i = 0; i < numTerms; i++) { |
| - TermAndPostings t = new TermAndPostings(); |
| - BytesRef term = new BytesRef(); |
| - term.copyBytes(lastTerm); |
| - final int start = tvf.readVInt(); |
| - final int deltaLen = tvf.readVInt(); |
| - term.length = start + deltaLen; |
| - term.grow(term.length); |
| - tvf.readBytes(term.bytes, start, deltaLen); |
| - t.term = term; |
| - int freq = tvf.readVInt(); |
| - t.freq = freq; |
| - |
| - if (storePositions) { |
| - int positions[] = new int[freq]; |
| - int pos = 0; |
| - for(int posUpto=0;posUpto<freq;posUpto++) { |
| - pos += tvf.readVInt(); |
| - positions[posUpto] = pos; |
| - } |
| - t.positions = positions; |
| - } |
| - |
| - if (storeOffsets) { |
| - int startOffsets[] = new int[freq]; |
| - int endOffsets[] = new int[freq]; |
| - int offset = 0; |
| - for(int posUpto=0;posUpto<freq;posUpto++) { |
| - startOffsets[posUpto] = offset + tvf.readVInt(); |
| - offset = endOffsets[posUpto] = startOffsets[posUpto] + tvf.readVInt(); |
| - } |
| - t.startOffsets = startOffsets; |
| - t.endOffsets = endOffsets; |
| - } |
| - lastTerm.copyBytes(term); |
| - termAndPostings[i] = t; |
| - } |
| - } |
| - |
| - // NOTE: slow! (linear scan) |
| - @Override |
| - public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException { |
| - Comparator<BytesRef> comparator = getComparator(); |
| - for (int i = 0; i < numTerms; i++) { |
| - int cmp = comparator.compare(text, termAndPostings[i].term); |
| - if (cmp < 0) { |
| - currentTerm = i; |
| - return SeekStatus.NOT_FOUND; |
| - } else if (cmp == 0) { |
| - currentTerm = i; |
| - return SeekStatus.FOUND; |
| - } |
| - } |
| - currentTerm = termAndPostings.length; |
| - return SeekStatus.END; |
| - } |
| - |
| - @Override |
| - public void seekExact(long ord) { |
| - throw new UnsupportedOperationException(); |
| - } |
| - |
| - @Override |
| - public BytesRef next() throws IOException { |
| - if (++currentTerm >= numTerms) { |
| - return null; |
| - } |
| - return term(); |
| - } |
| - |
| - @Override |
| - public BytesRef term() { |
| - return termAndPostings[currentTerm].term; |
| - } |
| - |
| - @Override |
| - public long ord() { |
| - throw new UnsupportedOperationException(); |
| - } |
| - |
| - @Override |
| - public int docFreq() { |
| - return 1; |
| - } |
| - |
| - @Override |
| - public long totalTermFreq() { |
| - return termAndPostings[currentTerm].freq; |
| - } |
| - |
| - @Override |
| - public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs /* ignored */) throws IOException { |
| - TVDocsEnum docsEnum; |
| - if (reuse != null && reuse instanceof TVDocsEnum) { |
| - docsEnum = (TVDocsEnum) reuse; |
| - } else { |
| - docsEnum = new TVDocsEnum(); |
| - } |
| - docsEnum.reset(liveDocs, termAndPostings[currentTerm]); |
| - return docsEnum; |
| - } |
| - |
| - @Override |
| - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { |
| - if (needsOffsets && !storeOffsets) { |
| - return null; |
| - } |
| - |
| - if (!storePositions && !storeOffsets) { |
| - return null; |
| - } |
| - |
| - TVDocsAndPositionsEnum docsAndPositionsEnum; |
| - if (reuse != null && reuse instanceof TVDocsAndPositionsEnum) { |
| - docsAndPositionsEnum = (TVDocsAndPositionsEnum) reuse; |
| - } else { |
| - docsAndPositionsEnum = new TVDocsAndPositionsEnum(); |
| - } |
| - docsAndPositionsEnum.reset(liveDocs, termAndPostings[currentTerm]); |
| - return docsAndPositionsEnum; |
| - } |
| - |
| - @Override |
| - public Comparator<BytesRef> getComparator() { |
| - if (unicodeSortOrder) { |
| - return BytesRef.getUTF8SortedAsUnicodeComparator(); |
| - } else { |
| - return BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - } |
| - } |
| - } |
| - |
| - // NOTE: sort of a silly class, since you can get the |
| - // freq() already from TermsEnum.totalTermFreq |
| - private static class TVDocsEnum extends DocsEnum { |
| - private boolean didNext; |
| - private int doc = -1; |
| - private int freq; |
| - private Bits liveDocs; |
| - |
| - @Override |
| - public int freq() throws IOException { |
| - return freq; |
| - } |
| - |
| - @Override |
| - public int docID() { |
| - return doc; |
| - } |
| - |
| - @Override |
| - public int nextDoc() { |
| - if (!didNext && (liveDocs == null || liveDocs.get(0))) { |
| - didNext = true; |
| - return (doc = 0); |
| - } else { |
| - return (doc = NO_MORE_DOCS); |
| - } |
| - } |
| - |
| - @Override |
| - public int advance(int target) { |
| - if (!didNext && target == 0) { |
| - return nextDoc(); |
| - } else { |
| - return (doc = NO_MORE_DOCS); |
| - } |
| - } |
| - |
| - public void reset(Bits liveDocs, TermAndPostings termAndPostings) { |
| - this.liveDocs = liveDocs; |
| - this.freq = termAndPostings.freq; |
| - this.doc = -1; |
| - didNext = false; |
| - } |
| - } |
| - |
| - private static class TVDocsAndPositionsEnum extends DocsAndPositionsEnum { |
| - private boolean didNext; |
| - private int doc = -1; |
| - private int nextPos; |
| - private Bits liveDocs; |
| - private int[] positions; |
| - private int[] startOffsets; |
| - private int[] endOffsets; |
| - |
| - @Override |
| - public int freq() throws IOException { |
| - if (positions != null) { |
| - return positions.length; |
| - } else { |
| - assert startOffsets != null; |
| - return startOffsets.length; |
| - } |
| - } |
| - |
| - @Override |
| - public int docID() { |
| - return doc; |
| - } |
| - |
| - @Override |
| - public int nextDoc() { |
| - if (!didNext && (liveDocs == null || liveDocs.get(0))) { |
| - didNext = true; |
| - return (doc = 0); |
| - } else { |
| - return (doc = NO_MORE_DOCS); |
| - } |
| - } |
| - |
| - @Override |
| - public int advance(int target) { |
| - if (!didNext && target == 0) { |
| - return nextDoc(); |
| - } else { |
| - return (doc = NO_MORE_DOCS); |
| - } |
| - } |
| - |
| - public void reset(Bits liveDocs, TermAndPostings termAndPostings) { |
| - this.liveDocs = liveDocs; |
| - this.positions = termAndPostings.positions; |
| - this.startOffsets = termAndPostings.startOffsets; |
| - this.endOffsets = termAndPostings.endOffsets; |
| - this.doc = -1; |
| - didNext = false; |
| - nextPos = 0; |
| - } |
| - |
| - @Override |
| - public BytesRef getPayload() { |
| - return null; |
| - } |
| - |
| - @Override |
| - public boolean hasPayload() { |
| - return false; |
| - } |
| - |
| - @Override |
| - public int nextPosition() { |
| - assert (positions != null && nextPos < positions.length) || |
| - startOffsets != null && nextPos < startOffsets.length; |
| - |
| - if (positions != null) { |
| - return positions[nextPos++]; |
| - } else { |
| - nextPos++; |
| - return -1; |
| - } |
| - } |
| - |
| - @Override |
| - public int startOffset() { |
| - assert startOffsets != null; |
| - return startOffsets[nextPos-1]; |
| - } |
| - |
| - @Override |
| - public int endOffset() { |
| - assert endOffsets != null; |
| - return endOffsets[nextPos-1]; |
| - } |
| - } |
| - |
| - @Override |
| - public Fields get(int docID) throws IOException { |
| - if (docID < 0 || docID >= numTotalDocs) { |
| - throw new IllegalArgumentException("doID=" + docID + " is out of bounds [0.." + (numTotalDocs-1) + "]"); |
| - } |
| - if (tvx != null) { |
| - Fields fields = new TVFields(docID); |
| - if (fields.size() == 0) { |
| - // TODO: we can improve writer here, eg write 0 into |
| - // tvx file, so we know on first read from tvx that |
| - // this doc has no TVs |
| - return null; |
| - } else { |
| - return fields; |
| - } |
| - } else { |
| - return null; |
| - } |
| - } |
| - |
| - @Override |
| - public TermVectorsReader clone() { |
| - IndexInput cloneTvx = null; |
| - IndexInput cloneTvd = null; |
| - IndexInput cloneTvf = null; |
| - |
| - // These are null when a TermVectorsReader was created |
| - // on a segment that did not have term vectors saved |
| - if (tvx != null && tvd != null && tvf != null) { |
| - cloneTvx = (IndexInput) tvx.clone(); |
| - cloneTvd = (IndexInput) tvd.clone(); |
| - cloneTvf = (IndexInput) tvf.clone(); |
| - } |
| - |
| - return new Lucene3xTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format); |
| - } |
| - |
| - // If this returns true, we do the surrogate shuffle so |
| - // that the terms are sorted in Unicode sort order. This |
| - // should be true when segments are used for "normal" searching; |
| - // it's only false during testing, to create a pre-flex |
| - // index, using the test-only PreFlexRW. |
| - protected boolean sortTermsByUnicode() { |
| - return true; |
| - } |
| -} |
| - |
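| |
| As context for the deleted reader above: readVectors() decodes prefix-compressed |
| terms, where each entry stores the length of the prefix shared with the previous |
| term, the suffix length, and then only the suffix bytes. A minimal standalone |
| sketch of that decoding, assuming java.io.DataInput with plain readInt() standing |
| in for Lucene's IndexInput.readVInt(); class and method names are hypothetical: |
| |
|   import java.io.DataInput; |
|   import java.io.IOException; |
| |
|   class PrefixTermDecoder { |
|     // Decodes numTerms entries of (prefixLen, suffixLen, suffixBytes). |
|     static byte[][] decode(DataInput in, int numTerms) throws IOException { |
|       byte[][] terms = new byte[numTerms][]; |
|       byte[] last = new byte[0]; |
|       for (int i = 0; i < numTerms; i++) { |
|         int prefix = in.readInt();   // readVInt() in the real format |
|         int suffix = in.readInt();   // readVInt() in the real format |
|         byte[] term = new byte[prefix + suffix]; |
|         System.arraycopy(last, 0, term, 0, prefix); // reuse the shared prefix |
|         in.readFully(term, prefix, suffix);         // read only the new suffix |
|         terms[i] = term; |
|         last = term; |
|       } |
|       return terms; |
|     } |
|   } |
| |
| Sorting the decoded terms afterwards (as reset() does when unicodeSortOrder is |
| set) is what lets the enum present UTF-16-ordered vectors in Unicode order. |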
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java (working copy) |
| @@ -1,45 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.StoredFieldsFormat; |
| -import org.apache.lucene.codecs.StoredFieldsReader; |
| -import org.apache.lucene.codecs.StoredFieldsWriter; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| - |
| -/** @deprecated */ |
| -@Deprecated |
| -class Lucene3xStoredFieldsFormat extends StoredFieldsFormat { |
| - |
| - @Override |
| - public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, |
| - FieldInfos fn, IOContext context) throws IOException { |
| - return new Lucene3xStoredFieldsReader(directory, si, fn, context); |
| - } |
| - |
| - @Override |
| - public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, |
| - IOContext context) throws IOException { |
| - throw new UnsupportedOperationException("this codec can only be used for reading"); |
| - } |
| -} |
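| |
| The deleted format above shows the read-only back-compat pattern used throughout |
| the 3.x codec: readers for old segments still work, but any attempt to write |
| throws. A minimal sketch of the pattern, with hypothetical type parameters in |
| place of the Lucene reader/writer types: |
| |
|   abstract class ReadOnlyFormat<R, W> { |
|     // Old-format segments may still be opened for reading... |
|     abstract R openReader() throws java.io.IOException; |
| |
|     // ...but never for writing: new segments always use the current format. |
|     final W openWriter() { |
|       throw new UnsupportedOperationException("this codec can only be used for reading"); |
|     } |
|   } |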
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java (working copy) |
| @@ -1,272 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Collections; |
| -import java.util.HashMap; |
| -import java.util.HashSet; |
| -import java.util.Map; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.codecs.SegmentInfoReader; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexFormatTooNewException; |
| -import org.apache.lucene.index.IndexFormatTooOldException; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.SegmentInfoPerCommit; |
| -import org.apache.lucene.index.SegmentInfos; |
| -import org.apache.lucene.store.CompoundFileDirectory; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.util.CodecUtil; |
| -import org.apache.lucene.util.IOUtils; |
| - |
| -/** |
| - * Lucene 3x implementation of {@link SegmentInfoReader}. |
| - * @lucene.experimental |
| - * @deprecated |
| - */ |
| -@Deprecated |
| -public class Lucene3xSegmentInfoReader extends SegmentInfoReader { |
| - |
| - public static void readLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format) throws IOException { |
| - infos.version = input.readLong(); // read version |
| - infos.counter = input.readInt(); // read counter |
| - Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader(); |
| - for (int i = input.readInt(); i > 0; i--) { // read segmentInfos |
| - SegmentInfoPerCommit siPerCommit = reader.readLegacySegmentInfo(directory, format, input); |
| - SegmentInfo si = siPerCommit.info; |
| - |
| - if (si.getVersion() == null) { |
| - // Could be a 3.0 - try to open the doc stores - if it fails, it's a |
| - // 2.x segment, and an IndexFormatTooOldException will be thrown, |
| - // which is what we want. |
| - Directory dir = directory; |
| - if (Lucene3xSegmentInfoFormat.getDocStoreOffset(si) != -1) { |
| - if (Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(si)) { |
| - dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( |
| - Lucene3xSegmentInfoFormat.getDocStoreSegment(si), "", |
| - Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READONCE, false); |
| - } |
| - } else if (si.getUseCompoundFile()) { |
| - dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( |
| - si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, false); |
| - } |
| - |
| - try { |
| - Lucene3xStoredFieldsReader.checkCodeVersion(dir, Lucene3xSegmentInfoFormat.getDocStoreSegment(si)); |
| - } finally { |
| - // If we opened the directory, close it |
| - if (dir != directory) dir.close(); |
| - } |
| - |
| - // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next |
| - // time the segment is read, its version won't be null and we won't |
| - // need to open FieldsReader every time for each such segment. |
| - si.setVersion("3.0"); |
| - } else if (si.getVersion().equals("2.x")) { |
| - // If it's a 3x index touched by 3.1+ code, then segments record their |
| - // version, whether they are 2.x ones or not. We detect that and throw |
| - // appropriate exception. |
| - throw new IndexFormatTooOldException("segment " + si.name + " in resource " + input, si.getVersion()); |
| - } |
| - infos.add(siPerCommit); |
| - } |
| - |
| - infos.userData = input.readStringStringMap(); |
| - } |
| - |
| - @Override |
| - public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException { |
| - // NOTE: this is NOT how 3.x is really written... |
| - String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION); |
| - |
| - boolean success = false; |
| - |
| - IndexInput input = directory.openInput(fileName, context); |
| - |
| - try { |
| - SegmentInfo si = readUpgradedSegmentInfo(segmentName, directory, input); |
| - success = true; |
| - return si; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(input); |
| - } else { |
| - input.close(); |
| - } |
| - } |
| - } |
| - |
| - private static void addIfExists(Directory dir, Set<String> files, String fileName) throws IOException { |
| - if (dir.fileExists(fileName)) { |
| - files.add(fileName); |
| - } |
| - } |
| - |
| - /** reads from legacy 3.x segments_N */ |
| - private SegmentInfoPerCommit readLegacySegmentInfo(Directory dir, int format, IndexInput input) throws IOException { |
| - // check that it is a format we can understand |
| - if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) { |
| - throw new IndexFormatTooOldException(input, format, |
| - Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); |
| - } |
| - if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) { |
| - throw new IndexFormatTooNewException(input, format, |
| - Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); |
| - } |
| - final String version; |
| - if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1) { |
| - version = input.readString(); |
| - } else { |
| - version = null; |
| - } |
| - |
| - final String name = input.readString(); |
| - |
| - final int docCount = input.readInt(); |
| - final long delGen = input.readLong(); |
| - |
| - final int docStoreOffset = input.readInt(); |
| - final Map<String,String> attributes = new HashMap<String,String>(); |
| - |
| - // parse the docstore stuff and shove it into attributes |
| - final String docStoreSegment; |
| - final boolean docStoreIsCompoundFile; |
| - if (docStoreOffset != -1) { |
| - docStoreSegment = input.readString(); |
| - docStoreIsCompoundFile = input.readByte() == SegmentInfo.YES; |
| - attributes.put(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY, Integer.toString(docStoreOffset)); |
| - attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, docStoreSegment); |
| - attributes.put(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY, Boolean.toString(docStoreIsCompoundFile)); |
| - } else { |
| - docStoreSegment = name; |
| - docStoreIsCompoundFile = false; |
| - } |
| - |
| - // pre-4.0 indexes write a byte if there is a single norms file |
| - byte b = input.readByte(); |
| - |
| - //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format); |
| - |
| - assert 1 == b : "expected 1 but was: " + b + " format: " + format; |
| - final int numNormGen = input.readInt(); |
| - final Map<Integer,Long> normGen; |
| - if (numNormGen == SegmentInfo.NO) { |
| - normGen = null; |
| - } else { |
| - normGen = new HashMap<Integer, Long>(); |
| - for(int j=0;j<numNormGen;j++) { |
| - normGen.put(j, input.readLong()); |
| - } |
| - } |
| - final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; |
| - |
| - final int delCount = input.readInt(); |
| - assert delCount <= docCount; |
| - |
| - final boolean hasProx = input.readByte() == 1; |
| - |
| - final Map<String,String> diagnostics = input.readStringStringMap(); |
| - |
| - if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS) { |
| - // NOTE: unused |
| - final int hasVectors = input.readByte(); |
| - } |
| - |
| - // Replicate logic from 3.x's SegmentInfo.files(): |
| - final Set<String> files = new HashSet<String>(); |
| - if (isCompoundFile) { |
| - files.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); |
| - } else { |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION)); |
| - } |
| - |
| - if (docStoreOffset != -1) { |
| - if (docStoreIsCompoundFile) { |
| - files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); |
| - } else { |
| - files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); |
| - files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); |
| - } |
| - } else if (!isCompoundFile) { |
| - files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); |
| - files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); |
| - addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); |
| - } |
| - |
| - // parse the normgen stuff and shove it into attributes |
| - if (normGen != null) { |
| - attributes.put(Lucene3xSegmentInfoFormat.NORMGEN_KEY, Integer.toString(numNormGen)); |
| - for(Map.Entry<Integer,Long> ent : normGen.entrySet()) { |
| - long gen = ent.getValue(); |
| - if (gen >= SegmentInfo.YES) { |
| - // Definitely a separate norm file, with generation: |
| - files.add(IndexFileNames.fileNameFromGeneration(name, "s" + ent.getKey(), gen)); |
| - attributes.put(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.getKey(), Long.toString(gen)); |
| - } else if (gen == SegmentInfo.NO) { |
| - // No separate norm |
| - } else { |
| - // We should have already hit indexformat too old exception |
| - assert false; |
| - } |
| - } |
| - } |
| - |
| - SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, |
| - null, diagnostics, Collections.unmodifiableMap(attributes)); |
| - info.setFiles(files); |
| - |
| - SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, delCount, delGen); |
| - return infoPerCommit; |
| - } |
| - |
| - private SegmentInfo readUpgradedSegmentInfo(String name, Directory dir, IndexInput input) throws IOException { |
| - CodecUtil.checkHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, |
| - Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START, |
| - Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT); |
| - final String version = input.readString(); |
| - |
| - final int docCount = input.readInt(); |
| - |
| - final Map<String,String> attributes = input.readStringStringMap(); |
| - |
| - final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; |
| - |
| - final Map<String,String> diagnostics = input.readStringStringMap(); |
| - |
| - final Set<String> files = input.readStringSet(); |
| - |
| - SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, |
| - null, diagnostics, Collections.unmodifiableMap(attributes)); |
| - info.setFiles(files); |
| - return info; |
| - } |
| -} |
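| |
| readLegacySegmentInfo() above carries 3.x doc-store details forward by shoving |
| them into the 4.0 attributes map. A minimal sketch of that mapping, assuming |
| plain string keys where the real code uses the Lucene3xSegmentInfoFormat |
| constants (DS_OFFSET_KEY and friends); the class is illustrative only: |
| |
|   import java.util.HashMap; |
|   import java.util.Map; |
| |
|   class DocStoreAttributes { |
|     static Map<String,String> fromLegacy(int dsOffset, String dsSegment, boolean dsCompound) { |
|       Map<String,String> attributes = new HashMap<String,String>(); |
|       // dsOffset == -1 means the segment owns its own stored fields and |
|       // vectors, so nothing needs to be recorded. |
|       if (dsOffset != -1) { |
|         attributes.put("lucene3x.ds.offset", Integer.toString(dsOffset));     // DS_OFFSET_KEY |
|         attributes.put("lucene3x.ds.segment", dsSegment);                     // DS_NAME_KEY |
|         attributes.put("lucene3x.ds.compound", Boolean.toString(dsCompound)); // DS_COMPOUND_KEY |
|       } |
|       return attributes; |
|     } |
|   } |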
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java (working copy) |
| @@ -1,240 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Collections; |
| -import java.util.HashMap; |
| -import java.util.IdentityHashMap; |
| -import java.util.Map; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.codecs.PerDocProducer; |
| -import org.apache.lucene.index.DocValues; |
| -import org.apache.lucene.index.DocValues.Source; |
| -import org.apache.lucene.index.DocValues.Type; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.IOUtils; |
| -import org.apache.lucene.util.StringHelper; |
| - |
| -/** |
| - * Reads Lucene 3.x norms format and exposes it via DocValues API |
| - * @lucene.experimental |
| - * @deprecated |
| - */ |
| -@Deprecated |
| -class Lucene3xNormsProducer extends PerDocProducer { |
| - |
| - /** norms header placeholder */ |
| - static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; |
| - |
| - /** Extension of norms file */ |
| - static final String NORMS_EXTENSION = "nrm"; |
| - |
| - /** Extension of separate norms file */ |
| - static final String SEPARATE_NORMS_EXTENSION = "s"; |
| - |
| - final Map<String,NormsDocValues> norms = new HashMap<String,NormsDocValues>(); |
| - // any .nrm or .sNN files we have open at any time. |
| - // TODO: just a list, and double-close() separate norms files? |
| - final Set<IndexInput> openFiles = Collections.newSetFromMap(new IdentityHashMap<IndexInput,Boolean>()); |
| - // points to a singleNormFile |
| - IndexInput singleNormStream; |
| - final int maxdoc; |
| - |
| - // note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front. |
| - // but we just don't do any seeks or reading yet. |
| - public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context) throws IOException { |
| - Directory separateNormsDir = info.dir; // separate norms are never inside CFS |
| - maxdoc = info.getDocCount(); |
| - String segmentName = info.name; |
| - boolean success = false; |
| - try { |
| - long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now) |
| - for (FieldInfo fi : fields) { |
| - if (fi.hasNorms()) { |
| - String fileName = getNormFilename(info, fi.number); |
| - Directory d = hasSeparateNorms(info, fi.number) ? separateNormsDir : dir; |
| - |
| - // singleNormFile means multiple norms share this file |
| - boolean singleNormFile = IndexFileNames.matchesExtension(fileName, NORMS_EXTENSION); |
| - IndexInput normInput = null; |
| - long normSeek; |
| - |
| - if (singleNormFile) { |
| - normSeek = nextNormSeek; |
| - if (singleNormStream == null) { |
| - singleNormStream = d.openInput(fileName, context); |
| - openFiles.add(singleNormStream); |
| - } |
| - // All norms in the .nrm file can share a single IndexInput since |
| - // they are only used in a synchronized context. |
| - // If this were to change in the future, a clone could be done here. |
| - normInput = singleNormStream; |
| - } else { |
| - normInput = d.openInput(fileName, context); |
| - openFiles.add(normInput); |
| - // if the segment was created in 3.2 or after, we wrote the header for sure, |
| - // and don't need to do the sketchy file size check. otherwise, we check |
| - // if the size is exactly equal to maxDoc to detect a headerless file. |
| - // NOTE: remove this check in Lucene 5.0! |
| - String version = info.getVersion(); |
| - final boolean isUnversioned = |
| - (version == null || StringHelper.getVersionComparator().compare(version, "3.2") < 0) |
| - && normInput.length() == maxdoc; |
| - if (isUnversioned) { |
| - normSeek = 0; |
| - } else { |
| - normSeek = NORMS_HEADER.length; |
| - } |
| - } |
| - NormsDocValues norm = new NormsDocValues(normInput, normSeek); |
| - norms.put(fi.name, norm); |
| - nextNormSeek += maxdoc; // increment also if some norms are separate |
| - } |
| - } |
| - // TODO: change to a real check? see LUCENE-3619 |
| - assert singleNormStream == null || nextNormSeek == singleNormStream.length() : singleNormStream != null ? "len: " + singleNormStream.length() + " expected: " + nextNormSeek : "null"; |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(openFiles); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public DocValues docValues(String field) throws IOException { |
| - return norms.get(field); |
| - } |
| - |
| - @Override |
| - public void close() throws IOException { |
| - try { |
| - IOUtils.close(openFiles); |
| - } finally { |
| - norms.clear(); |
| - openFiles.clear(); |
| - } |
| - } |
| - |
| - private static String getNormFilename(SegmentInfo info, int number) { |
| - if (hasSeparateNorms(info, number)) { |
| - long gen = Long.parseLong(info.getAttribute(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + number)); |
| - return IndexFileNames.fileNameFromGeneration(info.name, SEPARATE_NORMS_EXTENSION + number, gen); |
| - } else { |
| - // single file for all norms |
| - return IndexFileNames.segmentFileName(info.name, "", NORMS_EXTENSION); |
| - } |
| - } |
| - |
| - private static boolean hasSeparateNorms(SegmentInfo info, int number) { |
| - String v = info.getAttribute(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + number); |
| - if (v == null) { |
| - return false; |
| - } else { |
| - assert Long.parseLong(v) != SegmentInfo.NO; |
| - return true; |
| - } |
| - } |
| - |
| - static final class NormSource extends Source { |
| - protected NormSource(byte[] bytes) { |
| - super(Type.FIXED_INTS_8); |
| - this.bytes = bytes; |
| - } |
| - |
| - final byte bytes[]; |
| - |
| - @Override |
| - public BytesRef getBytes(int docID, BytesRef ref) { |
| - ref.bytes = bytes; |
| - ref.offset = docID; |
| - ref.length = 1; |
| - return ref; |
| - } |
| - |
| - @Override |
| - public long getInt(int docID) { |
| - return bytes[docID]; |
| - } |
| - |
| - @Override |
| - public boolean hasArray() { |
| - return true; |
| - } |
| - |
| - @Override |
| - public Object getArray() { |
| - return bytes; |
| - } |
| - |
| - } |
| - |
| - private class NormsDocValues extends DocValues { |
| - private final IndexInput file; |
| - private final long offset; |
| - public NormsDocValues(IndexInput normInput, long normSeek) { |
| - this.file = normInput; |
| - this.offset = normSeek; |
| - } |
| - |
| - @Override |
| - public Source load() throws IOException { |
| - return new NormSource(bytes()); |
| - } |
| - |
| - @Override |
| - public Source getDirectSource() throws IOException { |
| - return getSource(); |
| - } |
| - |
| - @Override |
| - public Type getType() { |
| - return Type.FIXED_INTS_8; |
| - } |
| - |
| - byte[] bytes() throws IOException { |
| - byte[] bytes = new byte[maxdoc]; |
| - // some norms share fds |
| - synchronized(file) { |
| - file.seek(offset); |
| - file.readBytes(bytes, 0, bytes.length, false); |
| - } |
| - // we are done with this file |
| - if (file != singleNormStream) { |
| - openFiles.remove(file); |
| - file.close(); |
| - } |
| - return bytes; |
| - } |
| - |
| - @Override |
| - public int getValueSize() { |
| - return 1; |
| - } |
| - |
| - } |
| -} |
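| |
| The .nrm layout the deleted producer reads is flat: a 4-byte header followed by |
| exactly maxDoc bytes per norms-bearing field, in field order, which is why |
| nextNormSeek above advances by maxdoc for each field. A small sketch of that |
| offset arithmetic (illustrative; the real reader also handles separate .sNN |
| generation files, which start at offset 0 when headerless): |
| |
|   class NrmLayout { |
|     static final int HEADER_LENGTH = 4; // 'N','R','M',-1 in the original |
| |
|     // Seek position of the i-th norms-bearing field in a shared .nrm file. |
|     static long offsetOfField(int fieldIndexWithNorms, int maxDoc) { |
|       return HEADER_LENGTH + (long) fieldIndexWithNorms * maxDoc; |
|     } |
|   } |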
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java (working copy) |
| @@ -1,117 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Collections; |
| - |
| -import org.apache.lucene.codecs.FieldInfosReader; |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.DocValues.Type; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexFormatTooNewException; |
| -import org.apache.lucene.index.IndexFormatTooOldException; |
| -import org.apache.lucene.index.FieldInfo.IndexOptions; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| - |
| -/** |
| - * @lucene.experimental |
| - * @deprecated |
| - */ |
| -@Deprecated |
| -class Lucene3xFieldInfosReader extends FieldInfosReader { |
| - /** Extension of field infos */ |
| - static final String FIELD_INFOS_EXTENSION = "fnm"; |
| - |
| - // First used in 2.9; prior to 2.9 there was no format header |
| - static final int FORMAT_START = -2; |
| - // First used in 3.4: omit only positional information |
| - static final int FORMAT_OMIT_POSITIONS = -3; |
| - static final int FORMAT_MINIMUM = FORMAT_START; |
| - static final int FORMAT_CURRENT = FORMAT_OMIT_POSITIONS; |
| - static final byte IS_INDEXED = 0x1; |
| - static final byte STORE_TERMVECTOR = 0x2; |
| - static final byte OMIT_NORMS = 0x10; |
| - static final byte STORE_PAYLOADS = 0x20; |
| - static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40; |
| - static final byte OMIT_POSITIONS = -128; |
| - |
| - @Override |
| - public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException { |
| - final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION); |
| - IndexInput input = directory.openInput(fileName, iocontext); |
| - |
| - try { |
| - final int format = input.readVInt(); |
| - |
| - if (format > FORMAT_MINIMUM) { |
| - throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - } |
| - if (format < FORMAT_CURRENT) { |
| - throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - } |
| - |
| - final int size = input.readVInt(); //read in the size |
| - FieldInfo infos[] = new FieldInfo[size]; |
| - |
| - for (int i = 0; i < size; i++) { |
| - String name = input.readString(); |
| - final int fieldNumber = i; |
| - byte bits = input.readByte(); |
| - boolean isIndexed = (bits & IS_INDEXED) != 0; |
| - boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0; |
| - boolean omitNorms = (bits & OMIT_NORMS) != 0; |
| - boolean storePayloads = (bits & STORE_PAYLOADS) != 0; |
| - final IndexOptions indexOptions; |
| - if (!isIndexed) { |
| - indexOptions = null; |
| - } else if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) { |
| - indexOptions = IndexOptions.DOCS_ONLY; |
| - } else if ((bits & OMIT_POSITIONS) != 0) { |
| - if (format <= FORMAT_OMIT_POSITIONS) { |
| - indexOptions = IndexOptions.DOCS_AND_FREQS; |
| - } else { |
| - throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")"); |
| - } |
| - } else { |
| - indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; |
| - } |
| - |
| - // LUCENE-3027: past indices were able to write |
| - // storePayloads=true when omitTFAP is also true, |
| - // which is invalid. We correct that, here: |
| - if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { |
| - storePayloads = false; |
| - } |
| - infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, |
| - omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.FIXED_INTS_8 : null, Collections.<String,String>emptyMap()); |
| - } |
| - |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| - } |
| - return new FieldInfos(infos); |
| - } finally { |
| - input.close(); |
| - } |
| - } |
| -} |
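| |
| The per-field flag byte read above packs several booleans into one byte; note |
| that OMIT_POSITIONS occupies the sign bit (-128), so tests must mask the whole |
| byte. A minimal decoding sketch reusing the constant values from the deleted |
| reader; the describe() helper is hypothetical: |
| |
|   class FieldFlagBits { |
|     static final byte IS_INDEXED = 0x1; |
|     static final byte STORE_TERMVECTOR = 0x2; |
|     static final byte OMIT_NORMS = 0x10; |
|     static final byte STORE_PAYLOADS = 0x20; |
|     static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40; |
|     static final byte OMIT_POSITIONS = -128; |
| |
|     static String describe(byte bits) { |
|       StringBuilder sb = new StringBuilder(); |
|       if ((bits & IS_INDEXED) != 0) sb.append("indexed "); |
|       if ((bits & STORE_TERMVECTOR) != 0) sb.append("termVectors "); |
|       if ((bits & OMIT_NORMS) != 0) sb.append("omitNorms "); |
|       if ((bits & STORE_PAYLOADS) != 0) sb.append("payloads "); |
|       if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) sb.append("docsOnly "); |
|       if ((bits & OMIT_POSITIONS) != 0) sb.append("omitPositions "); |
|       return sb.toString().trim(); |
|     } |
|   } |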
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java (working copy) |
| @@ -1,1106 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Comparator; |
| -import java.util.HashMap; |
| -import java.util.Iterator; |
| -import java.util.Map; |
| -import java.util.TreeMap; |
| - |
| -import org.apache.lucene.codecs.FieldsProducer; |
| -import org.apache.lucene.index.DocsAndPositionsEnum; |
| -import org.apache.lucene.index.DocsEnum; |
| -import org.apache.lucene.index.FieldInfo.IndexOptions; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.FieldsEnum; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.Terms; |
| -import org.apache.lucene.index.TermsEnum; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.UnicodeUtil; |
| - |
| -/** Exposes flex API on a pre-flex index, as a codec. |
| - * @lucene.experimental |
| - * @deprecated (4.0) |
| - */ |
| -@Deprecated |
| -class Lucene3xFields extends FieldsProducer { |
| - |
| - private static final boolean DEBUG_SURROGATES = false; |
| - |
| - public TermInfosReader tis; |
| - public final TermInfosReader tisNoIndex; |
| - |
| - public final IndexInput freqStream; |
| - public final IndexInput proxStream; |
| - final private FieldInfos fieldInfos; |
| - private final SegmentInfo si; |
| - final TreeMap<String,FieldInfo> fields = new TreeMap<String,FieldInfo>(); |
| - final Map<String,Terms> preTerms = new HashMap<String,Terms>(); |
| - private final Directory dir; |
| - private final IOContext context; |
| - private Directory cfsReader; |
| - |
| - public Lucene3xFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, IOContext context, int indexDivisor) |
| - throws IOException { |
| - |
| - si = info; |
| - |
| - // NOTE: we must always load terms index, even for |
| - // "sequential" scan during merging, because what is |
| - // sequential to the merger may not be to TermInfosReader |
| - // since we do the surrogates dance: |
| - if (indexDivisor < 0) { |
| - indexDivisor = -indexDivisor; |
| - } |
| - |
| - boolean success = false; |
| - try { |
| - TermInfosReader r = new TermInfosReader(dir, info.name, fieldInfos, context, indexDivisor); |
| - if (indexDivisor == -1) { |
| - tisNoIndex = r; |
| - } else { |
| - tisNoIndex = null; |
| - tis = r; |
| - } |
| - this.context = context; |
| - this.fieldInfos = fieldInfos; |
| - |
| - // make sure that all index files have been read or are kept open |
| - // so that if an index update removes them we'll still have them |
| - freqStream = dir.openInput(IndexFileNames.segmentFileName(info.name, "", Lucene3xPostingsFormat.FREQ_EXTENSION), context); |
| - boolean anyProx = false; |
| - for (FieldInfo fi : fieldInfos) { |
| - if (fi.isIndexed()) { |
| - fields.put(fi.name, fi); |
| - preTerms.put(fi.name, new PreTerms(fi)); |
| - if (fi.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { |
| - anyProx = true; |
| - } |
| - } |
| - } |
| - |
| - if (anyProx) { |
| - proxStream = dir.openInput(IndexFileNames.segmentFileName(info.name, "", Lucene3xPostingsFormat.PROX_EXTENSION), context); |
| - } else { |
| - proxStream = null; |
| - } |
| - success = true; |
| - } finally { |
| - // With lock-less commits, it's entirely possible (and |
| - // fine) to hit a FileNotFound exception above. In |
| - // this case, we want to explicitly close any subset |
| - // of things that were opened so that we don't have to |
| - // wait for a GC to do so. |
| - if (!success) { |
| - close(); |
| - } |
| - } |
| - this.dir = dir; |
| - } |
| - |
| - // If this returns true, we do the surrogate dance so |
| - // that the terms are sorted in Unicode sort order. This |
| - // should be true when segments are used for "normal" searching; |
| - // it's only false during testing, to create a pre-flex |
| - // index, using the test-only PreFlexRW. |
| - protected boolean sortTermsByUnicode() { |
| - return true; |
| - } |
| - |
| - @Override |
| - public FieldsEnum iterator() throws IOException { |
| - return new PreFlexFieldsEnum(); |
| - } |
| - |
| - @Override |
| - public Terms terms(String field) { |
| - return preTerms.get(field); |
| - } |
| - |
| - @Override |
| - public int size() { |
| - return preTerms.size(); |
| - } |
| - |
| - @Override |
| - public long getUniqueTermCount() throws IOException { |
| - return getTermsDict().size(); |
| - } |
| - |
| - synchronized private TermInfosReader getTermsDict() { |
| - if (tis != null) { |
| - return tis; |
| - } else { |
| - return tisNoIndex; |
| - } |
| - } |
| - |
| - @Override |
| - public void close() throws IOException { |
| - if (tis != null) { |
| - tis.close(); |
| - } |
| - if (tisNoIndex != null) { |
| - tisNoIndex.close(); |
| - } |
| - if (cfsReader != null) { |
| - cfsReader.close(); |
| - } |
| - if (freqStream != null) { |
| - freqStream.close(); |
| - } |
| - if (proxStream != null) { |
| - proxStream.close(); |
| - } |
| - } |
| - |
| - private class PreFlexFieldsEnum extends FieldsEnum { |
| - final Iterator<FieldInfo> it; |
| - FieldInfo current; |
| - |
| - public PreFlexFieldsEnum() throws IOException { |
| - it = fields.values().iterator(); |
| - } |
| - |
| - @Override |
| - public String next() { |
| - if (it.hasNext()) { |
| - current = it.next(); |
| - return current.name; |
| - } else { |
| - return null; |
| - } |
| - } |
| - |
| - @Override |
| - public Terms terms() throws IOException { |
| - return Lucene3xFields.this.terms(current.name); |
| - } |
| - } |
| - |
| - private class PreTerms extends Terms { |
| - final FieldInfo fieldInfo; |
| - PreTerms(FieldInfo fieldInfo) { |
| - this.fieldInfo = fieldInfo; |
| - } |
| - |
| - @Override |
| - public TermsEnum iterator(TermsEnum reuse) throws IOException { |
| - PreTermsEnum termsEnum = new PreTermsEnum(); |
| - termsEnum.reset(fieldInfo); |
| - return termsEnum; |
| - } |
| - |
| - @Override |
| - public Comparator<BytesRef> getComparator() { |
| - // Pre-flex indexes always sorted in UTF16 order, but |
| - // we remap on-the-fly to unicode order |
| - if (sortTermsByUnicode()) { |
| - return BytesRef.getUTF8SortedAsUnicodeComparator(); |
| - } else { |
| - return BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - } |
| - } |
| - |
| - @Override |
| - public long size() throws IOException { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public long getSumTotalTermFreq() { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public long getSumDocFreq() throws IOException { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public int getDocCount() throws IOException { |
| - return -1; |
| - } |
| - } |
| - |
| - private class PreTermsEnum extends TermsEnum { |
| - private SegmentTermEnum termEnum; |
| - private FieldInfo fieldInfo; |
| - private String internedFieldName; |
| - private boolean skipNext; |
| - private BytesRef current; |
| - |
| - private SegmentTermEnum seekTermEnum; |
| - |
| - private static final byte UTF8_NON_BMP_LEAD = (byte) 0xf0; |
| - private static final byte UTF8_HIGH_BMP_LEAD = (byte) 0xee; |
| - |
| - // Returns true if the unicode char is "after" the |
| - // surrogates in UTF16, ie >= U+E000 and <= U+FFFF: |
| - private final boolean isHighBMPChar(byte[] b, int idx) { |
| - return (b[idx] & UTF8_HIGH_BMP_LEAD) == UTF8_HIGH_BMP_LEAD; |
| - } |
| - |
| - // Returns true if the unicode char in the UTF8 byte |
| - // sequence starting at idx encodes a char outside of |
| - // BMP (ie what would be a surrogate pair in UTF16): |
| - private final boolean isNonBMPChar(byte[] b, int idx) { |
| - return (b[idx] & UTF8_NON_BMP_LEAD) == UTF8_NON_BMP_LEAD; |
| - } |
| - |
| - private final byte[] scratch = new byte[4]; |
| - private final BytesRef prevTerm = new BytesRef(); |
| - private final BytesRef scratchTerm = new BytesRef(); |
| - private int newSuffixStart; |
| - |
| - // Swap in S, in place of E: |
| - private boolean seekToNonBMP(SegmentTermEnum te, BytesRef term, int pos) throws IOException { |
| - final int savLength = term.length; |
| - |
| - assert term.offset == 0; |
| - |
| - // The 3 bytes starting at pos make up 1 |
| - // unicode character: |
| - assert isHighBMPChar(term.bytes, pos); |
| - |
| - // NOTE: we cannot make this assert, because |
| - // AutomatonQuery legitimately sends us malformed UTF8 |
| - // (eg the UTF8 bytes with just 0xee) |
| - // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString(); |
| - |
| - // Save the bytes and length, since we need to |
| - // restore this if seek "back" finds no matching |
| - // terms |
| - if (term.bytes.length < 4+pos) { |
| - term.grow(4+pos); |
| - } |
| - |
| - scratch[0] = term.bytes[pos]; |
| - scratch[1] = term.bytes[pos+1]; |
| - scratch[2] = term.bytes[pos+2]; |
| - |
| - term.bytes[pos] = (byte) 0xf0; |
| - term.bytes[pos+1] = (byte) 0x90; |
| - term.bytes[pos+2] = (byte) 0x80; |
| - term.bytes[pos+3] = (byte) 0x80; |
| - term.length = 4+pos; |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" try seek term=" + UnicodeUtil.toHexString(term.utf8ToString())); |
| - } |
| - |
| - // Seek "back": |
| - getTermsDict().seekEnum(te, new Term(fieldInfo.name, term), true); |
| - |
| - // Test if the term we seek'd to in fact found a |
| - // surrogate pair at the same position as the E: |
| - Term t2 = te.term(); |
| - |
| - // Cannot be null (or move to next field) because at |
| - // "worst" it'd seek to the same term we are on now, |
| - // unless we are being called from seek |
| - if (t2 == null || t2.field() != internedFieldName) { |
| - return false; |
| - } |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" got term=" + UnicodeUtil.toHexString(t2.text())); |
| - } |
| - |
| - // Now test if prefix is identical and we found |
| - // a non-BMP char at the same position: |
| - BytesRef b2 = t2.bytes(); |
| - assert b2.offset == 0; |
| - |
| - boolean matches; |
| - if (b2.length >= term.length && isNonBMPChar(b2.bytes, pos)) { |
| - matches = true; |
| - for(int i=0;i<pos;i++) { |
| - if (term.bytes[i] != b2.bytes[i]) { |
| - matches = false; |
| - break; |
| - } |
| - } |
| - } else { |
| - matches = false; |
| - } |
| - |
| - // Restore term: |
| - term.length = savLength; |
| - term.bytes[pos] = scratch[0]; |
| - term.bytes[pos+1] = scratch[1]; |
| - term.bytes[pos+2] = scratch[2]; |
| - |
| - return matches; |
| - } |
| - |
| - // Seek type 2 "continue" (back to the start of the |
| - // surrogates): scan the stripped suffix from the |
| - // prior term, backwards. If there was an E in that |
| - // part, then we try to seek back to S. If that |
| - // seek finds a matching term, we go there. |
| - private boolean doContinue() throws IOException { |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" try cont"); |
| - } |
| - |
| - int downTo = prevTerm.length-1; |
| - |
| - boolean didSeek = false; |
| - |
| - final int limit = Math.min(newSuffixStart, scratchTerm.length-1); |
| - |
| - while(downTo > limit) { |
| - |
| - if (isHighBMPChar(prevTerm.bytes, downTo)) { |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" found E pos=" + downTo + " vs len=" + prevTerm.length); |
| - } |
| - |
| - if (seekToNonBMP(seekTermEnum, prevTerm, downTo)) { |
| - // TODO: more efficient seek? |
| - getTermsDict().seekEnum(termEnum, seekTermEnum.term(), true); |
| - //newSuffixStart = downTo+4; |
| - newSuffixStart = downTo; |
| - scratchTerm.copyBytes(termEnum.term().bytes()); |
| - didSeek = true; |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" seek!"); |
| - } |
| - break; |
| - } else { |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" no seek"); |
| - } |
| - } |
| - } |
| - |
| - // Shorten prevTerm in place so that we don't redo |
| - // this loop if we come back here: |
| - if ((prevTerm.bytes[downTo] & 0xc0) == 0xc0 || (prevTerm.bytes[downTo] & 0x80) == 0) { |
| - prevTerm.length = downTo; |
| - } |
| - |
| - downTo--; |
| - } |
| - |
| - return didSeek; |
| - } |
| - |
| - // Look for seek type 3 ("pop"): if the delta from |
| - // prev -> current was replacing an S with an E, |
| - // we must now seek to beyond that E. This seek |
| - // "finishes" the dance at this character |
| - // position. |
| - private boolean doPop() throws IOException { |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" try pop"); |
| - } |
| - |
| - assert newSuffixStart <= prevTerm.length; |
| - assert newSuffixStart < scratchTerm.length || newSuffixStart == 0; |
| - |
| - if (prevTerm.length > newSuffixStart && |
| - isNonBMPChar(prevTerm.bytes, newSuffixStart) && |
| - isHighBMPChar(scratchTerm.bytes, newSuffixStart)) { |
| - |
| - // Seek type 3 -- put 0xFF at this position: |
| - scratchTerm.bytes[newSuffixStart] = (byte) 0xff; |
| - scratchTerm.length = newSuffixStart+1; |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" seek to term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString()) + " " + scratchTerm.toString()); |
| - } |
| - |
| - // TODO: more efficient seek? can we simply swap |
| - // the enums? |
| - getTermsDict().seekEnum(termEnum, new Term(fieldInfo.name, scratchTerm), true); |
| - |
| - final Term t2 = termEnum.term(); |
| - |
| - // We could hit EOF or different field since this |
| - // was a seek "forward": |
| - if (t2 != null && t2.field() == internedFieldName) { |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" got term=" + UnicodeUtil.toHexString(t2.text()) + " " + t2.bytes()); |
| - } |
| - |
| - final BytesRef b2 = t2.bytes(); |
| - assert b2.offset == 0; |
| - |
| - |
| - // Set newSuffixStart -- we can't use |
| - // termEnum's since the above seek may have |
| - // done no scanning (eg, term was precisely |
| - // an index term, or was in the term seek |
| - // cache): |
| - scratchTerm.copyBytes(b2); |
| - setNewSuffixStart(prevTerm, scratchTerm); |
| - |
| - return true; |
| - } else if (newSuffixStart != 0 || scratchTerm.length != 0) { |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" got term=null (or next field)"); |
| - } |
| - newSuffixStart = 0; |
| - scratchTerm.length = 0; |
| - return true; |
| - } |
| - } |
| - |
| - return false; |
| - } |
| - |
| - // Pre-flex indices store terms in UTF16 sort order, but |
| - // certain queries require Unicode codepoint order; this |
| - // method carefully seeks around surrogates to handle |
| - // this impedance mismatch |
| - |
| - private void surrogateDance() throws IOException { |
| - |
| - if (!unicodeSortOrder) { |
| - return; |
| - } |
| - |
| - // We are invoked after TIS.next() (by UTF16 order) to |
| - // possibly seek to a different "next" (by unicode |
| - // order) term. |
| - |
| - // We scan only the "delta" from the last term to the |
| - // current term, in UTF8 bytes. We look at 1) the bytes |
| - // stripped from the prior term, and then 2) the bytes |
| - // appended to that prior term's prefix. |
| - |
| - // We don't care about specific UTF8 sequences, just |
| - // the "category" of the UTF16 character. Category S |
| - // is a high/low surrogate pair (i.e. non-BMP). |
| - // Category E is any BMP char > UNI_SUR_LOW_END (and < |
| - // U+FFFF). Category A is the rest (any unicode char |
| - // <= UNI_SUR_HIGH_START). |
| - |
| - // The core issue is that pre-flex indices sort the |
| - // characters as ASE, while flex must sort as AES. So |
| - // when scanning, when we hit S, we must 1) seek |
| - // forward to E and enum the terms there, then 2) seek |
| - // back to S and enum all terms there, then 3) seek to |
| - // after E. Three different seek points (1, 2, 3). |
| - |
| - // We can easily detect S in UTF8: if a byte has |
| - // prefix 11110 (0xf0), then that byte and the |
| - // following 3 bytes encode a single unicode codepoint |
| - // in S. Similarly, we can detect E: if a byte has |
| - // prefix 1110111x (0xee or 0xef), then that byte and the |
| - // following 2 bytes encode a single unicode codepoint |
| - // in E. |
| - |
| - // Note that this is really a recursive process -- |
| - // maybe the char at pos 2 needs to dance, but any |
| - // point in its dance, suddenly pos 4 needs to dance |
| - // so you must finish pos 4 before returning to pos |
| - // 2. But then during pos 4's dance maybe pos 7 needs |
| - // to dance, etc. However, despite being recursive, |
| - // we don't need to hold any state because the state |
| - // can always be derived by looking at prior term & |
| - // current term. |
| - |
| - // TODO: can we avoid this copy? |
| - if (termEnum.term() == null || termEnum.term().field() != internedFieldName) { |
| - scratchTerm.length = 0; |
| - } else { |
| - scratchTerm.copyBytes(termEnum.term().bytes()); |
| - } |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" dance"); |
| - System.out.println(" prev=" + UnicodeUtil.toHexString(prevTerm.utf8ToString())); |
| - System.out.println(" " + prevTerm.toString()); |
| - System.out.println(" term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString())); |
| - System.out.println(" " + scratchTerm.toString()); |
| - } |
| - |
| - // This code assumes TermInfosReader/SegmentTermEnum |
| - // always use BytesRef.offset == 0 |
| - assert prevTerm.offset == 0; |
| - assert scratchTerm.offset == 0; |
| - |
| - // Need to loop here because we may need to do multiple |
| - // pops, and possibly a continue in the end, ie: |
| - // |
| - // cont |
| - // pop, cont |
| - // pop, pop, cont |
| - // <nothing> |
| - // |
| - |
| - while(true) { |
| - if (doContinue()) { |
| - break; |
| - } else { |
| - if (!doPop()) { |
| - break; |
| - } |
| - } |
| - } |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" finish bmp ends"); |
| - } |
| - |
| - doPushes(); |
| - } |
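| |
| A standalone sketch of the category tests described in the comments above; |
| SurrogateCategories is a hypothetical name, but the bit tests mirror |
| isNonBMPChar and isHighBMPChar exactly: |
| |
|   class SurrogateCategories { |
|     // Category S: lead byte of a 4-byte UTF-8 sequence, i.e. a codepoint |
|     // outside the BMP (a surrogate pair in UTF-16). |
|     static boolean isNonBMP(byte lead) { |
|       return (lead & 0xf0) == 0xf0; |
|     } |
| |
|     // Category E: U+E000..U+FFFF, whose UTF-8 lead byte is 0xEE or 0xEF. |
|     static boolean isHighBMP(byte lead) { |
|       return (lead & 0xee) == 0xee; |
|     } |
|   } |
| |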
| - |
| - |
| - // Look for seek type 1 ("push"): if the newly added |
| - // suffix contains any S, we must try to seek to the |
| - // corresponding E. If we find a match, we go there; |
| - // else we keep looking for additional S's in the new |
| - // suffix. This "starts" the dance, at this character |
| - // position: |
| - private void doPushes() throws IOException { |
| - |
| - int upTo = newSuffixStart; |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" try push newSuffixStart=" + newSuffixStart + " scratchLen=" + scratchTerm.length); |
| - } |
| - |
| - while(upTo < scratchTerm.length) { |
| - if (isNonBMPChar(scratchTerm.bytes, upTo) && |
| - (upTo > newSuffixStart || |
| - (upTo >= prevTerm.length || |
| - (!isNonBMPChar(prevTerm.bytes, upTo) && |
| - !isHighBMPChar(prevTerm.bytes, upTo))))) { |
| - |
| - // A non-BMP char (4 bytes UTF8) starts here: |
| - assert scratchTerm.length >= upTo + 4; |
| - |
| - final int savLength = scratchTerm.length; |
| - scratch[0] = scratchTerm.bytes[upTo]; |
| - scratch[1] = scratchTerm.bytes[upTo+1]; |
| - scratch[2] = scratchTerm.bytes[upTo+2]; |
| - |
| - scratchTerm.bytes[upTo] = UTF8_HIGH_BMP_LEAD; |
| - scratchTerm.bytes[upTo+1] = (byte) 0x80; |
| - scratchTerm.bytes[upTo+2] = (byte) 0x80; |
| - scratchTerm.length = upTo+3; |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" try seek 1 pos=" + upTo + " term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString()) + " " + scratchTerm.toString() + " len=" + scratchTerm.length); |
| - } |
| - |
| - // Seek "forward": |
| - // TODO: more efficient seek? |
| - getTermsDict().seekEnum(seekTermEnum, new Term(fieldInfo.name, scratchTerm), true); |
| - |
| - scratchTerm.bytes[upTo] = scratch[0]; |
| - scratchTerm.bytes[upTo+1] = scratch[1]; |
| - scratchTerm.bytes[upTo+2] = scratch[2]; |
| - scratchTerm.length = savLength; |
| - |
| - // Did we find a match? |
| - final Term t2 = seekTermEnum.term(); |
| - |
| - if (DEBUG_SURROGATES) { |
| - if (t2 == null) { |
| - System.out.println(" hit term=null"); |
| - } else { |
| - System.out.println(" hit term=" + UnicodeUtil.toHexString(t2.text()) + " " + (t2==null? null:t2.bytes())); |
| - } |
| - } |
| - |
| - // Since this was a seek "forward", we could hit |
| - // EOF or a different field: |
| - boolean matches; |
| - |
| - if (t2 != null && t2.field() == internedFieldName) { |
| - final BytesRef b2 = t2.bytes(); |
| - assert b2.offset == 0; |
| - if (b2.length >= upTo+3 && isHighBMPChar(b2.bytes, upTo)) { |
| - matches = true; |
| - for(int i=0;i<upTo;i++) { |
| - if (scratchTerm.bytes[i] != b2.bytes[i]) { |
| - matches = false; |
| - break; |
| - } |
| - } |
| - |
| - } else { |
| - matches = false; |
| - } |
| - } else { |
| - matches = false; |
| - } |
| - |
| - if (matches) { |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" matches!"); |
| - } |
| - |
| - // OK seek "back" |
| - // TODO: more efficient seek? |
| - getTermsDict().seekEnum(termEnum, seekTermEnum.term(), true); |
| - |
| - scratchTerm.copyBytes(seekTermEnum.term().bytes()); |
| - |
| - // +3 because we don't need to check the char |
| - // at upTo: we know it's > BMP |
| - upTo += 3; |
| - |
| - // NOTE: we keep iterating, now, since this |
| - // can easily "recurse". Ie, after seeking |
| - // forward at a certain char position, we may |
| - // find another surrogate in our [new] suffix |
| - // and must then do another seek (recurse) |
| - } else { |
| - upTo++; |
| - } |
| - } else { |
| - upTo++; |
| - } |
| - } |
| - } |
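| The "try seek 1" transform above, in isolation: the first three bytes of the 4-byte
| sequence are overwritten with EE 80 80 (U+E000, i.e. UTF8_HIGH_BMP_LEAD plus two
| minimal continuation bytes) and the term is truncated there, producing the smallest
| possible E-term at that position. A hedged sketch of just that byte surgery:
| 
| ```java
| import java.nio.charset.StandardCharsets;
| import java.util.Arrays;
| 
| public class SeekTargetSketch {
|   public static void main(String[] args) {
|     // 'a' + U+10000 + 'b' encodes as 61 F0 90 80 80 62
|     byte[] term = "a\uD800\uDC00b".getBytes(StandardCharsets.UTF_8);
|     int upTo = 1;                                  // the non-BMP char starts here
|     byte[] target = Arrays.copyOf(term, upTo + 3); // keep prefix + 3 bytes
|     target[upTo] = (byte) 0xEE;                    // UTF8_HIGH_BMP_LEAD
|     target[upTo + 1] = (byte) 0x80;
|     target[upTo + 2] = (byte) 0x80;
|     // target now decodes to "a" + U+E000
|     System.out.println(new String(target, StandardCharsets.UTF_8).equals("a\uE000"));
|   }
| }
| ```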
| - |
| - private boolean unicodeSortOrder; |
| - |
| - void reset(FieldInfo fieldInfo) throws IOException { |
| - //System.out.println("pff.reset te=" + termEnum); |
| - this.fieldInfo = fieldInfo; |
| - internedFieldName = fieldInfo.name.intern(); |
| - final Term term = new Term(internedFieldName); |
| - if (termEnum == null) { |
| - termEnum = getTermsDict().terms(term); |
| - seekTermEnum = getTermsDict().terms(term); |
| - //System.out.println(" term=" + termEnum.term()); |
| - } else { |
| - getTermsDict().seekEnum(termEnum, term, true); |
| - } |
| - skipNext = true; |
| - |
| - unicodeSortOrder = sortTermsByUnicode(); |
| - |
| - final Term t = termEnum.term(); |
| - if (t != null && t.field() == internedFieldName) { |
| - newSuffixStart = 0; |
| - prevTerm.length = 0; |
| - surrogateDance(); |
| - } |
| - } |
| - |
| - @Override |
| - public Comparator<BytesRef> getComparator() { |
| - // Pre-flex indexes are always sorted in UTF-16 order, but
| - // we remap on the fly to Unicode order
| - if (unicodeSortOrder) { |
| - return BytesRef.getUTF8SortedAsUnicodeComparator(); |
| - } else { |
| - return BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - } |
| - } |
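| The two orders really do disagree, which is the whole reason the dance exists: in
| UTF-16 code-unit order a surrogate pair (lead 0xD800..) sorts below U+E000, while in
| Unicode codepoint (and UTF-8 byte) order it sorts above. A quick illustration with
| plain JDK strings:
| 
| ```java
| public class SortOrderDemo {
|   public static void main(String[] args) {
|     String nonBmp = "\uD800\uDC00"; // U+10000, a non-BMP codepoint
|     String highBmp = "\uE000";      // a high-BMP codepoint
|     // UTF-16 order (String.compareTo): surrogate lead 0xD800 < 0xE000
|     System.out.println(nonBmp.compareTo(highBmp) < 0);                  // true
|     // Codepoint order: U+10000 > U+E000
|     System.out.println(nonBmp.codePointAt(0) > highBmp.codePointAt(0)); // true
|   }
| }
| ```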
| - |
| - @Override |
| - public void seekExact(long ord) throws IOException { |
| - throw new UnsupportedOperationException(); |
| - } |
| - |
| - @Override |
| - public long ord() throws IOException { |
| - throw new UnsupportedOperationException(); |
| - } |
| - |
| - @Override |
| - public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException { |
| - if (DEBUG_SURROGATES) { |
| - System.out.println("TE.seek target=" + UnicodeUtil.toHexString(term.utf8ToString())); |
| - } |
| - skipNext = false; |
| - final TermInfosReader tis = getTermsDict(); |
| - final Term t0 = new Term(fieldInfo.name, term); |
| - |
| - assert termEnum != null; |
| - |
| - tis.seekEnum(termEnum, t0, useCache); |
| - |
| - final Term t = termEnum.term(); |
| - |
| - if (t != null && t.field() == internedFieldName && term.bytesEquals(t.bytes())) { |
| - // If we found an exact match, no need to do the |
| - // surrogate dance |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" seek exact match"); |
| - } |
| - current = t.bytes(); |
| - return SeekStatus.FOUND; |
| - } else if (t == null || t.field() != internedFieldName) { |
| - |
| - // TODO: maybe we can handle this like the next() |
| - // into null? set term as prevTerm then dance? |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" seek hit EOF"); |
| - } |
| - |
| - // We hit EOF; try end-case surrogate dance: if we |
| - // find an E, try swapping in S, backwards: |
| - scratchTerm.copyBytes(term); |
| - |
| - assert scratchTerm.offset == 0; |
| - |
| - for(int i=scratchTerm.length-1;i>=0;i--) { |
| - if (isHighBMPChar(scratchTerm.bytes, i)) { |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" found E pos=" + i + "; try seek"); |
| - } |
| - |
| - if (seekToNonBMP(seekTermEnum, scratchTerm, i)) { |
| - |
| - scratchTerm.copyBytes(seekTermEnum.term().bytes()); |
| - getTermsDict().seekEnum(termEnum, seekTermEnum.term(), useCache); |
| - |
| - newSuffixStart = 1+i; |
| - |
| - doPushes(); |
| - |
| - // Found a match |
| - // TODO: faster seek? |
| - current = termEnum.term().bytes(); |
| - return SeekStatus.NOT_FOUND; |
| - } |
| - } |
| - } |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" seek END"); |
| - } |
| - |
| - current = null; |
| - return SeekStatus.END; |
| - } else { |
| - |
| - // We found a non-exact but non-null term; this one |
| - // is fun -- just treat it like next, by pretending |
| - // requested term was prev: |
| - prevTerm.copyBytes(term); |
| - |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" seek hit non-exact term=" + UnicodeUtil.toHexString(t.text())); |
| - } |
| - |
| - final BytesRef br = t.bytes(); |
| - assert br.offset == 0; |
| - |
| - setNewSuffixStart(term, br); |
| - |
| - surrogateDance(); |
| - |
| - final Term t2 = termEnum.term(); |
| - if (t2 == null || t2.field() != internedFieldName) { |
| - // PreFlex codec interns field names; verify: |
| - assert t2 == null || !t2.field().equals(internedFieldName); |
| - current = null; |
| - return SeekStatus.END; |
| - } else { |
| - current = t2.bytes(); |
| - assert !unicodeSortOrder || term.compareTo(current) < 0 : "term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " vs current=" + UnicodeUtil.toHexString(current.utf8ToString()); |
| - return SeekStatus.NOT_FOUND; |
| - } |
| - } |
| - } |
| - |
| - private void setNewSuffixStart(BytesRef br1, BytesRef br2) { |
| - final int limit = Math.min(br1.length, br2.length); |
| - int lastStart = 0; |
| - for(int i=0;i<limit;i++) { |
| - if ((br1.bytes[br1.offset+i] & 0xc0) == 0xc0 || (br1.bytes[br1.offset+i] & 0x80) == 0) { |
| - lastStart = i; |
| - } |
| - if (br1.bytes[br1.offset+i] != br2.bytes[br2.offset+i]) { |
| - newSuffixStart = lastStart; |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" set newSuffixStart=" + newSuffixStart); |
| - } |
| - return; |
| - } |
| - } |
| - newSuffixStart = limit; |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" set newSuffixStart=" + newSuffixStart); |
| - } |
| - } |
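| The mask test in setNewSuffixStart identifies UTF-8 character boundaries: a byte
| starts a new codepoint unless it is a continuation byte (10xxxxxx). The same test,
| stated as a single predicate (a sketch, not the shipped code):
| 
| ```java
| public class CharBoundary {
|   // A byte begins a UTF-8 codepoint unless it is a continuation byte 10xxxxxx.
|   static boolean isCharStart(byte b) { return (b & 0xC0) != 0x80; }
| 
|   public static void main(String[] args) {
|     byte[] b = "\u00E9".getBytes(java.nio.charset.StandardCharsets.UTF_8); // C3 A9
|     System.out.println(isCharStart(b[0]) + " " + isCharStart(b[1]));       // true false
|   }
| }
| ```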
| - |
| - @Override |
| - public BytesRef next() throws IOException { |
| - if (DEBUG_SURROGATES) { |
| - System.out.println("TE.next()"); |
| - } |
| - if (skipNext) { |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" skipNext=true"); |
| - } |
| - skipNext = false; |
| - if (termEnum.term() == null) { |
| - return null; |
| - // PreFlex codec interns field names: |
| - } else if (termEnum.term().field() != internedFieldName) { |
| - return null; |
| - } else { |
| - return current = termEnum.term().bytes(); |
| - } |
| - } |
| - |
| - // TODO: can we use STE's prevBuffer here? |
| - prevTerm.copyBytes(termEnum.term().bytes()); |
| - |
| - if (termEnum.next() && termEnum.term().field() == internedFieldName) { |
| - newSuffixStart = termEnum.newSuffixStart; |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" newSuffixStart=" + newSuffixStart); |
| - } |
| - surrogateDance(); |
| - final Term t = termEnum.term(); |
| - if (t == null || t.field() != internedFieldName) { |
| - // PreFlex codec interns field names; verify: |
| - assert t == null || !t.field().equals(internedFieldName); |
| - current = null; |
| - } else { |
| - current = t.bytes(); |
| - } |
| - return current; |
| - } else { |
| - // This field is exhausted, but we have to give |
| - // surrogateDance a chance to seek back: |
| - if (DEBUG_SURROGATES) { |
| - System.out.println(" force cont"); |
| - } |
| - //newSuffixStart = prevTerm.length; |
| - newSuffixStart = 0; |
| - surrogateDance(); |
| - |
| - final Term t = termEnum.term(); |
| - if (t == null || t.field() != internedFieldName) { |
| - // PreFlex codec interns field names; verify: |
| - assert t == null || !t.field().equals(internedFieldName); |
| - return null; |
| - } else { |
| - current = t.bytes(); |
| - return current; |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public BytesRef term() { |
| - return current; |
| - } |
| - |
| - @Override |
| - public int docFreq() { |
| - return termEnum.docFreq(); |
| - } |
| - |
| - @Override |
| - public long totalTermFreq() { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException { |
| - PreDocsEnum docsEnum; |
| - if (needsFreqs && fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) { |
| - return null; |
| - } else if (reuse == null || !(reuse instanceof PreDocsEnum)) { |
| - docsEnum = new PreDocsEnum(); |
| - } else { |
| - docsEnum = (PreDocsEnum) reuse; |
| - if (docsEnum.getFreqStream() != freqStream) { |
| - docsEnum = new PreDocsEnum(); |
| - } |
| - } |
| - return docsEnum.reset(termEnum, liveDocs); |
| - } |
| - |
| - @Override |
| - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { |
| - if (needsOffsets) { |
| - // Pre-4.0 indices never have offsets: |
| - return null; |
| - } |
| - |
| - PreDocsAndPositionsEnum docsPosEnum; |
| - if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { |
| - return null; |
| - } else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) { |
| - docsPosEnum = new PreDocsAndPositionsEnum(); |
| - } else { |
| - docsPosEnum = (PreDocsAndPositionsEnum) reuse; |
| - if (docsPosEnum.getFreqStream() != freqStream) { |
| - docsPosEnum = new PreDocsAndPositionsEnum(); |
| - } |
| - } |
| - return docsPosEnum.reset(termEnum, liveDocs); |
| - } |
| - } |
| - |
| - private final class PreDocsEnum extends DocsEnum { |
| - final private SegmentTermDocs docs; |
| - private int docID = -1; |
| - PreDocsEnum() throws IOException { |
| - docs = new SegmentTermDocs(freqStream, getTermsDict(), fieldInfos); |
| - } |
| - |
| - IndexInput getFreqStream() { |
| - return freqStream; |
| - } |
| - |
| - public PreDocsEnum reset(SegmentTermEnum termEnum, Bits liveDocs) throws IOException { |
| - docs.setLiveDocs(liveDocs); |
| - docs.seek(termEnum); |
| - docID = -1; |
| - return this; |
| - } |
| - |
| - @Override |
| - public int nextDoc() throws IOException { |
| - if (docs.next()) { |
| - return docID = docs.doc(); |
| - } else { |
| - return docID = NO_MORE_DOCS; |
| - } |
| - } |
| - |
| - @Override |
| - public int advance(int target) throws IOException { |
| - if (docs.skipTo(target)) { |
| - return docID = docs.doc(); |
| - } else { |
| - return docID = NO_MORE_DOCS; |
| - } |
| - } |
| - |
| - @Override |
| - public int freq() throws IOException { |
| - return docs.freq(); |
| - } |
| - |
| - @Override |
| - public int docID() { |
| - return docID; |
| - } |
| - } |
| - |
| - private final class PreDocsAndPositionsEnum extends DocsAndPositionsEnum { |
| - final private SegmentTermPositions pos; |
| - private int docID = -1; |
| - PreDocsAndPositionsEnum() throws IOException { |
| - pos = new SegmentTermPositions(freqStream, proxStream, getTermsDict(), fieldInfos); |
| - } |
| - |
| - IndexInput getFreqStream() { |
| - return freqStream; |
| - } |
| - |
| - public DocsAndPositionsEnum reset(SegmentTermEnum termEnum, Bits liveDocs) throws IOException { |
| - pos.setLiveDocs(liveDocs); |
| - pos.seek(termEnum); |
| - docID = -1; |
| - return this; |
| - } |
| - |
| - @Override |
| - public int nextDoc() throws IOException { |
| - if (pos.next()) { |
| - return docID = pos.doc(); |
| - } else { |
| - return docID = NO_MORE_DOCS; |
| - } |
| - } |
| - |
| - @Override |
| - public int advance(int target) throws IOException { |
| - if (pos.skipTo(target)) { |
| - return docID = pos.doc(); |
| - } else { |
| - return docID = NO_MORE_DOCS; |
| - } |
| - } |
| - |
| - @Override |
| - public int freq() throws IOException { |
| - return pos.freq(); |
| - } |
| - |
| - @Override |
| - public int docID() { |
| - return docID; |
| - } |
| - |
| - @Override |
| - public int nextPosition() throws IOException { |
| - assert docID != NO_MORE_DOCS; |
| - return pos.nextPosition(); |
| - } |
| - |
| - @Override |
| - public int startOffset() throws IOException { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public int endOffset() throws IOException { |
| - return -1; |
| - } |
| - |
| - @Override |
| - public boolean hasPayload() { |
| - assert docID != NO_MORE_DOCS; |
| - return pos.isPayloadAvailable(); |
| - } |
| - |
| - private BytesRef payload; |
| - |
| - @Override |
| - public BytesRef getPayload() throws IOException { |
| - final int len = pos.getPayloadLength(); |
| - if (payload == null) { |
| - payload = new BytesRef(); |
| - payload.bytes = new byte[len]; |
| - } else { |
| - if (payload.bytes.length < len) { |
| - payload.grow(len); |
| - } |
| - } |
| - |
| - payload.bytes = pos.getPayload(payload.bytes, 0); |
| - payload.length = len; |
| - return payload; |
| - } |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/package.html |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/package.html (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/package.html (working copy) |
| @@ -1,25 +0,0 @@ |
| -<!doctype html public "-//w3c//dtd html 4.0 transitional//en"> |
| -<!-- |
| - Licensed to the Apache Software Foundation (ASF) under one or more |
| - contributor license agreements. See the NOTICE file distributed with |
| - this work for additional information regarding copyright ownership. |
| - The ASF licenses this file to You under the Apache License, Version 2.0 |
| - (the "License"); you may not use this file except in compliance with |
| - the License. You may obtain a copy of the License at |
| - |
| - http://www.apache.org/licenses/LICENSE-2.0 |
| - |
| - Unless required by applicable law or agreed to in writing, software |
| - distributed under the License is distributed on an "AS IS" BASIS, |
| - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - See the License for the specific language governing permissions and |
| - limitations under the License. |
| ---> |
| -<html> |
| -<head> |
| - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> |
| -</head> |
| -<body> |
| -Codec to support Lucene 3.x indexes (readonly) |
| -</body> |
| -</html> |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java (working copy) |
| @@ -1,46 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.NormsFormat; |
| -import org.apache.lucene.codecs.PerDocConsumer; |
| -import org.apache.lucene.codecs.PerDocProducer; |
| -import org.apache.lucene.index.PerDocWriteState; |
| -import org.apache.lucene.index.SegmentReadState; |
| - |
| -/** |
| - * Lucene3x ReadOnly NormsFormat implementation |
| - * @deprecated (4.0) This is only used to read indexes created |
| - * before 4.0. |
| - * @lucene.experimental |
| - */ |
| -@Deprecated |
| -class Lucene3xNormsFormat extends NormsFormat { |
| - |
| - @Override |
| - public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { |
| - throw new UnsupportedOperationException("this codec can only be used for reading"); |
| - } |
| - |
| - @Override |
| - public PerDocProducer docsProducer(SegmentReadState state) throws IOException { |
| - return new Lucene3xNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context); |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermEnum.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermEnum.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermEnum.java (working copy) |
| @@ -1,225 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.IndexFormatTooOldException; |
| -import org.apache.lucene.index.IndexFormatTooNewException; |
| - |
| -/** |
| - * @deprecated (4.0) No longer used with flex indexing, except for |
| - * reading old segments |
| - * @lucene.experimental */ |
| - |
| -@Deprecated |
| -final class SegmentTermEnum implements Cloneable { |
| - private IndexInput input; |
| - FieldInfos fieldInfos; |
| - long size; |
| - long position = -1; |
| - |
| - // Changed strings to true utf8 with length-in-bytes not |
| - // length-in-chars |
| - public static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4; |
| - |
| - // NOTE: always change this if you switch to a new format! |
| - // whenever you add a new format, make it 1 smaller (negative version logic)! |
| - public static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; |
| - |
| - // when removing support for old versions, leave the last supported version here |
| - public static final int FORMAT_MINIMUM = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; |
| - |
| - private TermBuffer termBuffer = new TermBuffer(); |
| - private TermBuffer prevBuffer = new TermBuffer(); |
| - private TermBuffer scanBuffer = new TermBuffer(); // used for scanning |
| - |
| - TermInfo termInfo = new TermInfo(); |
| - |
| - private int format; |
| - private boolean isIndex = false; |
| - long indexPointer = 0; |
| - int indexInterval; |
| - int skipInterval; |
| - int newSuffixStart; |
| - int maxSkipLevels; |
| - private boolean first = true; |
| - |
| - SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi) |
| - throws CorruptIndexException, IOException { |
| - input = i; |
| - fieldInfos = fis; |
| - isIndex = isi; |
| - maxSkipLevels = 1; // use single-level skip lists for formats > -3 |
| - |
| - int firstInt = input.readInt(); |
| - if (firstInt >= 0) { |
| - // original-format file, without explicit format version number |
| - format = 0; |
| - size = firstInt; |
| - |
| - // back-compatible settings |
| - indexInterval = 128; |
| - skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization |
| - } else { |
| - // we have a format version number |
| - format = firstInt; |
| - |
| - // check that it is a format we can understand |
| - if (format > FORMAT_MINIMUM) |
| - throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - if (format < FORMAT_CURRENT) |
| - throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - |
| - size = input.readLong(); // read the size |
| - |
| - indexInterval = input.readInt(); |
| - skipInterval = input.readInt(); |
| - maxSkipLevels = input.readInt(); |
| - assert indexInterval > 0: "indexInterval=" + indexInterval + "; must be > 0";
| - assert skipInterval > 0: "skipInterval=" + skipInterval + "; must be > 0";
| - } |
| - } |
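| The header convention read above is worth spelling out: pre-versioned files begin
| directly with a non-negative term count, while versioned files begin with a negative
| format id followed by the count. A self-contained sketch of that sniffing pattern
| (plain java.io streams standing in for Lucene's IndexInput):
| 
| ```java
| import java.io.*;
| 
| public class HeaderSniffSketch {
|   public static void main(String[] args) throws IOException {
|     ByteArrayOutputStream bos = new ByteArrayOutputStream();
|     DataOutputStream out = new DataOutputStream(bos);
|     out.writeInt(-4);    // format id (negative => versioned file)
|     out.writeLong(42L);  // term count follows in versioned files
|     DataInputStream in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
|     int firstInt = in.readInt();
|     int format = firstInt >= 0 ? 0 : firstInt;          // non-negative => original format
|     long size = firstInt >= 0 ? firstInt : in.readLong();
|     System.out.println("format=" + format + " size=" + size); // format=-4 size=42
|   }
| }
| ```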
| - |
| - @Override |
| - protected SegmentTermEnum clone() { |
| - SegmentTermEnum clone = null; |
| - try { |
| - clone = (SegmentTermEnum) super.clone(); |
| - } catch (CloneNotSupportedException e) { /* cannot happen: this class implements Cloneable */ }
| - |
| - clone.input = (IndexInput) input.clone(); |
| - clone.termInfo = new TermInfo(termInfo); |
| - |
| - clone.termBuffer = termBuffer.clone(); |
| - clone.prevBuffer = prevBuffer.clone(); |
| - clone.scanBuffer = new TermBuffer(); |
| - |
| - return clone; |
| - } |
| - |
| - final void seek(long pointer, long p, Term t, TermInfo ti) |
| - throws IOException { |
| - input.seek(pointer); |
| - position = p; |
| - termBuffer.set(t); |
| - prevBuffer.reset(); |
| - //System.out.println(" ste doSeek prev=" + prevBuffer.toTerm() + " this=" + this); |
| - termInfo.set(ti); |
| - first = p == -1; |
| - } |
| - |
| - /** Increments the enumeration to the next element. True if one exists.*/ |
| - public final boolean next() throws IOException { |
| - prevBuffer.set(termBuffer); |
| - //System.out.println(" ste setPrev=" + prev() + " this=" + this); |
| - |
| - if (position++ >= size - 1) { |
| - termBuffer.reset(); |
| - //System.out.println(" EOF"); |
| - return false; |
| - } |
| - |
| - termBuffer.read(input, fieldInfos); |
| - newSuffixStart = termBuffer.newSuffixStart; |
| - |
| - termInfo.docFreq = input.readVInt(); // read doc freq |
| - termInfo.freqPointer += input.readVLong(); // read freq pointer |
| - termInfo.proxPointer += input.readVLong(); // read prox pointer |
| - |
| - if (termInfo.docFreq >= skipInterval) |
| - termInfo.skipOffset = input.readVInt(); |
| - |
| - if (isIndex) |
| - indexPointer += input.readVLong(); // read index pointer |
| - |
| - //System.out.println(" ste ret term=" + term()); |
| - return true; |
| - } |
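| Note that the freq and prox pointers are delta-coded per term: the file stores gaps
| and next() accumulates them into absolute positions. In miniature (a sketch of the
| accumulation only, with made-up gap values):
| 
| ```java
| public class DeltaDecode {
|   public static void main(String[] args) {
|     long freqPointer = 0;
|     long[] storedGaps = {10, 3, 7};     // what readVLong() would return per term
|     for (long gap : storedGaps) {
|       freqPointer += gap;               // absolute pointer after each term
|       System.out.println(freqPointer);  // 10, 13, 20
|     }
|   }
| }
| ```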
| - |
| - /* Optimized scan, without allocating new terms. |
| - * Returns the number of invocations of next().
| - * |
| - * NOTE: LUCENE-3183: if you pass Term("", "") here then this |
| - * will incorrectly return before positioning the enum, |
| - * and position will be -1; caller must detect this. */ |
| - final int scanTo(Term term) throws IOException { |
| - scanBuffer.set(term); |
| - int count = 0; |
| - if (first) { |
| - // Always force initial next() in case term is |
| - // Term("", "") |
| - next(); |
| - first = false; |
| - count++; |
| - } |
| - while (scanBuffer.compareTo(termBuffer) > 0 && next()) { |
| - count++; |
| - } |
| - return count; |
| - } |
| - |
| - /** Returns the current Term in the enumeration. |
| - Initially invalid, valid after next() called for the first time.*/ |
| - public final Term term() { |
| - return termBuffer.toTerm(); |
| - } |
| - |
| - /** Returns the previous Term enumerated. Initially null.*/ |
| - final Term prev() { |
| - return prevBuffer.toTerm(); |
| - } |
| - |
| - /** Returns the current TermInfo in the enumeration. |
| - Initially invalid, valid after next() called for the first time.*/ |
| - final TermInfo termInfo() { |
| - return new TermInfo(termInfo); |
| - } |
| - |
| - /** Sets the argument to the current TermInfo in the enumeration. |
| - Initially invalid, valid after next() called for the first time.*/ |
| - final void termInfo(TermInfo ti) { |
| - ti.set(termInfo); |
| - } |
| - |
| - /** Returns the docFreq from the current TermInfo in the enumeration. |
| - Initially invalid, valid after next() called for the first time.*/ |
| - public final int docFreq() { |
| - return termInfo.docFreq; |
| - } |
| - |
| - /* Returns the freqPointer from the current TermInfo in the enumeration. |
| - Initially invalid, valid after next() called for the first time.*/ |
| - final long freqPointer() { |
| - return termInfo.freqPointer; |
| - } |
| - |
| - /* Returns the proxPointer from the current TermInfo in the enumeration. |
| - Initially invalid, valid after next() called for the first time.*/ |
| - final long proxPointer() { |
| - return termInfo.proxPointer; |
| - } |
| - |
| - /** Closes the enumeration to further activity, freeing resources. */ |
| - public final void close() throws IOException { |
| - input.close(); |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java (working copy) |
| @@ -1,298 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.StoredFieldsReader; |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.FieldInfo; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexFormatTooNewException; |
| -import org.apache.lucene.index.IndexFormatTooOldException; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.StoredFieldVisitor; |
| -import org.apache.lucene.store.AlreadyClosedException; |
| -import org.apache.lucene.store.CompoundFileDirectory; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.util.IOUtils; |
| - |
| -import java.io.Closeable; |
| - |
| -/** |
| - * Class responsible for access to stored document fields. |
| - * <p/> |
| - * It uses <segment>.fdt and <segment>.fdx files.
| - * |
| - * @deprecated |
| - */ |
| -@Deprecated |
| -final class Lucene3xStoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable { |
| - private final static int FORMAT_SIZE = 4; |
| - |
| - /** Extension of stored fields file */ |
| - public static final String FIELDS_EXTENSION = "fdt"; |
| - |
| - /** Extension of stored fields index file */ |
| - public static final String FIELDS_INDEX_EXTENSION = "fdx"; |
| - |
| - // Lucene 3.0: Removal of compressed fields |
| - static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; |
| - |
| - // Lucene 3.2: NumericFields are stored in binary format |
| - static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3; |
| - |
| - // NOTE: if you introduce a new format, make it 1 higher |
| - // than the current one, and always change this if you |
| - // switch to a new format! |
| - public static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS; |
| - |
| - // when removing support for old versions, leave the last supported version here |
| - static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; |
| - |
| - // NOTE: bit 0 is free here! You can steal it! |
| - public static final int FIELD_IS_BINARY = 1 << 1; |
| - |
| - // the old bit 1 << 2 was compressed, is now left out |
| - |
| - private static final int _NUMERIC_BIT_SHIFT = 3; |
| - static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT; |
| - |
| - public static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT; |
| - public static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT; |
| - public static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT; |
| - public static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT; |
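| Decoding a per-field flags byte under this layout (bit 1 = binary, bits 3..5 =
| numeric type) is a two-mask affair; a small sketch using the constants above:
| 
| ```java
| public class FieldBitsSketch {
|   public static void main(String[] args) {
|     int bits = 2 << 3; // FIELD_IS_NUMERIC_LONG
|     boolean isBinary = (bits & (1 << 1)) != 0;
|     int numericType = (bits & (0x07 << 3)) >> 3; // 0 means not numeric
|     System.out.println("binary=" + isBinary + " numericType=" + numericType);
|   }
| }
| ```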
| - |
| - private final FieldInfos fieldInfos; |
| - private final IndexInput fieldsStream; |
| - private final IndexInput indexStream; |
| - private int numTotalDocs; |
| - private int size; |
| - private boolean closed; |
| - private final int format; |
| - |
| - // The docID offset where our docs begin in the index |
| - // file. This will be 0 if we have our own private file. |
| - private int docStoreOffset; |
| - |
| - // when we are inside a compound share doc store (CFX), |
| - // (lucene 3.0 indexes only), we privately open our own fd. |
| - private final CompoundFileDirectory storeCFSReader; |
| - |
| - /** Returns a cloned FieldsReader that shares open |
| - * IndexInputs with the original one. It is the caller's |
| - * job not to close the original FieldsReader until all |
| - * clones are closed (eg, currently SegmentReader manages
| - * this logic). */ |
| - @Override |
| - public Lucene3xStoredFieldsReader clone() { |
| - ensureOpen(); |
| - return new Lucene3xStoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone()); |
| - } |
| - |
| - /** Verifies that the code version which wrote the segment is supported. */ |
| - public static void checkCodeVersion(Directory dir, String segment) throws IOException { |
| - final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION); |
| - IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT); |
| - |
| - try { |
| - int format = idxStream.readInt(); |
| - if (format < FORMAT_MINIMUM) |
| - throw new IndexFormatTooOldException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - if (format > FORMAT_CURRENT) |
| - throw new IndexFormatTooNewException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - } finally { |
| - idxStream.close(); |
| - } |
| - } |
| - |
| - // Used only by clone |
| - private Lucene3xStoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset, |
| - IndexInput fieldsStream, IndexInput indexStream) { |
| - this.fieldInfos = fieldInfos; |
| - this.numTotalDocs = numTotalDocs; |
| - this.size = size; |
| - this.format = format; |
| - this.docStoreOffset = docStoreOffset; |
| - this.fieldsStream = fieldsStream; |
| - this.indexStream = indexStream; |
| - this.storeCFSReader = null; |
| - } |
| - |
| - public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { |
| - final String segment = Lucene3xSegmentInfoFormat.getDocStoreSegment(si); |
| - final int docStoreOffset = Lucene3xSegmentInfoFormat.getDocStoreOffset(si); |
| - final int size = si.getDocCount(); |
| - boolean success = false; |
| - fieldInfos = fn; |
| - try { |
| - if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(si)) { |
| - d = storeCFSReader = new CompoundFileDirectory(si.dir, |
| - IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); |
| - } else { |
| - storeCFSReader = null; |
| - } |
| - fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context); |
| - final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION); |
| - indexStream = d.openInput(indexStreamFN, context); |
| - |
| - format = indexStream.readInt(); |
| - |
| - if (format < FORMAT_MINIMUM) |
| - throw new IndexFormatTooOldException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - if (format > FORMAT_CURRENT) |
| - throw new IndexFormatTooNewException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT); |
| - |
| - final long indexSize = indexStream.length() - FORMAT_SIZE; |
| - |
| - if (docStoreOffset != -1) { |
| - // We read only a slice out of this shared fields file |
| - this.docStoreOffset = docStoreOffset; |
| - this.size = size; |
| - |
| - // Verify the file is long enough to hold all of our |
| - // docs |
| - assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset; |
| - } else { |
| - this.docStoreOffset = 0; |
| - this.size = (int) (indexSize >> 3); |
| - // Verify two sources of "maxDoc" agree: |
| - if (this.size != si.getDocCount()) { |
| - throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.getDocCount()); |
| - } |
| - } |
| - numTotalDocs = (int) (indexSize >> 3); |
| - success = true; |
| - } finally { |
| - // With lock-less commits, it's entirely possible (and |
| - // fine) to hit a FileNotFound exception above. In |
| - // this case, we want to explicitly close any subset |
| - // of things that were opened so that we don't have to |
| - // wait for a GC to do so. |
| - if (!success) { |
| - close(); |
| - } |
| - } |
| - } |
| - |
| - /** |
| - * @throws AlreadyClosedException if this FieldsReader is closed |
| - */ |
| - private void ensureOpen() throws AlreadyClosedException { |
| - if (closed) { |
| - throw new AlreadyClosedException("this FieldsReader is closed"); |
| - } |
| - } |
| - |
| - /** |
| - * Closes the underlying {@link org.apache.lucene.store.IndexInput} streams. |
| - * This means that the Fields values will not be accessible. |
| - * |
| - * @throws IOException |
| - */ |
| - public final void close() throws IOException { |
| - if (!closed) { |
| - IOUtils.close(fieldsStream, indexStream, storeCFSReader); |
| - closed = true; |
| - } |
| - } |
| - |
| - private void seekIndex(int docID) throws IOException { |
| - indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); |
| - } |
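| Each document owns exactly one 8-byte pointer in the index stream, so the lookup
| above is pure arithmetic: with FORMAT_SIZE = 4 and, say, docID = 5 and
| docStoreOffset = 100 (hypothetical values), the pointer lives at
| 4 + (5 + 100) * 8 = 844.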
| - |
| - public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { |
| - seekIndex(n); |
| - fieldsStream.seek(indexStream.readLong()); |
| - |
| - final int numFields = fieldsStream.readVInt(); |
| - for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) { |
| - int fieldNumber = fieldsStream.readVInt(); |
| - FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); |
| - |
| - int bits = fieldsStream.readByte() & 0xFF; |
| - assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits); |
| - |
| - switch(visitor.needsField(fieldInfo)) { |
| - case YES: |
| - readField(visitor, fieldInfo, bits); |
| - break; |
| - case NO: |
| - skipField(bits); |
| - break; |
| - case STOP: |
| - return; |
| - } |
| - } |
| - } |
| - |
| - private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException { |
| - final int numeric = bits & FIELD_IS_NUMERIC_MASK; |
| - if (numeric != 0) { |
| - switch(numeric) { |
| - case FIELD_IS_NUMERIC_INT: |
| - visitor.intField(info, fieldsStream.readInt()); |
| - return; |
| - case FIELD_IS_NUMERIC_LONG: |
| - visitor.longField(info, fieldsStream.readLong()); |
| - return; |
| - case FIELD_IS_NUMERIC_FLOAT: |
| - visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt())); |
| - return; |
| - case FIELD_IS_NUMERIC_DOUBLE: |
| - visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong())); |
| - return; |
| - default: |
| - throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric)); |
| - } |
| - } else { |
| - final int length = fieldsStream.readVInt(); |
| - byte bytes[] = new byte[length]; |
| - fieldsStream.readBytes(bytes, 0, length); |
| - if ((bits & FIELD_IS_BINARY) != 0) { |
| - visitor.binaryField(info, bytes, 0, bytes.length); |
| - } else { |
| - visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8)); |
| - } |
| - } |
| - } |
| - |
| - private void skipField(int bits) throws IOException { |
| - final int numeric = bits & FIELD_IS_NUMERIC_MASK; |
| - if (numeric != 0) { |
| - switch(numeric) { |
| - case FIELD_IS_NUMERIC_INT: |
| - case FIELD_IS_NUMERIC_FLOAT: |
| - fieldsStream.readInt(); |
| - return; |
| - case FIELD_IS_NUMERIC_LONG: |
| - case FIELD_IS_NUMERIC_DOUBLE: |
| - fieldsStream.readLong(); |
| - return; |
| - default: |
| - throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric)); |
| - } |
| - } else { |
| - final int length = fieldsStream.readVInt(); |
| - fieldsStream.seek(fieldsStream.getFilePointer() + length); |
| - } |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java (working copy) |
| @@ -1,257 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Comparator; |
| -import java.util.List; |
| - |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.util.BitUtil; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.MathUtil; |
| -import org.apache.lucene.util.PagedBytes.PagedBytesDataInput; |
| -import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput; |
| -import org.apache.lucene.util.PagedBytes; |
| -import org.apache.lucene.util.packed.GrowableWriter; |
| -import org.apache.lucene.util.packed.PackedInts; |
| - |
| -/** |
| - * This stores a monotonically increasing set of <Term, TermInfo> pairs in an |
| - * index segment. Pairs are accessed either by Term or by ordinal position in the
| - * set. The Terms and TermInfo are actually serialized and stored into a byte |
| - * array, and pointers to the position of each are stored in an int array.
| - * @deprecated |
| - */ |
| -@Deprecated |
| -class TermInfosReaderIndex { |
| - |
| - private static final int MAX_PAGE_BITS = 18; // 256 KB block |
| - private Term[] fields; |
| - private int totalIndexInterval; |
| - private Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - private final PagedBytesDataInput dataInput; |
| - private final PackedInts.Reader indexToDataOffset; |
| - private final int indexSize; |
| - private final int skipInterval; |
| - |
| - /** |
| - * Loads the segment information at segment load time. |
| - * |
| - * @param indexEnum |
| - * the term enum. |
| - * @param indexDivisor |
| - * the index divisor. |
| - * @param tiiFileLength |
| - * the size of the tii file, used to approximate the size of the |
| - * buffer. |
| - * @param totalIndexInterval |
| - * the total index interval. |
| - */ |
| - TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException { |
| - this.totalIndexInterval = totalIndexInterval; |
| - indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; |
| - skipInterval = indexEnum.skipInterval; |
| - // this is only an initial size; it will be GCed once the build is complete
| - long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor; |
| - PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize)); |
| - PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput(); |
| - |
| - final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2); |
| - GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT); |
| - |
| - String currentField = null; |
| - List<String> fieldStrs = new ArrayList<String>(); |
| - int fieldCounter = -1; |
| - for (int i = 0; indexEnum.next(); i++) { |
| - Term term = indexEnum.term(); |
| - if (currentField == null || !currentField.equals(term.field())) { |
| - currentField = term.field(); |
| - fieldStrs.add(currentField); |
| - fieldCounter++; |
| - } |
| - TermInfo termInfo = indexEnum.termInfo(); |
| - indexToTerms.set(i, dataOutput.getPosition()); |
| - dataOutput.writeVInt(fieldCounter); |
| - dataOutput.writeString(term.text()); |
| - dataOutput.writeVInt(termInfo.docFreq); |
| - if (termInfo.docFreq >= skipInterval) { |
| - dataOutput.writeVInt(termInfo.skipOffset); |
| - } |
| - dataOutput.writeVLong(termInfo.freqPointer); |
| - dataOutput.writeVLong(termInfo.proxPointer); |
| - dataOutput.writeVLong(indexEnum.indexPointer); |
| - for (int j = 1; j < indexDivisor; j++) { |
| - if (!indexEnum.next()) { |
| - break; |
| - } |
| - } |
| - } |
| - |
| - fields = new Term[fieldStrs.size()]; |
| - for (int i = 0; i < fields.length; i++) { |
| - fields[i] = new Term(fieldStrs.get(i)); |
| - } |
| - |
| - dataPagedBytes.freeze(true); |
| - dataInput = dataPagedBytes.getDataInput(); |
| - indexToDataOffset = indexToTerms.getMutable(); |
| - } |
| - |
| - private static int estimatePageBits(long estSize) { |
| - return Math.max(Math.min(64 - BitUtil.nlz(estSize), MAX_PAGE_BITS), 4); |
| - } |
| - |
| - void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException { |
| - PagedBytesDataInput input = dataInput.clone(); |
| - |
| - input.setPosition(indexToDataOffset.get(indexOffset)); |
| - |
| - // read the term |
| - int fieldId = input.readVInt(); |
| - Term field = fields[fieldId]; |
| - Term term = new Term(field.field(), input.readString()); |
| - |
| - // read the terminfo |
| - TermInfo termInfo = new TermInfo(); |
| - termInfo.docFreq = input.readVInt(); |
| - if (termInfo.docFreq >= skipInterval) { |
| - termInfo.skipOffset = input.readVInt(); |
| - } else { |
| - termInfo.skipOffset = 0; |
| - } |
| - termInfo.freqPointer = input.readVLong(); |
| - termInfo.proxPointer = input.readVLong(); |
| - |
| - long pointer = input.readVLong(); |
| - |
| - // perform the seek |
| - enumerator.seek(pointer, ((long) indexOffset * totalIndexInterval) - 1, term, termInfo); |
| - } |
| - |
| - /** |
| - * Binary search for the given term. |
| - * |
| - * @param term |
| - * the term to locate. |
| - * @throws IOException |
| - */ |
| - int getIndexOffset(Term term) throws IOException { |
| - int lo = 0; |
| - int hi = indexSize - 1; |
| - PagedBytesDataInput input = dataInput.clone(); |
| - BytesRef scratch = new BytesRef(); |
| - while (hi >= lo) { |
| - int mid = (lo + hi) >>> 1; |
| - int delta = compareTo(term, mid, input, scratch); |
| - if (delta < 0) |
| - hi = mid - 1; |
| - else if (delta > 0) |
| - lo = mid + 1; |
| - else |
| - return mid; |
| - } |
| - return hi; |
| - } |
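| Returning hi on a miss makes this a floor search: the result is the last index
| entry less than or equal to the target term (or -1 if the term sorts before
| everything). The same shape on a plain int array, as a standalone sketch:
| 
| ```java
| public class FloorSearch {
|   // Index of the last element <= key, or -1 if key < sorted[0].
|   static int floorIndex(int[] sorted, int key) {
|     int lo = 0, hi = sorted.length - 1;
|     while (hi >= lo) {
|       int mid = (lo + hi) >>> 1;
|       if (sorted[mid] > key) hi = mid - 1;
|       else if (sorted[mid] < key) lo = mid + 1;
|       else return mid;
|     }
|     return hi;
|   }
| 
|   public static void main(String[] args) {
|     System.out.println(floorIndex(new int[] {2, 5, 9}, 6)); // 1 (the entry for 5)
|   }
| }
| ```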
| - |
| - /** |
| - * Gets the term at the given position. For testing. |
| - * |
| - * @param termIndex |
| - * the position to read the term from the index. |
| - * @return the term. |
| - * @throws IOException |
| - */ |
| - Term getTerm(int termIndex) throws IOException { |
| - PagedBytesDataInput input = dataInput.clone(); |
| - input.setPosition(indexToDataOffset.get(termIndex)); |
| - |
| - // read the term |
| - int fieldId = input.readVInt(); |
| - Term field = fields[fieldId]; |
| - return new Term(field.field(), input.readString()); |
| - } |
| - |
| - /** |
| - * Returns the number of terms. |
| - * |
| - * @return the number of terms.
| - */ |
| - int length() { |
| - return indexSize; |
| - } |
| - |
| - /** |
| - * Compares the given term against the term in the index specified by the
| - * term index; i.e. it returns a negative value when term is less than the index term.
| - * |
| - * @param term |
| - * the given term. |
| - * @param termIndex |
| - * the index of the term to compare.
| - * @return the result of the comparison.
| - * @throws IOException |
| - */ |
| - int compareTo(Term term, int termIndex) throws IOException { |
| - return compareTo(term, termIndex, dataInput.clone(), new BytesRef()); |
| - } |
| - |
| - /** |
| - * Compares the fields of the terms first and, if they are not equal, returns
| - * that result. If the fields are equal, compares the term bytes.
| - * |
| - * @param term |
| - * the term to compare. |
| - * @param termIndex |
| - * the position of the term in the input to compare |
| - * @param input
| - * the input buffer.
| - * @param reuse
| - * a scratch BytesRef used to read the index term's bytes.
| - * @return the result of the comparison.
| - * @throws IOException |
| - */ |
| - private int compareTo(Term term, int termIndex, PagedBytesDataInput input, BytesRef reuse) throws IOException { |
| - // if term field does not equal mid's field index, then compare fields |
| - // else if they are equal, compare term's string values... |
| - int c = compareField(term, termIndex, input); |
| - if (c == 0) { |
| - reuse.length = input.readVInt(); |
| - reuse.grow(reuse.length); |
| - input.readBytes(reuse.bytes, 0, reuse.length); |
| - return comparator.compare(term.bytes(), reuse); |
| - } |
| - return c; |
| - } |
| - |
| - /** |
| - * Compares the fields before checking the text of the terms. |
| - * |
| - * @param term |
| - * the given term. |
| - * @param termIndex |
| - * the index of the term in the data block.
| - * @param input |
| - * the data block. |
| - * @return the result of the field comparison.
| - * @throws IOException |
| - */ |
| - private int compareField(Term term, int termIndex, PagedBytesDataInput input) throws IOException { |
| - input.setPosition(indexToDataOffset.get(termIndex)); |
| - return term.field().compareTo(fields[input.readVInt()].field()); |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfo.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfo.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfo.java (working copy) |
| @@ -1,63 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -/** A TermInfo is the record of information stored for a |
| - * term |
| - * @deprecated (4.0) This class is no longer used in flexible |
| - * indexing. */ |
| - |
| -@Deprecated |
| -class TermInfo { |
| - /** The number of documents which contain the term. */ |
| - public int docFreq = 0; |
| - |
| - public long freqPointer = 0; |
| - public long proxPointer = 0; |
| - public int skipOffset; |
| - |
| - public TermInfo() {} |
| - |
| - public TermInfo(int df, long fp, long pp) { |
| - docFreq = df; |
| - freqPointer = fp; |
| - proxPointer = pp; |
| - } |
| - |
| - public TermInfo(TermInfo ti) { |
| - docFreq = ti.docFreq; |
| - freqPointer = ti.freqPointer; |
| - proxPointer = ti.proxPointer; |
| - skipOffset = ti.skipOffset; |
| - } |
| - |
| - public final void set(int docFreq, |
| - long freqPointer, long proxPointer, int skipOffset) { |
| - this.docFreq = docFreq; |
| - this.freqPointer = freqPointer; |
| - this.proxPointer = proxPointer; |
| - this.skipOffset = skipOffset; |
| - } |
| - |
| - public final void set(TermInfo ti) { |
| - docFreq = ti.docFreq; |
| - freqPointer = ti.freqPointer; |
| - proxPointer = ti.proxPointer; |
| - skipOffset = ti.skipOffset; |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java (working copy) |
| @@ -1,151 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.HashSet; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.codecs.DocValuesFormat; |
| -import org.apache.lucene.codecs.FieldInfosFormat; |
| -import org.apache.lucene.codecs.LiveDocsFormat; |
| -import org.apache.lucene.codecs.NormsFormat; |
| -import org.apache.lucene.codecs.PerDocConsumer; |
| -import org.apache.lucene.codecs.PerDocProducer; |
| -import org.apache.lucene.codecs.PostingsFormat; |
| -import org.apache.lucene.codecs.SegmentInfoFormat; |
| -import org.apache.lucene.codecs.StoredFieldsFormat; |
| -import org.apache.lucene.codecs.TermVectorsFormat; |
| -import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.PerDocWriteState; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.SegmentInfoPerCommit; |
| -import org.apache.lucene.index.SegmentReadState; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.util.MutableBits; |
| - |
| -/** |
| - * Supports the Lucene 3.x index format (readonly) |
| - * @deprecated |
| - */ |
| -@Deprecated |
| -public class Lucene3xCodec extends Codec { |
| - public Lucene3xCodec() { |
| - super("Lucene3x"); |
| - } |
| - |
| - private final PostingsFormat postingsFormat = new Lucene3xPostingsFormat(); |
| - |
| - private final StoredFieldsFormat fieldsFormat = new Lucene3xStoredFieldsFormat(); |
| - |
| - private final TermVectorsFormat vectorsFormat = new Lucene3xTermVectorsFormat(); |
| - |
| - private final FieldInfosFormat fieldInfosFormat = new Lucene3xFieldInfosFormat(); |
| - |
| - private final SegmentInfoFormat infosFormat = new Lucene3xSegmentInfoFormat(); |
| - |
| - private final Lucene3xNormsFormat normsFormat = new Lucene3xNormsFormat(); |
| - |
| - /** Extension of compound file for doc store files*/ |
| - static final String COMPOUND_FILE_STORE_EXTENSION = "cfx"; |
| - |
| - // TODO: this should really be a different impl |
| - private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat() { |
| - @Override |
| - public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfoPerCommit info, int newDelCount, IOContext context) throws IOException { |
| - throw new UnsupportedOperationException("this codec can only be used for reading"); |
| - } |
| - }; |
| - |
| - // 3.x doesn't support docvalues |
| - private final DocValuesFormat docValuesFormat = new DocValuesFormat() { |
| - @Override |
| - public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { |
| - return null; |
| - } |
| - |
| - @Override |
| - public PerDocProducer docsProducer(SegmentReadState state) throws IOException { |
| - return null; |
| - } |
| - }; |
| - |
| - @Override |
| - public PostingsFormat postingsFormat() { |
| - return postingsFormat; |
| - } |
| - |
| - @Override |
| - public DocValuesFormat docValuesFormat() { |
| - return docValuesFormat; |
| - } |
| - |
| - @Override |
| - public StoredFieldsFormat storedFieldsFormat() { |
| - return fieldsFormat; |
| - } |
| - |
| - @Override |
| - public TermVectorsFormat termVectorsFormat() { |
| - return vectorsFormat; |
| - } |
| - |
| - @Override |
| - public FieldInfosFormat fieldInfosFormat() { |
| - return fieldInfosFormat; |
| - } |
| - |
| - @Override |
| - public SegmentInfoFormat segmentInfoFormat() { |
| - return infosFormat; |
| - } |
| - |
| - @Override |
| - public NormsFormat normsFormat() { |
| - return normsFormat; |
| - } |
| - |
| - @Override |
| - public LiveDocsFormat liveDocsFormat() { |
| - return liveDocsFormat; |
| - } |
| - |
| - /** Returns file names for shared doc stores, if any, else |
| - * null. */ |
| - public static Set<String> getDocStoreFiles(SegmentInfo info) { |
| - if (Lucene3xSegmentInfoFormat.getDocStoreOffset(info) != -1) { |
| - final String dsName = Lucene3xSegmentInfoFormat.getDocStoreSegment(info); |
| - Set<String> files = new HashSet<String>(); |
| - if (Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(info)) { |
| - files.add(IndexFileNames.segmentFileName(dsName, "", COMPOUND_FILE_STORE_EXTENSION)); |
| - } else { |
| - files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); |
| - files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); |
| - files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); |
| - files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); |
| - files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); |
| - } |
| - return files; |
| - } else { |
| - return null; |
| - } |
| - } |
| -} |
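| |
| The codec deleted above followed a strict read-only pattern: every read path was real, and the lone write path (live docs) threw. For reference, a minimal sketch of that pattern against the 4.0 format API — the subclass name is illustrative, not part of this patch; the signature is taken from the writeLiveDocs override above: |
| |
| import java.io.IOException; |
| import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; |
| import org.apache.lucene.index.SegmentInfoPerCommit; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.util.MutableBits; |
| |
| // Read-only pattern: inherit the read paths, reject the write path. |
| class ReadOnlyLiveDocsFormat extends Lucene40LiveDocsFormat { |
|   @Override |
|   public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfoPerCommit info, |
|                             int newDelCount, IOContext context) throws IOException { |
|     throw new UnsupportedOperationException("this format can only be used for reading"); |
|   } |
| } |
| |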
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xPostingsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xPostingsFormat.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xPostingsFormat.java (working copy) |
| @@ -1,64 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.FieldsConsumer; |
| -import org.apache.lucene.codecs.FieldsProducer; |
| -import org.apache.lucene.codecs.PostingsFormat; |
| -import org.apache.lucene.index.SegmentWriteState; |
| -import org.apache.lucene.index.SegmentReadState; |
| - |
| -/** Codec that reads the pre-flex-indexing postings |
| - * format. It does not provide a writer because newly |
| - * written segments should use the Codec configured on IndexWriter. |
| - * |
| - * @deprecated (4.0) This is only used to read indexes created |
| - * before 4.0. |
| - * @lucene.experimental |
| - */ |
| -@Deprecated |
| -class Lucene3xPostingsFormat extends PostingsFormat { |
| - |
| - /** Extension of terms file */ |
| - public static final String TERMS_EXTENSION = "tis"; |
| - |
| - /** Extension of terms index file */ |
| - public static final String TERMS_INDEX_EXTENSION = "tii"; |
| - |
| - /** Extension of freq postings file */ |
| - public static final String FREQ_EXTENSION = "frq"; |
| - |
| - /** Extension of prox postings file */ |
| - public static final String PROX_EXTENSION = "prx"; |
| - |
| - public Lucene3xPostingsFormat() { |
| - super("Lucene3x"); |
| - } |
| - |
| - @Override |
| - public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { |
| - throw new UnsupportedOperationException("this codec can only be used for reading"); |
| - } |
| - |
| - @Override |
| - public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { |
| - return new Lucene3xFields(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.termsIndexDivisor); |
| - } |
| -} |
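| |
| The extension constants above map to the legacy terms dictionary (.tis), terms index (.tii), frequency (.frq) and position (.prx) files. A small illustration of how they became file names, exactly as the reader below uses them — the segment name "_0" is hypothetical: |
| |
| // Illustration only; the segment name is hypothetical. |
| // IndexFileNames is org.apache.lucene.index.IndexFileNames. |
| String termsFile = IndexFileNames.segmentFileName("_0", "", "tis"); |
| // termsFile == "_0.tis"; the reader opened "_0.tii", "_0.frq" and "_0.prx" the same way |
| |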
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java (working copy) |
| @@ -1,351 +0,0 @@ |
| -package org.apache.lucene.codecs.lucene3x; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Comparator; |
| - |
| -import org.apache.lucene.index.CorruptIndexException; |
| -import org.apache.lucene.index.FieldInfos; |
| -import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.CloseableThreadLocal; |
| -import org.apache.lucene.util.DoubleBarrelLRUCache; |
| - |
| -/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a |
| - * Directory. Pairs are accessed either by Term or by ordinal position the |
| - * set |
| - * @deprecated (4.0) This class has been replaced by |
| - * FormatPostingsTermsDictReader, except for reading old segments. |
| - * @lucene.experimental |
| - */ |
| -@Deprecated |
| -final class TermInfosReader { |
| - private final Directory directory; |
| - private final String segment; |
| - private final FieldInfos fieldInfos; |
| - |
| - private final CloseableThreadLocal<ThreadResources> threadResources = new CloseableThreadLocal<ThreadResources>(); |
| - private final SegmentTermEnum origEnum; |
| - private final long size; |
| - |
| - private final TermInfosReaderIndex index; |
| - private final int indexLength; |
| - |
| - private final int totalIndexInterval; |
| - |
| - private final static int DEFAULT_CACHE_SIZE = 1024; |
| - |
| - // Just adds term's ord to TermInfo |
| - private final static class TermInfoAndOrd extends TermInfo { |
| - final long termOrd; |
| - public TermInfoAndOrd(TermInfo ti, long termOrd) { |
| - super(ti); |
| - assert termOrd >= 0; |
| - this.termOrd = termOrd; |
| - } |
| - } |
| - |
| - private static class CloneableTerm extends DoubleBarrelLRUCache.CloneableKey { |
| - Term term; |
| - public CloneableTerm(Term t) { |
| - this.term = t; |
| - } |
| - |
| - @Override |
| - public boolean equals(Object other) { |
| - CloneableTerm t = (CloneableTerm) other; |
| - return this.term.equals(t.term); |
| - } |
| - |
| - @Override |
| - public int hashCode() { |
| - return term.hashCode(); |
| - } |
| - |
| - @Override |
| - public CloneableTerm clone() { |
| - return new CloneableTerm(term); |
| - } |
| - } |
| - |
| - private final DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd> termsCache = new DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd>(DEFAULT_CACHE_SIZE); |
| - |
| - /** |
| - * Per-thread resources managed by ThreadLocal |
| - */ |
| - private static final class ThreadResources { |
| - SegmentTermEnum termEnum; |
| - } |
| - |
| - TermInfosReader(Directory dir, String seg, FieldInfos fis, IOContext context, int indexDivisor) |
| - throws CorruptIndexException, IOException { |
| - boolean success = false; |
| - |
| - if (indexDivisor < 1 && indexDivisor != -1) { |
| - throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor); |
| - } |
| - |
| - try { |
| - directory = dir; |
| - segment = seg; |
| - fieldInfos = fis; |
| - |
| - origEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), |
| - context), fieldInfos, false); |
| - size = origEnum.size; |
| - |
| - |
| - if (indexDivisor != -1) { |
| - // Load terms index |
| - totalIndexInterval = origEnum.indexInterval * indexDivisor; |
| - |
| - final String indexFileName = IndexFileNames.segmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION); |
| - final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(indexFileName, |
| - context), fieldInfos, true); |
| - |
| - try { |
| - index = new TermInfosReaderIndex(indexEnum, indexDivisor, dir.fileLength(indexFileName), totalIndexInterval); |
| - indexLength = index.length(); |
| - } finally { |
| - indexEnum.close(); |
| - } |
| - } else { |
| - // Do not load terms index: |
| - totalIndexInterval = -1; |
| - index = null; |
| - indexLength = -1; |
| - } |
| - success = true; |
| - } finally { |
| - // With lock-less commits, it's entirely possible (and |
| - // fine) to hit a FileNotFound exception above. In |
| - // this case, we want to explicitly close any subset |
| - // of things that were opened so that we don't have to |
| - // wait for a GC to do so. |
| - if (!success) { |
| - close(); |
| - } |
| - } |
| - } |
| - |
| - public int getSkipInterval() { |
| - return origEnum.skipInterval; |
| - } |
| - |
| - public int getMaxSkipLevels() { |
| - return origEnum.maxSkipLevels; |
| - } |
| - |
| - void close() throws IOException { |
| - if (origEnum != null) |
| - origEnum.close(); |
| - threadResources.close(); |
| - } |
| - |
| - /** Returns the number of term/value pairs in the set. */ |
| - long size() { |
| - return size; |
| - } |
| - |
| - private ThreadResources getThreadResources() { |
| - ThreadResources resources = threadResources.get(); |
| - if (resources == null) { |
| - resources = new ThreadResources(); |
| - resources.termEnum = terms(); |
| - threadResources.set(resources); |
| - } |
| - return resources; |
| - } |
| - |
| - private static final Comparator<BytesRef> legacyComparator = |
| - BytesRef.getUTF8SortedAsUTF16Comparator(); |
| - |
| - private final int compareAsUTF16(Term term1, Term term2) { |
| - if (term1.field().equals(term2.field())) { |
| - return legacyComparator.compare(term1.bytes(), term2.bytes()); |
| - } else { |
| - return term1.field().compareTo(term2.field()); |
| - } |
| - } |
| - |
| - /** Returns the TermInfo for a Term in the set, or null. */ |
| - TermInfo get(Term term) throws IOException { |
| - return get(term, false); |
| - } |
| - |
| - /** Returns the TermInfo for a Term in the set, or null. */ |
| - private TermInfo get(Term term, boolean mustSeekEnum) throws IOException { |
| - if (size == 0) return null; |
| - |
| - ensureIndexIsRead(); |
| - TermInfoAndOrd tiOrd = termsCache.get(new CloneableTerm(term)); |
| - ThreadResources resources = getThreadResources(); |
| - |
| - if (!mustSeekEnum && tiOrd != null) { |
| - return tiOrd; |
| - } |
| - |
| - return seekEnum(resources.termEnum, term, tiOrd, true); |
| - } |
| - |
| - public void cacheCurrentTerm(SegmentTermEnum enumerator) { |
| - termsCache.put(new CloneableTerm(enumerator.term()), |
| - new TermInfoAndOrd(enumerator.termInfo, |
| - enumerator.position)); |
| - } |
| - |
| - TermInfo seekEnum(SegmentTermEnum enumerator, Term term, boolean useCache) throws IOException { |
| - if (useCache) { |
| - return seekEnum(enumerator, term, |
| - termsCache.get(new CloneableTerm(term.deepCopyOf())), |
| - useCache); |
| - } else { |
| - return seekEnum(enumerator, term, null, useCache); |
| - } |
| - } |
| - |
| - TermInfo seekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd, boolean useCache) throws IOException { |
| - if (size == 0) { |
| - return null; |
| - } |
| - |
| - // optimize sequential access: first try scanning cached enum w/o seeking |
| - if (enumerator.term() != null // term is at or past current |
| - && ((enumerator.prev() != null && compareAsUTF16(term, enumerator.prev())> 0) |
| - || compareAsUTF16(term, enumerator.term()) >= 0)) { |
| - int enumOffset = (int)(enumerator.position/totalIndexInterval)+1; |
| - if (indexLength == enumOffset // but before end of block |
| - || index.compareTo(term, enumOffset) < 0) { |
| - // no need to seek |
| - |
| - final TermInfo ti; |
| - int numScans = enumerator.scanTo(term); |
| - if (enumerator.term() != null && compareAsUTF16(term, enumerator.term()) == 0) { |
| - ti = enumerator.termInfo; |
| - if (numScans > 1) { |
| - // we only want to put this TermInfo into the cache if |
| - // scanEnum skipped more than one dictionary entry. |
| - // This prevents RangeQueries or WildcardQueries to |
| - // wipe out the cache when they iterate over a large numbers |
| - // of terms in order |
| - if (tiOrd == null) { |
| - if (useCache) { |
| - termsCache.put(new CloneableTerm(term.deepCopyOf()), |
| - new TermInfoAndOrd(ti, enumerator.position)); |
| - } |
| - } else { |
| - assert sameTermInfo(ti, tiOrd, enumerator); |
| - assert (int) enumerator.position == tiOrd.termOrd; |
| - } |
| - } |
| - } else { |
| - ti = null; |
| - } |
| - |
| - return ti; |
| - } |
| - } |
| - |
| - // random-access: must seek |
| - final int indexPos; |
| - if (tiOrd != null) { |
| - indexPos = (int) (tiOrd.termOrd / totalIndexInterval); |
| - } else { |
| - // Must do binary search: |
| - indexPos = index.getIndexOffset(term); |
| - } |
| - |
| - index.seekEnum(enumerator, indexPos); |
| - enumerator.scanTo(term); |
| - final TermInfo ti; |
| - |
| - if (enumerator.term() != null && compareAsUTF16(term, enumerator.term()) == 0) { |
| - ti = enumerator.termInfo; |
| - if (tiOrd == null) { |
| - if (useCache) { |
| - termsCache.put(new CloneableTerm(term.deepCopyOf()), |
| - new TermInfoAndOrd(ti, enumerator.position)); |
| - } |
| - } else { |
| - assert sameTermInfo(ti, tiOrd, enumerator); |
| - assert enumerator.position == tiOrd.termOrd; |
| - } |
| - } else { |
| - ti = null; |
| - } |
| - return ti; |
| - } |
| - |
| - // called only from asserts |
| - private boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) { |
| - if (ti1.docFreq != ti2.docFreq) { |
| - return false; |
| - } |
| - if (ti1.freqPointer != ti2.freqPointer) { |
| - return false; |
| - } |
| - if (ti1.proxPointer != ti2.proxPointer) { |
| - return false; |
| - } |
| - // skipOffset is only valid when docFreq >= skipInterval: |
| - if (ti1.docFreq >= enumerator.skipInterval && |
| - ti1.skipOffset != ti2.skipOffset) { |
| - return false; |
| - } |
| - return true; |
| - } |
| - |
| - private void ensureIndexIsRead() { |
| - if (index == null) { |
| - throw new IllegalStateException("terms index was not loaded when this reader was created"); |
| - } |
| - } |
| - |
| - /** Returns the position of a Term in the set or -1. */ |
| - long getPosition(Term term) throws IOException { |
| - if (size == 0) return -1; |
| - |
| - ensureIndexIsRead(); |
| - int indexOffset = index.getIndexOffset(term); |
| - |
| - SegmentTermEnum enumerator = getThreadResources().termEnum; |
| - index.seekEnum(enumerator, indexOffset); |
| - |
| - while(compareAsUTF16(term, enumerator.term()) > 0 && enumerator.next()) {} |
| - |
| - if (compareAsUTF16(term, enumerator.term()) == 0) |
| - return enumerator.position; |
| - else |
| - return -1; |
| - } |
| - |
| - /** Returns an enumeration of all the Terms and TermInfos in the set. */ |
| - public SegmentTermEnum terms() { |
| - return origEnum.clone(); |
| - } |
| - |
| - /** Returns an enumeration of terms starting at or after the named term. */ |
| - public SegmentTermEnum terms(Term term) throws IOException { |
| - get(term, true); |
| - return getThreadResources().termEnum.clone(); |
| - } |
| -} |
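| |
| The termsCache in the reader above is a DoubleBarrelLRUCache, which approximates LRU by keeping two internal maps and clearing the older one in bulk once the newer fills up; keys must be cloneable so the cache can retain them safely. A hedged sketch of the idiom, reusing the private key/value classes above — the lookupOnDisk helper is hypothetical: |
| |
| // Sketch of the caching idiom used above; lookupOnDisk is a hypothetical helper. |
| DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd> cache = |
|     new DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd>(1024); |
| |
| TermInfoAndOrd cached = cache.get(new CloneableTerm(term)); |
| if (cached == null) { |
|   TermInfoAndOrd fresh = lookupOnDisk(term);               // hypothetical |
|   cache.put(new CloneableTerm(term.deepCopyOf()), fresh);  // deep-copy shared keys, as above |
| } |
| |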
| Index: lucene/core/src/java/org/apache/lucene/index/IndexFormatTooOldException.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/IndexFormatTooOldException.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/index/IndexFormatTooOldException.java (working copy) |
| @@ -28,7 +28,7 @@ |
| /** @lucene.internal */ |
| public IndexFormatTooOldException(String resourceDesc, String version) { |
| super("Format version is not supported (resource: " + resourceDesc + "): " + |
| - version + ". This version of Lucene only supports indexes created with release 3.0 and later."); |
| + version + ". This version of Lucene only supports indexes created with release 4.0 and later."); |
| assert resourceDesc != null; |
| } |
| |
| @@ -41,7 +41,7 @@ |
| public IndexFormatTooOldException(String resourceDesc, int version, int minVersion, int maxVersion) { |
| super("Format version is not supported (resource: " + resourceDesc + "): " + |
| version + " (needs to be between " + minVersion + " and " + maxVersion + |
| - "). This version of Lucene only supports indexes created with release 3.0 and later."); |
| + "). This version of Lucene only supports indexes created with release 4.0 and later."); |
| assert resourceDesc != null; |
| } |
| |
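| With 3.x read support removed, this exception is now the first thing a caller sees when pointing current code at a pre-4.0 index. A sketch of the caller's view — the index path is illustrative: |
| |
| import java.io.File; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.IndexFormatTooOldException; |
| import org.apache.lucene.store.FSDirectory; |
| |
| public class OpenOldIndex { |
|   public static void main(String[] args) throws Exception { |
|     try { |
|       DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/old/3x/index"))); |
|       reader.close(); |
|     } catch (IndexFormatTooOldException e) { |
|       // Upgrade the index with a Lucene version that can still read it |
|       // (e.g. its IndexUpgrader tool), or re-index, before opening it here. |
|       System.err.println(e.getMessage()); |
|     } |
|   } |
| } |
| |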
| Index: lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java (working copy) |
| @@ -232,7 +232,6 @@ |
| assert w != null; |
| boolean hasDeletions = w.numDeletedDocs(info) > 0; |
| return !hasDeletions && |
| - !info.info.hasSeparateNorms() && |
| info.info.dir == w.getDirectory() && |
| (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0); |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) |
| @@ -33,8 +33,6 @@ |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; |
| -import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; |
| import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; |
| import org.apache.lucene.index.FieldInfos.FieldNumbers; |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| @@ -2223,7 +2221,7 @@ |
| assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name; |
| |
| String newSegName = newSegmentName(); |
| - String dsName = Lucene3xSegmentInfoFormat.getDocStoreSegment(info.info); |
| + String dsName = info.info.name; |
| |
| if (infoStream.isEnabled("IW")) { |
| infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info); |
| @@ -2355,6 +2353,7 @@ |
| } |
| |
| /** Copies the segment files as-is into the IndexWriter's directory. */ |
| + // TODO: this can be substantially simplified now that 3.x support/shared docstores is removed! |
| + // TODO: this can be substantially simplified now that 3.x support and shared doc stores are removed! |
| private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName, |
| Map<String, String> dsNames, Set<String> dsFilesCopied, IOContext context, |
| Set<String> copiedFiles) |
| @@ -2363,7 +2362,7 @@ |
| // only relevant for segments that share doc store with others, |
| // because the DS might have been copied already, in which case we |
| // just want to update the DS name of this SegmentInfo. |
| - final String dsName = Lucene3xSegmentInfoFormat.getDocStoreSegment(info.info); |
| + final String dsName = info.info.name; |
| assert dsName != null; |
| final String newDsName; |
| if (dsNames.containsKey(dsName)) { |
| @@ -2377,8 +2376,6 @@ |
| // so we don't pass a null value to the si writer |
| FieldInfos fis = getFieldInfos(info.info); |
| |
| - Set<String> docStoreFiles3xOnly = Lucene3xCodec.getDocStoreFiles(info.info); |
| - |
| final Map<String,String> attributes; |
| // copy the attributes map, we might modify it below. |
| // also we need to ensure its read-write, since we will invoke the SIwriter (which might want to set something). |
| @@ -2387,12 +2384,6 @@ |
| } else { |
| attributes = new HashMap<String,String>(info.info.attributes()); |
| } |
| - if (docStoreFiles3xOnly != null) { |
| - // only violate the codec this way if it's preflex & |
| - // shares doc stores |
| - // change docStoreSegment to newDsName |
| - attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, newDsName); |
| - } |
| |
| //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion()); |
| // Same SI as before but we change directory, name and docStoreSegment: |
| @@ -2407,11 +2398,7 @@ |
| // before writing SegmentInfo: |
| for (String file: info.files()) { |
| final String newFileName; |
| - if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.contains(file)) { |
| - newFileName = newDsName + IndexFileNames.stripSegmentName(file); |
| - } else { |
| - newFileName = segName + IndexFileNames.stripSegmentName(file); |
| - } |
| + newFileName = segName + IndexFileNames.stripSegmentName(file); |
| segFiles.add(newFileName); |
| } |
| newInfo.setFiles(segFiles); |
| @@ -2432,16 +2419,7 @@ |
| // Copy the segment's files |
| for (String file: info.files()) { |
| |
| - final String newFileName; |
| - if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.contains(file)) { |
| - newFileName = newDsName + IndexFileNames.stripSegmentName(file); |
| - if (dsFilesCopied.contains(newFileName)) { |
| - continue; |
| - } |
| - dsFilesCopied.add(newFileName); |
| - } else { |
| - newFileName = segName + IndexFileNames.stripSegmentName(file); |
| - } |
| + final String newFileName = segName + IndexFileNames.stripSegmentName(file); |
| |
| if (siFiles.contains(newFileName)) { |
| // We already rewrote this above |
| @@ -4016,7 +3994,6 @@ |
| if (infoStream.isEnabled("IW")) { |
| infoStream.message("IW", "create compound file " + fileName); |
| } |
| - assert Lucene3xSegmentInfoFormat.getDocStoreOffset(info) == -1; |
| // Now merge all added files |
| Collection<String> files = info.files(); |
| CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); |
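| |
| The simplified copy loop above relies on IndexFileNames.stripSegmentName to rename files one-to-one into the new segment. A worked illustration — the file and segment names are hypothetical: |
| |
| // Worked illustration of the rename in the simplified loop; names are hypothetical. |
| String file = "_3_1.del";        // a file belonging to source segment "_3" |
| String segName = "_b";           // the new segment name chosen by newSegmentName() |
| String newFileName = segName + IndexFileNames.stripSegmentName(file); |
| // stripSegmentName("_3_1.del") -> "_1.del", so newFileName == "_b_1.del" |
| |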
| Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (working copy) |
| @@ -33,9 +33,6 @@ |
| |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.LiveDocsFormat; |
| -import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; |
| -import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; |
| -import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoReader; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.ChecksumIndexOutput; |
| import org.apache.lucene.store.DataOutput; // javadocs |
| @@ -274,32 +271,29 @@ |
| |
| ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ)); |
| try { |
| - final int format = input.readInt(); |
| - if (format == CodecUtil.CODEC_MAGIC) { |
| - // 4.0+ |
| - CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_40); |
| - version = input.readLong(); |
| - counter = input.readInt(); |
| - int numSegments = input.readInt(); |
| - for(int seg=0;seg<numSegments;seg++) { |
| - String segName = input.readString(); |
| - Codec codec = Codec.forName(input.readString()); |
| - //System.out.println("SIS.read seg=" + seg + " codec=" + codec); |
| - SegmentInfo info = codec.segmentInfoFormat().getSegmentInfosReader().read(directory, segName, IOContext.READ); |
| - info.setCodec(codec); |
| - long delGen = input.readLong(); |
| - int delCount = input.readInt(); |
| - assert delCount <= info.getDocCount(); |
| - add(new SegmentInfoPerCommit(info, delCount, delGen)); |
| - } |
| - userData = input.readStringStringMap(); |
| - } else { |
| - Lucene3xSegmentInfoReader.readLegacyInfos(this, directory, input, format); |
| - Codec codec = Codec.forName("Lucene3x"); |
| - for (SegmentInfoPerCommit info : this) { |
| - info.info.setCodec(codec); |
| - } |
| + // NOTE: as long as we want to throw IndexFormatTooOldException (vs CorruptIndexException), we need |
| + // to read the magic ourselves. |
| + int magic = input.readInt(); |
| + if (magic != CodecUtil.CODEC_MAGIC) { |
| + throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); |
| } |
| + // 4.0+ |
| + CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_40); |
| + version = input.readLong(); |
| + counter = input.readInt(); |
| + int numSegments = input.readInt(); |
| + for(int seg=0;seg<numSegments;seg++) { |
| + String segName = input.readString(); |
| + Codec codec = Codec.forName(input.readString()); |
| + //System.out.println("SIS.read seg=" + seg + " codec=" + codec); |
| + SegmentInfo info = codec.segmentInfoFormat().getSegmentInfosReader().read(directory, segName, IOContext.READ); |
| + info.setCodec(codec); |
| + long delGen = input.readLong(); |
| + int delCount = input.readInt(); |
| + assert delCount <= info.getDocCount(); |
| + add(new SegmentInfoPerCommit(info, delCount, delGen)); |
| + } |
| + userData = input.readStringStringMap(); |
| |
| final long checksumNow = input.getChecksum(); |
| final long checksumThen = input.readLong(); |
| @@ -351,8 +345,6 @@ |
| ChecksumIndexOutput segnOutput = null; |
| boolean success = false; |
| |
| - final Set<String> upgradedSIFiles = new HashSet<String>(); |
| - |
| try { |
| segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName, IOContext.DEFAULT)); |
| CodecUtil.writeHeader(segnOutput, "segments", VERSION_40); |
| @@ -368,16 +360,6 @@ |
| assert si.dir == directory; |
| |
| assert siPerCommit.getDelCount() <= si.getDocCount(); |
| - |
| - // If this segment is pre-4.x, perform a one-time |
| - // "ugprade" to write the .si file for it: |
| - String version = si.getVersion(); |
| - if (version == null || StringHelper.getVersionComparator().compare(version, "4.0") < 0) { |
| - String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION); |
| - if (!directory.fileExists(fileName)) { |
| - upgradedSIFiles.add(write3xInfo(directory, si, IOContext.DEFAULT)); |
| - } |
| - } |
| } |
| segnOutput.writeStringStringMap(userData); |
| pendingSegnOutput = segnOutput; |
| @@ -388,14 +370,6 @@ |
| // but suppress any exception: |
| IOUtils.closeWhileHandlingException(segnOutput); |
| |
| - for(String fileName : upgradedSIFiles) { |
| - try { |
| - directory.deleteFile(fileName); |
| - } catch (Throwable t) { |
| - // Suppress so we keep throwing the original exception |
| - } |
| - } |
| - |
| try { |
| // Try not to leave a truncated segments_N file in |
| // the index: |
| @@ -407,49 +381,6 @@ |
| } |
| } |
| |
| - @Deprecated |
| - public static String write3xInfo(Directory dir, SegmentInfo si, IOContext context) throws IOException { |
| - |
| - // NOTE: this is NOT how 3.x is really written... |
| - String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION); |
| - si.addFile(fileName); |
| - |
| - //System.out.println("UPGRADE write " + fileName); |
| - boolean success = false; |
| - IndexOutput output = dir.createOutput(fileName, context); |
| - try { |
| - // we are about to write this SI in 3.x format, dropping all codec information, etc. |
| - // so it had better be a 3.x segment or you will get very confusing errors later. |
| - assert si.getCodec() instanceof Lucene3xCodec : "broken test, trying to mix preflex with other codecs"; |
| - CodecUtil.writeHeader(output, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, |
| - Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT); |
| - // Write the Lucene version that created this segment, since 3.1 |
| - output.writeString(si.getVersion()); |
| - output.writeInt(si.getDocCount()); |
| - |
| - output.writeStringStringMap(si.attributes()); |
| - |
| - output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); |
| - output.writeStringStringMap(si.getDiagnostics()); |
| - output.writeStringSet(si.files()); |
| - |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(output); |
| - try { |
| - si.dir.deleteFile(fileName); |
| - } catch (Throwable t) { |
| - // Suppress so we keep throwing the original exception |
| - } |
| - } else { |
| - output.close(); |
| - } |
| - } |
| - |
| - return fileName; |
| - } |
| - |
| /** |
| * Returns a copy of this instance, also copying each |
| * SegmentInfo. |
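| |
| For reference, the header the rewritten read path expects is the standard 4.0 codec header; as the NOTE above says, the read side consumes the leading magic itself so a pre-4.0 file can be reported as too old rather than corrupt. A sketch of the layout, per the writeHeader call in the write path: |
| |
| // segments_N header after this patch, as written by |
| // CodecUtil.writeHeader(segnOutput, "segments", VERSION_40): |
| //   int    magic   = CodecUtil.CODEC_MAGIC (0x3fd76c17) |
| //   String codec   = "segments" |
| //   int    version = VERSION_40 |
| // A 3.x segments file starts with a plain (negative) format int instead of the |
| // magic, which is why a readInt() mismatch is mapped to IndexFormatTooOldException |
| // before checkHeaderNoMagic validates the remainder. |
| |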
| Index: lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java (working copy) |
| @@ -629,7 +629,6 @@ |
| assert w != null; |
| boolean hasDeletions = w.numDeletedDocs(info) > 0; |
| return !hasDeletions && |
| - !info.info.hasSeparateNorms() && |
| info.info.dir == w.getDirectory() && |
| (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0); |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1344053) |
| +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) |
| @@ -26,7 +26,6 @@ |
| import java.util.Set; |
| |
| import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.TrackingDirectoryWrapper; |
| |
| @@ -107,14 +106,6 @@ |
| } |
| |
| /** |
| - * @deprecated separate norms are not supported in >= 4.0 |
| - */ |
| - @Deprecated |
| - boolean hasSeparateNorms() { |
| - return getAttribute(Lucene3xSegmentInfoFormat.NORMGEN_KEY) != null; |
| - } |
| - |
| - /** |
| * Mark whether this segment is stored as a compound file. |
| * |
| * @param isCompoundFile true if this is a compound file; |
| Index: lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec |
| =================================================================== |
| --- lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (revision 1344053) |
| +++ lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (working copy) |
| @@ -14,6 +14,5 @@ |
| # limitations under the License. |
| |
| org.apache.lucene.codecs.lucene40.Lucene40Codec |
| -org.apache.lucene.codecs.lucene3x.Lucene3xCodec |
| org.apache.lucene.codecs.simpletext.SimpleTextCodec |
| org.apache.lucene.codecs.appending.AppendingCodec |
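| |
| Codec names resolve through this SPI file, so with the Lucene3x entry gone, lookups by that name fail fast. A sketch of the behavior — the lookup itself is the real Codec.forName API: |
| |
| // Sketch: name-based codec lookup against this services file after the change. |
| Codec lucene40 = Codec.forName("Lucene40");    // still registered above |
| try { |
|   Codec.forName("Lucene3x");                   // no longer registered |
| } catch (IllegalArgumentException e) { |
|   // the SPI loader reports unknown codec names this way |
| } |
| |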
| Index: lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java |
| =================================================================== |
| --- lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (revision 1344053) |
| +++ lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (working copy) |
| @@ -21,12 +21,10 @@ |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.CollationTestBase; |
| import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| |
| import java.text.Collator; |
| import java.util.Locale; |
| |
| -@SuppressCodecs("Lucene3x") |
| public class TestCollationKeyAnalyzer extends CollationTestBase { |
| // the sort order of Ø versus U depends on the version of the rules being used |
| // for the inherited root locale: Ø's order isnt specified in Locale.US since |
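| |
| This and the similar removals below drop test-level exclusions that only existed because of the old codec; the annotation itself is unchanged and still useful for other codecs. An illustrative, hypothetical use: |
| |
| // Hypothetical example: the annotation still applies to other registered codecs, |
| // e.g. skipping the slow plain-text codec in a heavy test. |
| @SuppressCodecs("SimpleText") |
| public class SomeSlowTest extends LuceneTestCase { |
| } |
| |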
| Index: lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java |
| =================================================================== |
| --- lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (revision 1344053) |
| +++ lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (working copy) |
| @@ -22,13 +22,10 @@ |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.CollationTestBase; |
| -import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| |
| import java.util.Locale; |
| |
| -@SuppressCodecs("Lucene3x") |
| public class TestICUCollationKeyAnalyzer extends CollationTestBase { |
| |
| private Collator collator = Collator.getInstance(new Locale("fa")); |
| Index: solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java |
| =================================================================== |
| --- solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java (revision 1344053) |
| +++ solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java (working copy) |
| @@ -427,10 +427,7 @@ |
| /** |
| * test collection-level term stats (new in 4.x indexes) |
| */ |
| - public void testTotalTermFreq() throws Exception { |
| - assumeFalse("PreFlex codec does not support collection-level term stats", |
| - "Lucene3x".equals(Codec.getDefault().getName())); |
| - |
| + public void testTotalTermFreq() throws Exception { |
| clearIndex(); |
| |
| assertU(adoc("id","1", "a_tdt","2009-08-31T12:10:10.123Z", "b_tdt","2009-08-31T12:10:10.124Z")); |
| Index: solr/core/src/test/org/apache/solr/schema/TestCollationField.java |
| =================================================================== |
| --- solr/core/src/test/org/apache/solr/schema/TestCollationField.java (revision 1344053) |
| +++ solr/core/src/test/org/apache/solr/schema/TestCollationField.java (working copy) |
| @@ -25,15 +25,12 @@ |
| |
| import org.apache.commons.io.FileUtils; |
| import org.apache.commons.io.IOUtils; |
| -import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| import org.apache.solr.SolrTestCaseJ4; |
| import org.junit.BeforeClass; |
| |
| /** |
| * Tests {@link CollationField} with TermQueries, RangeQueries, and sort order. |
| */ |
| -@SuppressCodecs("Lucene3x") |
| public class TestCollationField extends SolrTestCaseJ4 { |
| |
| @BeforeClass |
| Index: solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java |
| =================================================================== |
| --- solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java (revision 1344053) |
| +++ solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java (working copy) |
| @@ -17,14 +17,12 @@ |
| * limitations under the License. |
| */ |
| |
| -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| import org.apache.solr.SolrTestCaseJ4; |
| import org.junit.BeforeClass; |
| |
| /** |
| * Tests expert options of {@link ICUCollationField}. |
| */ |
| -@SuppressCodecs("Lucene3x") |
| public class TestICUCollationFieldOptions extends SolrTestCaseJ4 { |
| @BeforeClass |
| public static void beforeClass() throws Exception { |
| Index: solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java |
| =================================================================== |
| --- solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java (revision 1344053) |
| +++ solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java (working copy) |
| @@ -22,7 +22,6 @@ |
| |
| import org.apache.commons.io.FileUtils; |
| import org.apache.commons.io.IOUtils; |
| -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| import org.apache.solr.SolrTestCaseJ4; |
| import org.junit.BeforeClass; |
| |
| @@ -33,7 +32,6 @@ |
| /** |
| * Tests {@link ICUCollationField} with TermQueries, RangeQueries, and sort order. |
| */ |
| -@SuppressCodecs("Lucene3x") |
| public class TestICUCollationField extends SolrTestCaseJ4 { |
| |
| @BeforeClass |