| package org.apache.lucene.index; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util._TestUtil;
| |
| /* |
| Verify we can read the pre-2.1 file format, do searches |
| against it, and add documents to it. |
| */ |
| |
| public class TestBackwardsCompatibility extends LuceneTestCase { |
| |
| // Uncomment these cases & run them on an older Lucene |
| // version, to generate an index to test backwards |
| // compatibility. Then, cd to build/test/index.cfs and |
| // run "zip index.<VERSION>.cfs.zip *"; cd to |
| // build/test/index.nocfs and run "zip |
| // index.<VERSION>.nocfs.zip *". Then move those 2 zip |
| // files to your trunk checkout and add them to the |
| // oldNames array. |
| |
| /* |
| public void testCreateCFS() throws IOException { |
| createIndex("index.cfs", true, false); |
| } |
| |
| public void testCreateNoCFS() throws IOException { |
| createIndex("index.nocfs", false, false); |
| } |
| */ |
| |
| /* |
| // These are only needed for the special upgrade test to verify |
| // that also optimized indexes are correctly upgraded by IndexUpgrader. |
  // You don't need them to be built for non-3.1 (the test is happy with just one
  // "old" segment format; the version is unimportant):
| |
| public void testCreateOptimizedCFS() throws IOException { |
| createIndex("index.optimized.cfs", true, true); |
| } |
| |
| public void testCreateOptimizedNoCFS() throws IOException { |
| createIndex("index.optimized.nocfs", false, true); |
| } |
| */ |
| |
| final String[] oldNames = {"19.cfs", |
| "19.nocfs", |
| "20.cfs", |
| "20.nocfs", |
| "21.cfs", |
| "21.nocfs", |
| "22.cfs", |
| "22.nocfs", |
| "23.cfs", |
| "23.nocfs", |
| "24.cfs", |
| "24.nocfs", |
| "29.cfs", |
| "29.nocfs", |
| "30.cfs", |
| "30.nocfs", |
| "31.cfs", |
| "31.nocfs", |
| "32.cfs", |
| "32.nocfs", |
| }; |
| |
| final String[] oldOptimizedNames = {"31.optimized.cfs", |
| "31.optimized.nocfs", |
| }; |
| |
| private void assertCompressedFields29(Directory dir, boolean shouldStillBeCompressed) throws IOException { |
| int count = 0; |
| final int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2; |
| // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields: |
| final int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.length; |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| try { |
| // look into sub readers and check if raw merge is on/off |
| List<IndexReader> readers = new ArrayList<IndexReader>(); |
| ReaderUtil.gatherSubReaders(readers, reader); |
| for (IndexReader ir : readers) { |
| final FieldsReader fr = ((SegmentReader) ir).getFieldsReader(); |
| assertTrue("for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index", |
| shouldStillBeCompressed != fr.canReadRawDocs()); |
| } |
| |
| // test that decompression works correctly |
| for(int i=0; i<reader.maxDoc(); i++) { |
| if (!reader.isDeleted(i)) { |
| Document d = reader.document(i); |
| if (d.get("content3") != null) continue; |
| count++; |
| Fieldable compressed = d.getFieldable("compressed"); |
| if (Integer.parseInt(d.get("id")) % 2 == 0) { |
| assertFalse(compressed.isBinary()); |
| assertEquals("incorrectly decompressed string", TEXT_TO_COMPRESS, compressed.stringValue()); |
| } else { |
| assertTrue(compressed.isBinary()); |
| assertTrue("incorrectly decompressed binary", Arrays.equals(BINARY_TO_COMPRESS, compressed.getBinaryValue())); |
| } |
| } |
| } |
| |
| // check if field was decompressed after optimize |
| for(int i=0; i<reader.maxDoc(); i++) { |
| if (!reader.isDeleted(i)) { |
| Document d = reader.document(i, new FieldSelector() { |
| public FieldSelectorResult accept(String fieldName) { |
| return ("compressed".equals(fieldName)) ? FieldSelectorResult.SIZE : FieldSelectorResult.LOAD; |
| } |
| }); |
| if (d.get("content3") != null) continue; |
| count++; |
| // read the size from the binary value using DataInputStream (this prevents us from doing the shift ops ourselves): |
| final DataInputStream ds = new DataInputStream(new ByteArrayInputStream(d.getFieldable("compressed").getBinaryValue())); |
| final int actualSize = ds.readInt(); |
| ds.close(); |
| final int compressedSize = Integer.parseInt(d.get("compressedSize")); |
| final boolean binary = Integer.parseInt(d.get("id")) % 2 > 0; |
| final int shouldSize = shouldStillBeCompressed ? |
| compressedSize : |
| (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH); |
| assertEquals("size incorrect", shouldSize, actualSize); |
| if (!shouldStillBeCompressed) { |
| assertFalse("uncompressed field should have another size than recorded in index", compressedSize == actualSize); |
| } |
| } |
| } |
| assertEquals("correct number of tests", 34 * 2, count); |
| } finally { |
| reader.close(); |
| } |
| } |
| |
| public void testUpgrade29Compression() throws IOException { |
| int hasTested29 = 0; |
| |
| for(int i=0;i<oldNames.length;i++) { |
| File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]); |
| _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir); |
| Directory dir = newFSDirectory(oldIndxeDir); |
| |
| if (oldNames[i].startsWith("29.")) { |
| assertCompressedFields29(dir, true); |
| hasTested29++; |
| } |
| |
| new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false) |
| .upgrade(); |
| |
| if (oldNames[i].startsWith("29.")) { |
| assertCompressedFields29(dir, false); |
| hasTested29++; |
| } |
| |
| dir.close(); |
| _TestUtil.rmDir(oldIndxeDir); |
| } |
| |
| assertEquals("test for compressed field should have run 4 times", 4, hasTested29); |
| } |
| |
| public void testAddOldIndexes() throws IOException { |
| for (String name : oldNames) { |
| File oldIndxeDir = _TestUtil.getTempDir(name); |
| _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir); |
| Directory dir = newFSDirectory(oldIndxeDir); |
| |
| Directory targetDir = newDirectory(); |
| IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); |
| w.addIndexes(new Directory[] { dir }); |
| w.close(); |
| |
| dir.close(); |
| targetDir.close(); |
| _TestUtil.rmDir(oldIndxeDir); |
| } |
| } |
| |
| public void testAddOldIndexesReader() throws IOException { |
| for (String name : oldNames) { |
| File oldIndxeDir = _TestUtil.getTempDir(name); |
| _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir); |
| Directory dir = newFSDirectory(oldIndxeDir); |
| IndexReader reader = IndexReader.open(dir); |
| |
| Directory targetDir = newDirectory(); |
| IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); |
| w.addIndexes(new IndexReader[] { reader }); |
| w.close(); |
| reader.close(); |
| |
| dir.close(); |
| targetDir.close(); |
| _TestUtil.rmDir(oldIndxeDir); |
| } |
| } |
| |
| public void testSearchOldIndex() throws IOException { |
| for(int i=0;i<oldNames.length;i++) { |
| File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]); |
| _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir); |
| searchIndex(oldIndxeDir, oldNames[i]); |
| _TestUtil.rmDir(oldIndxeDir); |
| } |
| } |
| |
| public void testIndexOldIndexNoAdds() throws IOException { |
| for(int i=0;i<oldNames.length;i++) { |
| File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]); |
| _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir); |
| changeIndexNoAdds(random, oldIndxeDir); |
| _TestUtil.rmDir(oldIndxeDir); |
| } |
| } |
| |
| public void testIndexOldIndex() throws IOException { |
| for(int i=0;i<oldNames.length;i++) { |
| if (VERBOSE) { |
| System.out.println("TEST: oldName=" + oldNames[i]); |
| } |
| File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]); |
| _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir); |
| changeIndexWithAdds(random, oldIndxeDir, oldNames[i]); |
| _TestUtil.rmDir(oldIndxeDir); |
| } |
| } |
| |
| private void testHits(ScoreDoc[] hits, int expectedCount, IndexReader reader) throws IOException { |
| final int hitCount = hits.length; |
| assertEquals("wrong number of hits", expectedCount, hitCount); |
| for(int i=0;i<hitCount;i++) { |
| reader.document(hits[i].doc); |
| reader.getTermFreqVectors(hits[i].doc); |
| } |
| } |
| |
| public void searchIndex(File indexDir, String oldName) throws IOException { |
| //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); |
| //Query query = parser.parse("handle:1"); |
| |
| Directory dir = newFSDirectory(indexDir); |
| IndexSearcher searcher = new IndexSearcher(dir, true); |
| IndexReader reader = searcher.getIndexReader(); |
| |
| _TestUtil.checkIndex(dir); |
| |
| for(int i=0;i<35;i++) { |
| if (!reader.isDeleted(i)) { |
| Document d = reader.document(i); |
| List<Fieldable> fields = d.getFields(); |
| if (!oldName.startsWith("19.") && |
| !oldName.startsWith("20.") && |
| !oldName.startsWith("21.") && |
| !oldName.startsWith("22.")) { |
| |
| if (d.getField("content3") == null) { |
| final int numFields = oldName.startsWith("29.") ? 7 : 5; |
| assertEquals(numFields, fields.size()); |
| Field f = d.getField("id"); |
| assertEquals(""+i, f.stringValue()); |
| |
| f = d.getField("utf8"); |
| assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue()); |
| |
| f = d.getField("autf8"); |
| assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue()); |
| |
| f = d.getField("content2"); |
| assertEquals("here is more content with aaa aaa aaa", f.stringValue()); |
| |
| f = d.getField("fie\u2C77ld"); |
| assertEquals("field with non-ascii name", f.stringValue()); |
| } |
| |
| TermFreqVector tfv = reader.getTermFreqVector(i, "utf8"); |
| assertNotNull("docID=" + i + " index=" + indexDir.getName(), tfv); |
| assertTrue(tfv instanceof TermPositionVector); |
| } |
| } else |
| // Only ID 7 is deleted |
| assertEquals(7, i); |
| } |
| |
| ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| |
| // First document should be #21 since it's norm was |
| // increased: |
| Document d = searcher.doc(hits[0].doc); |
| assertEquals("didn't get the right document first", "21", d.get("id")); |
| |
| testHits(hits, 34, searcher.getIndexReader()); |
| |
| if (!oldName.startsWith("19.") && |
| !oldName.startsWith("20.") && |
| !oldName.startsWith("21.") && |
| !oldName.startsWith("22.")) { |
| // Test on indices >= 2.3 |
| hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).scoreDocs; |
| assertEquals(34, hits.length); |
| hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).scoreDocs; |
| assertEquals(34, hits.length); |
| hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).scoreDocs; |
| assertEquals(34, hits.length); |
| } |
| |
| searcher.close(); |
| dir.close(); |
| } |
| |
| private int compare(String name, String v) { |
| int v0 = Integer.parseInt(name.substring(0, 2)); |
| int v1 = Integer.parseInt(v); |
| return v0 - v1; |
| } |
| |
| /* Open pre-lockless index, add docs, do a delete & |
| * setNorm, and search */ |
| public void changeIndexWithAdds(Random random, File oldIndexDir, String origOldName) throws IOException { |
| Directory dir = newFSDirectory(oldIndexDir); |
| // open writer |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); |
| writer.setInfoStream(VERBOSE ? System.out : null); |
| // add 10 docs |
| for(int i=0;i<10;i++) { |
| addDoc(writer, 35+i); |
| } |
| |
| // make sure writer sees right total -- writer seems not to know about deletes in .del? |
| final int expected; |
| if (compare(origOldName, "24") < 0) { |
| expected = 44; |
| } else { |
| expected = 45; |
| } |
| assertEquals("wrong doc count", expected, writer.numDocs()); |
| writer.close(); |
| |
| // make sure searching sees right # hits |
| IndexSearcher searcher = new IndexSearcher(dir, true); |
| ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| Document d = searcher.doc(hits[0].doc); |
| assertEquals("wrong first document", "21", d.get("id")); |
| testHits(hits, 44, searcher.getIndexReader()); |
| searcher.close(); |
| |
| // make sure we can do delete & setNorm against this |
| // pre-lockless segment: |
| IndexReader reader = IndexReader.open(dir, false); |
| searcher = newSearcher(reader); |
| Term searchTerm = new Term("id", "6"); |
| int delCount = reader.deleteDocuments(searchTerm); |
| assertEquals("wrong delete count", 1, delCount); |
| reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", (float) 2.0); |
| reader.close(); |
| searcher.close(); |
| |
| // make sure they "took": |
| searcher = new IndexSearcher(dir, true); |
| hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| assertEquals("wrong number of hits", 43, hits.length); |
| d = searcher.doc(hits[0].doc); |
| assertEquals("wrong first document", "22", d.get("id")); |
| testHits(hits, 43, searcher.getIndexReader()); |
| searcher.close(); |
| |
| // optimize |
| writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); |
| writer.optimize(); |
| writer.close(); |
| |
| searcher = new IndexSearcher(dir, true); |
| hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| assertEquals("wrong number of hits", 43, hits.length); |
| d = searcher.doc(hits[0].doc); |
| testHits(hits, 43, searcher.getIndexReader()); |
| assertEquals("wrong first document", "22", d.get("id")); |
| searcher.close(); |
| |
| dir.close(); |
| } |
| |
| /* Open pre-lockless index, add docs, do a delete & |
| * setNorm, and search */ |
| public void changeIndexNoAdds(Random random, File oldIndexDir) throws IOException { |
| |
| Directory dir = newFSDirectory(oldIndexDir); |
| |
| // make sure searching sees right # hits |
| IndexSearcher searcher = new IndexSearcher(dir, true); |
| ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| assertEquals("wrong number of hits", 34, hits.length); |
| Document d = searcher.doc(hits[0].doc); |
| assertEquals("wrong first document", "21", d.get("id")); |
| searcher.close(); |
| |
| // make sure we can do a delete & setNorm against this |
| // pre-lockless segment: |
| IndexReader reader = IndexReader.open(dir, false); |
| Term searchTerm = new Term("id", "6"); |
| int delCount = reader.deleteDocuments(searchTerm); |
| assertEquals("wrong delete count", 1, delCount); |
| reader.setNorm(22, "content", (float) 2.0); |
| reader.close(); |
| |
| // make sure they "took": |
| searcher = new IndexSearcher(dir, true); |
| hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| assertEquals("wrong number of hits", 33, hits.length); |
| d = searcher.doc(hits[0].doc); |
| assertEquals("wrong first document", "22", d.get("id")); |
| testHits(hits, 33, searcher.getIndexReader()); |
| searcher.close(); |
| |
| // optimize |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); |
| writer.optimize(); |
| writer.close(); |
| |
| searcher = new IndexSearcher(dir, true); |
| hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; |
| assertEquals("wrong number of hits", 33, hits.length); |
| d = searcher.doc(hits[0].doc); |
| assertEquals("wrong first document", "22", d.get("id")); |
| testHits(hits, 33, searcher.getIndexReader()); |
| searcher.close(); |
| |
| dir.close(); |
| } |
| |
| public File createIndex(String dirName, boolean doCFS, boolean optimized) throws IOException { |
| // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes: |
| File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName); |
| _TestUtil.rmDir(indexDir); |
| Directory dir = newFSDirectory(indexDir); |
| LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); |
| mp.setUseCompoundFile(doCFS); |
| mp.setNoCFSRatio(1.0); |
| IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) |
| .setMaxBufferedDocs(10).setMergePolicy(mp); |
| IndexWriter writer = new IndexWriter(dir, conf); |
| |
| for(int i=0;i<35;i++) { |
| addDoc(writer, i); |
| } |
| assertEquals("wrong doc count", 35, writer.maxDoc()); |
| if (optimized) { |
| writer.optimize(); |
| } |
| writer.close(); |
| |
| if (!optimized) { |
| // open fresh writer so we get no prx file in the added segment |
| mp = new LogByteSizeMergePolicy(); |
| mp.setUseCompoundFile(doCFS); |
| mp.setNoCFSRatio(1.0); |
| conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) |
| .setMaxBufferedDocs(10).setMergePolicy(mp); |
| writer = new IndexWriter(dir, conf); |
| addNoProxDoc(writer); |
| writer.close(); |
| |
| // Delete one doc so we get a .del file: |
| IndexReader reader = IndexReader.open(dir, false); |
| Term searchTerm = new Term("id", "7"); |
| int delCount = reader.deleteDocuments(searchTerm); |
| assertEquals("didn't delete the right number of documents", 1, delCount); |
| |
| // Set one norm so we get a .s0 file: |
| reader.setNorm(21, "content", (float) 1.5); |
| reader.close(); |
| } |
| |
| dir.close(); |
| |
| return indexDir; |
| } |
| |
| /* Verifies that the expected file names were produced */ |
| |
| public void testExactFileNames() throws IOException { |
| |
| String outputDirName = "lucene.backwardscompat0.index"; |
| File outputDir = _TestUtil.getTempDir(outputDirName); |
| _TestUtil.rmDir(outputDir); |
| |
| try { |
| Directory dir = newFSDirectory(outputDir); |
| |
| LogMergePolicy mergePolicy = newLogMergePolicy(true, 10); |
| mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS |
| IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(16.0) |
| .setMergePolicy(mergePolicy); |
| IndexWriter writer = new IndexWriter(dir, conf); |
| for(int i=0;i<35;i++) { |
| addDoc(writer, i); |
| } |
| assertEquals("wrong doc count", 35, writer.maxDoc()); |
| writer.close(); |
| |
| // Delete one doc so we get a .del file: |
| IndexReader reader = IndexReader.open(dir, false); |
| Term searchTerm = new Term("id", "7"); |
| int delCount = reader.deleteDocuments(searchTerm); |
| assertEquals("didn't delete the right number of documents", 1, delCount); |
| |
| // Set one norm so we get a .s0 file: |
| reader.setNorm(21, "content", (float) 1.5); |
| reader.close(); |
| |
| // The numbering of fields can vary depending on which |
| // JRE is in use. On some JREs we see content bound to |
| // field 0; on others, field 1. So, here we have to |
| // figure out which field number corresponds to |
| // "content", and then set our expected file names below |
| // accordingly: |
| CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs"); |
| FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm"); |
| int contentFieldIndex = -1; |
| for(int i=0;i<fieldInfos.size();i++) { |
| FieldInfo fi = fieldInfos.fieldInfo(i); |
| if (fi.name.equals("content")) { |
| contentFieldIndex = i; |
| break; |
| } |
| } |
| cfsReader.close(); |
| assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1); |
| |
| // Now verify file names: |
| String[] expected = new String[] {"_0.cfs", |
| "_0_1.del", |
| "_0_1.s" + contentFieldIndex, |
| "segments_2", |
| "segments.gen"}; |
| |
| String[] actual = dir.listAll(); |
| Arrays.sort(expected); |
| Arrays.sort(actual); |
| if (!Arrays.equals(expected, actual)) { |
| fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual)); |
| } |
| dir.close(); |
| } finally { |
| _TestUtil.rmDir(outputDir); |
| } |
| } |
| |
| private String asString(String[] l) { |
| String s = ""; |
| for(int i=0;i<l.length;i++) { |
| if (i > 0) { |
| s += "\n "; |
| } |
| s += l[i]; |
| } |
| return s; |
| } |
| |
| private void addDoc(IndexWriter writer, int id) throws IOException |
| { |
| Document doc = new Document(); |
| doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); |
| doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED)); |
| doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| /* This was used in 2.9 to generate an index with compressed field: |
| if (id % 2 == 0) { |
| doc.add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED)); |
| doc.add(new Field("compressedSize", Integer.toString(TEXT_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED)); |
| } else { |
| doc.add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS)); |
| doc.add(new Field("compressedSize", Integer.toString(BINARY_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED)); |
| } |
| */ |
| // add numeric fields, to test if later versions preserve encoding |
| doc.add(new NumericField("trieInt", 4).setIntValue(id)); |
| doc.add(new NumericField("trieLong", 4).setLongValue(id)); |
| writer.addDocument(doc); |
| } |
| |
| private void addNoProxDoc(IndexWriter writer) throws IOException { |
| Document doc = new Document(); |
| Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED); |
| f.setIndexOptions(IndexOptions.DOCS_ONLY); |
| doc.add(f); |
| f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO); |
| f.setIndexOptions(IndexOptions.DOCS_ONLY); |
| doc.add(f); |
| writer.addDocument(doc); |
| } |
| |
| static final String TEXT_TO_COMPRESS = "this is a compressed field and should appear in 3.0 as an uncompressed field after merge"; |
| // FieldSelectorResult.SIZE returns compressed size for compressed fields, which are internally handled as binary; |
| // do it in the same way like FieldsWriter, do not use CompressionTools.compressString() for compressed fields: |
| /* This was used in 2.9 to generate an index with compressed field: |
| static final int TEXT_COMPRESSED_LENGTH; |
| static { |
| try { |
| TEXT_COMPRESSED_LENGTH = CompressionTools.compress(TEXT_TO_COMPRESS.getBytes("UTF-8")).length; |
| } catch (Exception e) { |
| throw new RuntimeException(); |
| } |
| } |
| */ |
| static final byte[] BINARY_TO_COMPRESS = new byte[]{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}; |
| /* This was used in 2.9 to generate an index with compressed field: |
| static final int BINARY_COMPRESSED_LENGTH = CompressionTools.compress(BINARY_TO_COMPRESS).length; |
| */ |
| |
| public void testNumericFields() throws Exception { |
| for(int i=0;i<oldNames.length;i++) { |
| // only test indexes >= 3.0 |
| if (oldNames[i].compareTo("30.") < 0) continue; |
| |
| File oldIndexDir = _TestUtil.getTempDir(oldNames[i]); |
| _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndexDir); |
| Directory dir = newFSDirectory(oldIndexDir); |
| IndexSearcher searcher = new IndexSearcher(dir, true); |
| |
| for (int id=10; id<15; id++) { |
| ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs; |
| assertEquals("wrong number of hits", 1, hits.length); |
| Document d = searcher.doc(hits[0].doc); |
| assertEquals(String.valueOf(id), d.get("id")); |
| |
| hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs; |
| assertEquals("wrong number of hits", 1, hits.length); |
| d = searcher.doc(hits[0].doc); |
| assertEquals(String.valueOf(id), d.get("id")); |
| } |
| |
| // check that also lower-precision fields are ok |
| ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs; |
| assertEquals("wrong number of hits", 34, hits.length); |
| |
| hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs; |
| assertEquals("wrong number of hits", 34, hits.length); |
| |
| // check decoding into field cache |
| int[] fci = FieldCache.DEFAULT.getInts(searcher.getIndexReader(), "trieInt"); |
| for (int val : fci) { |
| assertTrue("value in id bounds", val >= 0 && val < 35); |
| } |
| |
| long[] fcl = FieldCache.DEFAULT.getLongs(searcher.getIndexReader(), "trieLong"); |
| for (long val : fcl) { |
| assertTrue("value in id bounds", val >= 0L && val < 35L); |
| } |
| |
| searcher.close(); |
| dir.close(); |
| _TestUtil.rmDir(oldIndexDir); |
| } |
| } |
| |
| private int checkAllSegmentsUpgraded(Directory dir) throws IOException { |
| final SegmentInfos infos = new SegmentInfos(); |
| infos.read(dir); |
| if (VERBOSE) { |
| System.out.println("checkAllSegmentsUpgraded: " + infos); |
| } |
| for (SegmentInfo si : infos) { |
| assertEquals(Constants.LUCENE_MAIN_VERSION, si.getVersion()); |
| } |
| return infos.size(); |
| } |
| |
| private int getNumberOfSegments(Directory dir) throws IOException { |
| final SegmentInfos infos = new SegmentInfos(); |
| infos.read(dir); |
| return infos.size(); |
| } |
| |
| public void testUpgradeOldIndex() throws Exception { |
| List<String> names = new ArrayList<String>(oldNames.length + oldOptimizedNames.length); |
| names.addAll(Arrays.asList(oldNames)); |
| names.addAll(Arrays.asList(oldOptimizedNames)); |
| for(String name : names) { |
| if (VERBOSE) { |
| System.out.println("testUpgradeOldIndex: index=" +name); |
| } |
| File oldIndxeDir = _TestUtil.getTempDir(name); |
| _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir); |
| Directory dir = newFSDirectory(oldIndxeDir); |
| |
| new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false) |
| .upgrade(); |
| |
| checkAllSegmentsUpgraded(dir); |
| |
| dir.close(); |
| _TestUtil.rmDir(oldIndxeDir); |
| } |
| } |
| |
| public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception { |
| for (String name : oldOptimizedNames) { |
| if (VERBOSE) { |
| System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name); |
| } |
| File oldIndxeDir = _TestUtil.getTempDir(name); |
| _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir); |
| Directory dir = newFSDirectory(oldIndxeDir); |
| |
| assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir)); |
| |
| // create a bunch of dummy segments |
| int id = 40; |
| RAMDirectory ramDir = new RAMDirectory(); |
| for (int i = 0; i < 3; i++) { |
| // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge: |
| MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy(); |
| IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) |
| .setMergePolicy(mp); |
| IndexWriter w = new IndexWriter(ramDir, iwc); |
| // add few more docs: |
| for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) { |
| addDoc(w, id++); |
| } |
| w.close(false); |
| } |
| |
| // add dummy segments (which are all in current version) to optimized index |
| MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy(); |
| IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null) |
| .setMergePolicy(mp); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| w.setInfoStream(VERBOSE ? System.out : null); |
| w.addIndexes(ramDir); |
| w.close(false); |
| |
| // determine count of segments in modified index |
| final int origSegCount = getNumberOfSegments(dir); |
| |
| new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false) |
| .upgrade(); |
| |
| final int segCount = checkAllSegmentsUpgraded(dir); |
| assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged", |
| origSegCount, segCount); |
| |
| dir.close(); |
| _TestUtil.rmDir(oldIndxeDir); |
| } |
| } |
| |
| } |