| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| import java.io.IOException; |
| import java.io.UncheckedIOException; |
| import java.nio.charset.StandardCharsets; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Random; |
| import java.util.concurrent.atomic.AtomicReference; |
| import java.util.function.Function; |
| import java.util.stream.Collectors; |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.StoredFieldsFormat; |
| import org.apache.lucene.codecs.simpletext.SimpleTextCodec; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.IntPoint; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.document.NumericDocValuesField; |
| import org.apache.lucene.document.StoredField; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.MMapDirectory; |
| import org.apache.lucene.store.MockDirectoryWrapper.Throttling; |
| import org.apache.lucene.store.MockDirectoryWrapper; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.TestUtil; |
| |
| import com.carrotsearch.randomizedtesting.generators.RandomNumbers; |
| import com.carrotsearch.randomizedtesting.generators.RandomPicks; |
| import com.carrotsearch.randomizedtesting.generators.RandomStrings; |
| |
| /** |
| * Base class aiming at testing {@link StoredFieldsFormat stored fields formats}. |
| * To test a new format, all you need is to register a new {@link Codec} which |
| * uses it and extend this class and override {@link #getCodec()}. |
| * @lucene.experimental |
| */ |
| public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormatTestCase { |
| |
| @Override |
| protected void addRandomFields(Document d) { |
| final int numValues = random().nextInt(3); |
| for (int i = 0; i < numValues; ++i) { |
| d.add(new StoredField("f", TestUtil.randomSimpleString(random(), 100))); |
| } |
| } |
| |
| public void testRandomStoredFields() throws IOException { |
| Directory dir = newDirectory(); |
| Random rand = random(); |
| RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(TestUtil.nextInt(rand, 5, 20))); |
| //w.w.setNoCFSRatio(0.0); |
| final int docCount = atLeast(200); |
| final int fieldCount = TestUtil.nextInt(rand, 1, 5); |
| |
| final List<Integer> fieldIDs = new ArrayList<>(); |
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setTokenized(false); |
| Field idField = newField("id", "", customType); |
| |
| for(int i=0;i<fieldCount;i++) { |
| fieldIDs.add(i); |
| } |
| |
| final Map<String,Document> docs = new HashMap<>(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: build index docCount=" + docCount); |
| } |
| |
| FieldType customType2 = new FieldType(); |
| customType2.setStored(true); |
| for(int i=0;i<docCount;i++) { |
| Document doc = new Document(); |
| doc.add(idField); |
| final String id = ""+i; |
| idField.setStringValue(id); |
| docs.put(id, doc); |
| if (VERBOSE) { |
| System.out.println("TEST: add doc id=" + id); |
| } |
| |
| for(int field: fieldIDs) { |
| final String s; |
| if (rand.nextInt(4) != 3) { |
| s = TestUtil.randomUnicodeString(rand, 1000); |
| doc.add(newField("f"+field, s, customType2)); |
| } else { |
| s = null; |
| } |
| } |
| w.addDocument(doc); |
| if (rand.nextInt(50) == 17) { |
| // mixup binding of field name -> Number every so often |
| Collections.shuffle(fieldIDs, random()); |
| } |
| if (rand.nextInt(5) == 3 && i > 0) { |
| final String delID = ""+rand.nextInt(i); |
| if (VERBOSE) { |
| System.out.println("TEST: delete doc id=" + delID); |
| } |
| w.deleteDocuments(new Term("id", delID)); |
| docs.remove(delID); |
| } |
| } |
| |
| if (VERBOSE) { |
| System.out.println("TEST: " + docs.size() + " docs in index; now load fields"); |
| } |
| if (docs.size() > 0) { |
| String[] idsList = docs.keySet().toArray(new String[docs.size()]); |
| |
| for(int x=0;x<2;x++) { |
| DirectoryReader r = maybeWrapWithMergingReader(w.getReader()); |
| IndexSearcher s = newSearcher(r); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: cycle x=" + x + " r=" + r); |
| } |
| |
| int num = atLeast(100); |
| for(int iter=0;iter<num;iter++) { |
| String testID = idsList[rand.nextInt(idsList.length)]; |
| if (VERBOSE) { |
| System.out.println("TEST: test id=" + testID); |
| } |
| TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1); |
| assertEquals(1, hits.totalHits.value); |
| Document doc = r.document(hits.scoreDocs[0].doc); |
| Document docExp = docs.get(testID); |
| for(int i=0;i<fieldCount;i++) { |
| assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.get("f"+i), doc.get("f"+i)); |
| } |
| } |
| r.close(); |
| w.forceMerge(1); |
| } |
| } |
| w.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1727: make sure doc fields are stored in order |
| public void testStoredFieldsOrder() throws Throwable { |
| Directory d = newDirectory(); |
| IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| |
| FieldType customType = new FieldType(); |
| customType.setStored(true); |
| doc.add(newField("zzz", "a b c", customType)); |
| doc.add(newField("aaa", "a b c", customType)); |
| doc.add(newField("zzz", "1 2 3", customType)); |
| w.addDocument(doc); |
| IndexReader r = maybeWrapWithMergingReader(w.getReader()); |
| Document doc2 = r.document(0); |
| Iterator<IndexableField> it = doc2.getFields().iterator(); |
| assertTrue(it.hasNext()); |
| Field f = (Field) it.next(); |
| assertEquals(f.name(), "zzz"); |
| assertEquals(f.stringValue(), "a b c"); |
| |
| assertTrue(it.hasNext()); |
| f = (Field) it.next(); |
| assertEquals(f.name(), "aaa"); |
| assertEquals(f.stringValue(), "a b c"); |
| |
| assertTrue(it.hasNext()); |
| f = (Field) it.next(); |
| assertEquals(f.name(), "zzz"); |
| assertEquals(f.stringValue(), "1 2 3"); |
| assertFalse(it.hasNext()); |
| r.close(); |
| w.close(); |
| d.close(); |
| } |
| |
| // LUCENE-1219 |
| public void testBinaryFieldOffsetLength() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| byte[] b = new byte[50]; |
| for(int i=0;i<50;i++) |
| b[i] = (byte) (i+77); |
| |
| Document doc = new Document(); |
| Field f = new StoredField("binary", b, 10, 17); |
| byte[] bx = f.binaryValue().bytes; |
| assertTrue(bx != null); |
| assertEquals(50, bx.length); |
| assertEquals(10, f.binaryValue().offset); |
| assertEquals(17, f.binaryValue().length); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader ir = DirectoryReader.open(dir); |
| Document doc2 = ir.document(0); |
| IndexableField f2 = doc2.getField("binary"); |
| b = f2.binaryValue().bytes; |
| assertTrue(b != null); |
| assertEquals(17, b.length, 17); |
| assertEquals(87, b[0]); |
| ir.close(); |
| dir.close(); |
| } |
| |
| public void testNumericField() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| final int numDocs = atLeast(500); |
| final Number[] answers = new Number[numDocs]; |
| final Class<?>[] typeAnswers = new Class<?>[numDocs]; |
| for(int id=0;id<numDocs;id++) { |
| Document doc = new Document(); |
| final Field nf; |
| final Number answer; |
| final Class<?> typeAnswer; |
| if (random().nextBoolean()) { |
| // float/double |
| if (random().nextBoolean()) { |
| final float f = random().nextFloat(); |
| answer = Float.valueOf(f); |
| nf = new StoredField("nf", f); |
| typeAnswer = Float.class; |
| } else { |
| final double d = random().nextDouble(); |
| answer = Double.valueOf(d); |
| nf = new StoredField("nf", d); |
| typeAnswer = Double.class; |
| } |
| } else { |
| // int/long |
| if (random().nextBoolean()) { |
| final int i = random().nextInt(); |
| answer = Integer.valueOf(i); |
| nf = new StoredField("nf", i); |
| typeAnswer = Integer.class; |
| } else { |
| final long l = random().nextLong(); |
| answer = Long.valueOf(l); |
| nf = new StoredField("nf", l); |
| typeAnswer = Long.class; |
| } |
| } |
| doc.add(nf); |
| answers[id] = answer; |
| typeAnswers[id] = typeAnswer; |
| doc.add(new StoredField("id", id)); |
| doc.add(new IntPoint("id", id)); |
| doc.add(new NumericDocValuesField("id", id)); |
| w.addDocument(doc); |
| } |
| final DirectoryReader r = maybeWrapWithMergingReader(w.getReader()); |
| w.close(); |
| |
| assertEquals(numDocs, r.numDocs()); |
| |
| for(LeafReaderContext ctx : r.leaves()) { |
| final LeafReader sub = ctx.reader(); |
| final NumericDocValues ids = DocValues.getNumeric(sub, "id"); |
| for(int docID=0;docID<sub.numDocs();docID++) { |
| final Document doc = sub.document(docID); |
| final Field f = (Field) doc.getField("nf"); |
| assertTrue("got f=" + f, f instanceof StoredField); |
| assertEquals(docID, ids.nextDoc()); |
| assertEquals(answers[(int) ids.longValue()], f.numericValue()); |
| } |
| } |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testIndexedBit() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| Document doc = new Document(); |
| FieldType onlyStored = new FieldType(); |
| onlyStored.setStored(true); |
| doc.add(new Field("field", "value", onlyStored)); |
| doc.add(new StringField("field2", "value", Field.Store.YES)); |
| w.addDocument(doc); |
| IndexReader r = maybeWrapWithMergingReader(w.getReader()); |
| w.close(); |
| assertEquals(IndexOptions.NONE, r.document(0).getField("field").fieldType().indexOptions()); |
| assertNotNull(r.document(0).getField("field2").fieldType().indexOptions()); |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testReadSkip() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random())); |
| iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30)); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); |
| |
| FieldType ft = new FieldType(); |
| ft.setStored(true); |
| ft.freeze(); |
| |
| final String string = TestUtil.randomSimpleString(random(), 50); |
| final byte[] bytes = string.getBytes(StandardCharsets.UTF_8); |
| final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong(); |
| final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt(); |
| final float f = random().nextFloat(); |
| final double d = random().nextDouble(); |
| |
| List<Field> fields = Arrays.asList( |
| new Field("bytes", bytes, ft), |
| new Field("string", string, ft), |
| new StoredField("long", l), |
| new StoredField("int", i), |
| new StoredField("float", f), |
| new StoredField("double", d) |
| ); |
| |
| for (int k = 0; k < 100; ++k) { |
| Document doc = new Document(); |
| for (Field fld : fields) { |
| doc.add(fld); |
| } |
| iw.w.addDocument(doc); |
| } |
| iw.commit(); |
| |
| final DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir)); |
| final int docID = random().nextInt(100); |
| for (Field fld : fields) { |
| String fldName = fld.name(); |
| final Document sDoc = reader.document(docID, Collections.singleton(fldName)); |
| final IndexableField sField = sDoc.getField(fldName); |
| if (Field.class.equals(fld.getClass())) { |
| assertEquals(fld.binaryValue(), sField.binaryValue()); |
| assertEquals(fld.stringValue(), sField.stringValue()); |
| } else { |
| assertEquals(fld.numericValue(), sField.numericValue()); |
| } |
| } |
| reader.close(); |
| iw.close(); |
| dir.close(); |
| } |
| |
| public void testEmptyDocs() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random())); |
| iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30)); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); |
| |
| // make sure that the fact that documents might be empty is not a problem |
| final Document emptyDoc = new Document(); |
| final int numDocs = random().nextBoolean() ? 1 : atLeast(1000); |
| for (int i = 0; i < numDocs; ++i) { |
| iw.addDocument(emptyDoc); |
| } |
| iw.commit(); |
| final DirectoryReader rd = maybeWrapWithMergingReader(DirectoryReader.open(dir)); |
| for (int i = 0; i < numDocs; ++i) { |
| final Document doc = rd.document(i); |
| assertNotNull(doc); |
| assertTrue(doc.getFields().isEmpty()); |
| } |
| rd.close(); |
| |
| iw.close(); |
| dir.close(); |
| } |
| |
| public void testConcurrentReads() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random())); |
| iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30)); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); |
| |
| // make sure the readers are properly cloned |
| final Document doc = new Document(); |
| final Field field = new StringField("fld", "", Store.YES); |
| doc.add(field); |
| final int numDocs = atLeast(1000); |
| for (int i = 0; i < numDocs; ++i) { |
| field.setStringValue("" + i); |
| iw.addDocument(doc); |
| } |
| iw.commit(); |
| |
| final DirectoryReader rd = maybeWrapWithMergingReader(DirectoryReader.open(dir)); |
| final IndexSearcher searcher = new IndexSearcher(rd); |
| final int concurrentReads = atLeast(5); |
| final int readsPerThread = atLeast(50); |
| final List<Thread> readThreads = new ArrayList<>(); |
| final AtomicReference<Exception> ex = new AtomicReference<>(); |
| for (int i = 0; i < concurrentReads; ++i) { |
| readThreads.add(new Thread() { |
| |
| int[] queries; |
| |
| { |
| queries = new int[readsPerThread]; |
| for (int i = 0; i < queries.length; ++i) { |
| queries[i] = random().nextInt(numDocs); |
| } |
| } |
| |
| @Override |
| public void run() { |
| for (int q : queries) { |
| final Query query = new TermQuery(new Term("fld", "" + q)); |
| try { |
| final TopDocs topDocs = searcher.search(query, 1); |
| if (topDocs.totalHits.value != 1) { |
| throw new IllegalStateException("Expected 1 hit, got " + topDocs.totalHits.value); |
| } |
| final Document sdoc = rd.document(topDocs.scoreDocs[0].doc); |
| if (sdoc == null || sdoc.get("fld") == null) { |
| throw new IllegalStateException("Could not find document " + q); |
| } |
| if (!Integer.toString(q).equals(sdoc.get("fld"))) { |
| throw new IllegalStateException("Expected " + q + ", but got " + sdoc.get("fld")); |
| } |
| } catch (Exception e) { |
| ex.compareAndSet(null, e); |
| } |
| } |
| } |
| }); |
| } |
| for (Thread thread : readThreads) { |
| thread.start(); |
| } |
| for (Thread thread : readThreads) { |
| thread.join(); |
| } |
| rd.close(); |
| if (ex.get() != null) { |
| throw ex.get(); |
| } |
| |
| iw.close(); |
| dir.close(); |
| } |
| |
| private byte[] randomByteArray(int length, int max) { |
| final byte[] result = new byte[length]; |
| for (int i = 0; i < length; ++i) { |
| result[i] = (byte) random().nextInt(max); |
| } |
| return result; |
| } |
| |
| public void testWriteReadMerge() throws IOException { |
| // get another codec, other than the default: so we are merging segments across different codecs |
| final Codec otherCodec; |
| if ("SimpleText".equals(Codec.getDefault().getName())) { |
| otherCodec = TestUtil.getDefaultCodec(); |
| } else { |
| otherCodec = new SimpleTextCodec(); |
| } |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random())); |
| iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30)); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); |
| |
| final int docCount = atLeast(200); |
| final byte[][][] data = new byte [docCount][][]; |
| for (int i = 0; i < docCount; ++i) { |
| final int fieldCount = rarely() |
| ? RandomNumbers.randomIntBetween(random(), 1, 500) |
| : RandomNumbers.randomIntBetween(random(), 1, 5); |
| data[i] = new byte[fieldCount][]; |
| for (int j = 0; j < fieldCount; ++j) { |
| final int length = rarely() |
| ? random().nextInt(1000) |
| : random().nextInt(10); |
| final int max = rarely() ? 256 : 2; |
| data[i][j] = randomByteArray(length, max); |
| } |
| } |
| |
| final FieldType type = new FieldType(StringField.TYPE_STORED); |
| type.setIndexOptions(IndexOptions.NONE); |
| type.freeze(); |
| IntPoint id = new IntPoint("id", 0); |
| StoredField idStored = new StoredField("id", 0); |
| for (int i = 0; i < data.length; ++i) { |
| Document doc = new Document(); |
| doc.add(id); |
| doc.add(idStored); |
| id.setIntValue(i); |
| idStored.setIntValue(i); |
| for (int j = 0; j < data[i].length; ++j) { |
| Field f = new Field("bytes" + j, data[i][j], type); |
| doc.add(f); |
| } |
| iw.w.addDocument(doc); |
| if (random().nextBoolean() && (i % (data.length / 10) == 0)) { |
| iw.w.close(); |
| IndexWriterConfig iwConfNew = newIndexWriterConfig(new MockAnalyzer(random())); |
| // test merging against a non-compressing codec |
| if (iwConf.getCodec() == otherCodec) { |
| iwConfNew.setCodec(Codec.getDefault()); |
| } else { |
| iwConfNew.setCodec(otherCodec); |
| } |
| iwConf = iwConfNew; |
| iw = new RandomIndexWriter(random(), dir, iwConf); |
| } |
| } |
| |
| for (int i = 0; i < 10; ++i) { |
| final int min = random().nextInt(data.length); |
| final int max = min + random().nextInt(20); |
| iw.deleteDocuments(IntPoint.newRangeQuery("id", min, max-1)); |
| } |
| |
| iw.forceMerge(2); // force merges with deletions |
| |
| iw.commit(); |
| |
| final DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir)); |
| assertTrue(ir.numDocs() > 0); |
| int numDocs = 0; |
| for (int i = 0; i < ir.maxDoc(); ++i) { |
| final Document doc = ir.document(i); |
| if (doc == null) { |
| continue; |
| } |
| ++ numDocs; |
| final int docId = doc.getField("id").numericValue().intValue(); |
| assertEquals(data[docId].length + 1, doc.getFields().size()); |
| for (int j = 0; j < data[docId].length; ++j) { |
| final byte[] arr = data[docId][j]; |
| final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j); |
| final byte[] arr2 = BytesRef.deepCopyOf(arr2Ref).bytes; |
| assertArrayEquals(arr, arr2); |
| } |
| } |
| assertTrue(ir.numDocs() <= numDocs); |
| ir.close(); |
| |
| iw.deleteAll(); |
| iw.commit(); |
| iw.forceMerge(1); |
| |
| iw.close(); |
| dir.close(); |
| } |
| |
| /** A dummy filter reader that reverse the order of documents in stored fields. */ |
| private static class DummyFilterLeafReader extends FilterLeafReader { |
| |
| public DummyFilterLeafReader(LeafReader in) { |
| super(in); |
| } |
| |
| @Override |
| public void document(int docID, StoredFieldVisitor visitor) throws IOException { |
| super.document(maxDoc() - 1 - docID, visitor); |
| } |
| |
| @Override |
| public CacheHelper getCoreCacheHelper() { |
| return null; |
| } |
| |
| @Override |
| public CacheHelper getReaderCacheHelper() { |
| return null; |
| } |
| |
| } |
| |
| private static class DummyFilterDirectoryReader extends FilterDirectoryReader { |
| |
| public DummyFilterDirectoryReader(DirectoryReader in) throws IOException { |
| super(in, new SubReaderWrapper() { |
| @Override |
| public LeafReader wrap(LeafReader reader) { |
| return new DummyFilterLeafReader(reader); |
| } |
| }); |
| } |
| |
| @Override |
| protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException { |
| return new DummyFilterDirectoryReader(in); |
| } |
| |
| @Override |
| public CacheHelper getReaderCacheHelper() { |
| return null; |
| } |
| |
| } |
| |
| public void testMergeFilterReader() throws IOException { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| final int numDocs = atLeast(200); |
| final String[] stringValues = new String[10]; |
| for (int i = 0; i < stringValues.length; ++i) { |
| stringValues[i] = RandomStrings.randomRealisticUnicodeOfLength(random(), 10); |
| } |
| Document[] docs = new Document[numDocs]; |
| for (int i = 0; i < numDocs; ++i) { |
| Document doc = new Document(); |
| doc.add(new StringField("to_delete", random().nextBoolean() ? "yes" : "no", Store.NO)); |
| doc.add(new StoredField("id", i)); |
| doc.add(new StoredField("i", random().nextInt(50))); |
| doc.add(new StoredField("l", random().nextLong())); |
| doc.add(new StoredField("d", random().nextDouble())); |
| doc.add(new StoredField("f", random().nextFloat())); |
| doc.add(new StoredField("s", RandomPicks.randomFrom(random(), stringValues))); |
| doc.add(new StoredField("b", new BytesRef(RandomPicks.randomFrom(random(), stringValues)))); |
| docs[i] = doc; |
| w.addDocument(doc); |
| } |
| if (random().nextBoolean()) { |
| w.deleteDocuments(new Term("to_delete", "yes")); |
| } |
| w.commit(); |
| w.close(); |
| |
| DirectoryReader reader = new DummyFilterDirectoryReader(maybeWrapWithMergingReader(DirectoryReader.open(dir))); |
| |
| Directory dir2 = newDirectory(); |
| w = new RandomIndexWriter(random(), dir2); |
| TestUtil.addIndexesSlowly(w.w, reader); |
| reader.close(); |
| dir.close(); |
| |
| reader = maybeWrapWithMergingReader(w.getReader()); |
| for (int i = 0; i < reader.maxDoc(); ++i) { |
| final Document doc = reader.document(i); |
| final int id = doc.getField("id").numericValue().intValue(); |
| final Document expected = docs[id]; |
| assertEquals(expected.get("s"), doc.get("s")); |
| assertEquals(expected.getField("i").numericValue(), doc.getField("i").numericValue()); |
| assertEquals(expected.getField("l").numericValue(), doc.getField("l").numericValue()); |
| assertEquals(expected.getField("d").numericValue(), doc.getField("d").numericValue()); |
| assertEquals(expected.getField("f").numericValue(), doc.getField("f").numericValue()); |
| assertEquals(expected.getField("b").binaryValue(), doc.getField("b").binaryValue()); |
| } |
| |
| reader.close(); |
| w.close(); |
| TestUtil.checkIndex(dir2); |
| dir2.close(); |
| } |
| |
| @Nightly |
| public void testBigDocuments() throws IOException { |
| assumeWorkingMMapOnWindows(); |
| |
| // "big" as "much bigger than the chunk size" |
| // for this test we force a FS dir |
| // we can't just use newFSDirectory, because this test doesn't really index anything. |
| // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484) |
| Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments"))); |
| IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random())); |
| iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30)); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); |
| |
| if (dir instanceof MockDirectoryWrapper) { |
| ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER); |
| } |
| |
| final Document emptyDoc = new Document(); // emptyDoc |
| final Document bigDoc1 = new Document(); // lot of small fields |
| final Document bigDoc2 = new Document(); // 1 very big field |
| |
| final Field idField = new StringField("id", "", Store.NO); |
| emptyDoc.add(idField); |
| bigDoc1.add(idField); |
| bigDoc2.add(idField); |
| |
| final FieldType onlyStored = new FieldType(StringField.TYPE_STORED); |
| onlyStored.setIndexOptions(IndexOptions.NONE); |
| |
| final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored); |
| final int numFields = RandomNumbers.randomIntBetween(random(), 500000, 1000000); |
| for (int i = 0; i < numFields; ++i) { |
| bigDoc1.add(smallField); |
| } |
| |
| final Field bigField = new Field("fld", randomByteArray(RandomNumbers.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored); |
| bigDoc2.add(bigField); |
| |
| final int numDocs = atLeast(5); |
| final Document[] docs = new Document[numDocs]; |
| for (int i = 0; i < numDocs; ++i) { |
| docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2)); |
| } |
| for (int i = 0; i < numDocs; ++i) { |
| idField.setStringValue("" + i); |
| iw.addDocument(docs[i]); |
| if (random().nextInt(numDocs) == 0) { |
| iw.commit(); |
| } |
| } |
| iw.commit(); |
| iw.forceMerge(1); // look at what happens when big docs are merged |
| final DirectoryReader rd = maybeWrapWithMergingReader(DirectoryReader.open(dir)); |
| final IndexSearcher searcher = new IndexSearcher(rd); |
| for (int i = 0; i < numDocs; ++i) { |
| final Query query = new TermQuery(new Term("id", "" + i)); |
| final TopDocs topDocs = searcher.search(query, 1); |
| assertEquals("" + i, 1, topDocs.totalHits.value); |
| final Document doc = rd.document(topDocs.scoreDocs[0].doc); |
| assertNotNull(doc); |
| final IndexableField[] fieldValues = doc.getFields("fld"); |
| assertEquals(docs[i].getFields("fld").length, fieldValues.length); |
| if (fieldValues.length > 0) { |
| assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue()); |
| } |
| } |
| rd.close(); |
| iw.close(); |
| dir.close(); |
| } |
| |
| public void testBulkMergeWithDeletes() throws IOException { |
| final int numDocs = atLeast(200); |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE)); |
| for (int i = 0; i < numDocs; ++i) { |
| Document doc = new Document(); |
| doc.add(new StringField("id", Integer.toString(i), Store.YES)); |
| doc.add(new StoredField("f", TestUtil.randomSimpleString(random()))); |
| w.addDocument(doc); |
| } |
| final int deleteCount = TestUtil.nextInt(random(), 5, numDocs); |
| for (int i = 0; i < deleteCount; ++i) { |
| final int id = random().nextInt(numDocs); |
| w.deleteDocuments(new Term("id", Integer.toString(id))); |
| } |
| w.commit(); |
| w.close(); |
| w = new RandomIndexWriter(random(), dir); |
| w.forceMerge(TestUtil.nextInt(random(), 1, 3)); |
| w.commit(); |
| w.close(); |
| TestUtil.checkIndex(dir); |
| dir.close(); |
| } |
| |
| /** mix up field numbers, merge, and check that data is correct */ |
| public void testMismatchedFields() throws Exception { |
| Directory dirs[] = new Directory[10]; |
| for (int i = 0; i < dirs.length; i++) { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(null); |
| IndexWriter iw = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| for (int j = 0; j < 10; j++) { |
| // add fields where name=value (e.g. 3=3) so we can detect if stuff gets screwed up. |
| doc.add(new StringField(Integer.toString(j), Integer.toString(j), Field.Store.YES)); |
| } |
| for (int j = 0; j < 10; j++) { |
| iw.addDocument(doc); |
| } |
| |
| DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(iw)); |
| // mix up fields explicitly |
| if (random().nextBoolean()) { |
| reader = new MismatchedDirectoryReader(reader, random()); |
| } |
| dirs[i] = newDirectory(); |
| IndexWriter adder = new IndexWriter(dirs[i], new IndexWriterConfig(null)); |
| TestUtil.addIndexesSlowly(adder, reader); |
| adder.commit(); |
| adder.close(); |
| |
| IOUtils.close(reader, iw, dir); |
| } |
| |
| Directory everything = newDirectory(); |
| IndexWriter iw = new IndexWriter(everything, new IndexWriterConfig(null)); |
| iw.addIndexes(dirs); |
| iw.forceMerge(1); |
| |
| LeafReader ir = getOnlyLeafReader(DirectoryReader.open(iw)); |
| for (int i = 0; i < ir.maxDoc(); i++) { |
| Document doc = ir.document(i); |
| assertEquals(10, doc.getFields().size()); |
| for (int j = 0; j < 10; j++) { |
| assertEquals(Integer.toString(j), doc.get(Integer.toString(j))); |
| } |
| } |
| |
| IOUtils.close(iw, ir, everything); |
| IOUtils.close(dirs); |
| } |
| |
| public void testRandomStoredFieldsWithIndexSort() throws Exception { |
| final SortField[] sortFields; |
| if (random().nextBoolean()) { |
| sortFields = |
| new SortField[]{ |
| new SortField("sort-1", SortField.Type.LONG), |
| new SortField("sort-2", SortField.Type.INT) |
| }; |
| } else { |
| sortFields = new SortField[]{new SortField("sort-1", SortField.Type.LONG)}; |
| } |
| List<String> storedFields = new ArrayList<>(); |
| int numFields = TestUtil.nextInt(random(), 1, 10); |
| for (int i = 0; i < numFields; i++) { |
| storedFields.add("f-" + i); |
| } |
| FieldType storeType = new FieldType(TextField.TYPE_STORED); |
| storeType.setStored(true); |
| Function<String, Document> documentFactory = |
| id -> { |
| Document doc = new Document(); |
| doc.add(new StringField("id", id, random().nextBoolean() ? Store.YES : Store.NO)); |
| if (random().nextInt(100) <= 5) { |
| Collections.shuffle(storedFields, random()); |
| } |
| for (String fieldName : storedFields) { |
| if (random().nextBoolean()) { |
| String s = TestUtil.randomUnicodeString(random(), 100); |
| doc.add(newField(fieldName, s, storeType)); |
| } |
| } |
| for (SortField sortField : sortFields) { |
| doc.add( |
| new NumericDocValuesField( |
| sortField.getField(), TestUtil.nextInt(random(), 0, 10000))); |
| } |
| return doc; |
| }; |
| |
| Map<String, Document> docs = new HashMap<>(); |
| int numDocs = atLeast(100); |
| for (int i = 0; i < numDocs; i++) { |
| String id = Integer.toString(i); |
| docs.put(id, documentFactory.apply(id)); |
| } |
| |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = newIndexWriterConfig(); |
| iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 5, 20)); |
| iwc.setIndexSort(new Sort(sortFields)); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); |
| List<String> addedIds = new ArrayList<>(); |
| Runnable verifyStoreFields = |
| () -> { |
| if (addedIds.isEmpty()) { |
| return; |
| } |
| try (DirectoryReader reader = maybeWrapWithMergingReader(iw.getReader())) { |
| IndexSearcher searcher = new IndexSearcher(reader); |
| int iters = TestUtil.nextInt(random(), 1, 10); |
| for (int i = 0; i < iters; i++) { |
| String testID = addedIds.get(random().nextInt(addedIds.size())); |
| if (VERBOSE) { |
| System.out.println("TEST: test id=" + testID); |
| } |
| TopDocs hits = searcher.search(new TermQuery(new Term("id", testID)), 1); |
| assertEquals(1, hits.totalHits.value); |
| List<IndexableField> expectedFields = |
| docs.get(testID).getFields().stream() |
| .filter(f -> f.fieldType().stored()) |
| .collect(Collectors.toList()); |
| Document actualDoc = reader.document(hits.scoreDocs[0].doc); |
| assertEquals(expectedFields.size(), actualDoc.getFields().size()); |
| for (IndexableField expectedField : expectedFields) { |
| IndexableField[] actualFields = actualDoc.getFields(expectedField.name()); |
| assertEquals(1, actualFields.length); |
| assertEquals(expectedField.stringValue(), actualFields[0].stringValue()); |
| } |
| } |
| } catch (IOException e) { |
| throw new UncheckedIOException(e); |
| } |
| }; |
| final List<String> ids = new ArrayList<>(docs.keySet()); |
| Collections.shuffle(ids, random()); |
| for (String id : ids) { |
| if (random().nextInt(100) < 5) { |
| // add via foreign reader |
| IndexWriterConfig otherIwc = newIndexWriterConfig(); |
| otherIwc.setIndexSort(new Sort(sortFields)); |
| try (Directory otherDir = newDirectory(); |
| RandomIndexWriter otherIw = new RandomIndexWriter(random(), otherDir, otherIwc)) { |
| otherIw.addDocument(docs.get(id)); |
| try (DirectoryReader otherReader = otherIw.getReader()) { |
| TestUtil.addIndexesSlowly(iw.w, otherReader); |
| } |
| } |
| } else { |
| // add normally |
| iw.addDocument(docs.get(id)); |
| } |
| addedIds.add(id); |
| if (random().nextInt(100) < 5) { |
| String deletingId = addedIds.remove(random().nextInt(addedIds.size())); |
| if (random().nextBoolean()) { |
| iw.deleteDocuments(new TermQuery(new Term("id", deletingId))); |
| addedIds.remove(deletingId); |
| } else { |
| final Document newDoc = documentFactory.apply(deletingId); |
| docs.put(deletingId, newDoc); |
| iw.updateDocument(new Term("id", deletingId), newDoc); |
| } |
| } |
| if (random().nextInt(100) < 5) { |
| verifyStoreFields.run(); |
| } |
| if (random().nextInt(100) < 2) { |
| iw.forceMerge(TestUtil.nextInt(random(), 1, 3)); |
| } |
| } |
| verifyStoreFields.run(); |
| iw.forceMerge(TestUtil.nextInt(random(), 1, 3)); |
| verifyStoreFields.run(); |
| IOUtils.close(iw, dir); |
| } |
| |
| } |