blob: 3912795ad67fd534ee045bb5417d40095deb5a69 [file] [log] [blame]
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestGlobalFieldNumbers extends LuceneTestCase {
public void testGlobalFieldNumberFiles() throws IOException {
int num = atLeast(3);
for (int i = 0; i < num; i++) {
Directory dir = newDirectory();
{
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random));
IndexWriter writer = new IndexWriter(dir, config);
Document d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field"));
d.add(new Field("f2", TextField.TYPE_STORED, "d1 second field"));
writer.addDocument(d);
for (String string : writer.getIndexFileNames()) {
assertFalse(string.endsWith(".fnx"));
}
writer.commit();
Collection<String> files = writer.getIndexFileNames();
files.remove("1.fnx");
for (String string : files) {
assertFalse(string.endsWith(".fnx"));
}
assertFNXFiles(dir, "1.fnx");
d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field"));
d.add(new BinaryField("f3", new byte[] { 1, 2, 3 }));
writer.addDocument(d);
writer.commit();
files = writer.getIndexFileNames();
files.remove("2.fnx");
for (String string : files) {
assertFalse(string.endsWith(".fnx"));
}
assertFNXFiles(dir, "2.fnx");
writer.close();
assertFNXFiles(dir, "2.fnx");
}
{
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
Document d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d3 first field"));
d.add(new Field("f2", TextField.TYPE_STORED, "d3 second field"));
d.add(new BinaryField("f3", new byte[] { 1, 2, 3, 4, 5 }));
writer.addDocument(d);
writer.close();
Collection<String> files = writer.getIndexFileNames();
files.remove("2.fnx");
for (String string : files) {
assertFalse(string.endsWith(".fnx"));
}
assertFNXFiles(dir, "2.fnx");
}
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
writer.optimize();
assertFalse(" field numbers got mixed up", writer.anyNonBulkMerges);
writer.close();
assertFNXFiles(dir, "2.fnx");
dir.close();
}
}
public void testIndexReaderCommit() throws IOException {
int num = atLeast(3);
for (int i = 0; i < num; i++) {
Directory dir = newDirectory();
{
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random));
IndexWriter writer = new IndexWriter(dir, config);
Document d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field"));
d.add(new Field("f2", TextField.TYPE_STORED, "d1 second field"));
writer.addDocument(d);
writer.commit();
assertFNXFiles(dir, "1.fnx");
d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field"));
d.add(new BinaryField("f3", new byte[] { 1, 2, 3 }));
writer.addDocument(d);
writer.commit();
assertFNXFiles(dir, "2.fnx");
writer.close();
assertFNXFiles(dir, "2.fnx");
}
IndexReader reader = IndexReader.open(dir, false);
reader.deleteDocument(0);
reader.commit();
reader.close();
// make sure this reader can not modify the field map
assertFNXFiles(dir, "2.fnx");
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
writer.optimize();
assertFalse(" field numbers got mixed up", writer.anyNonBulkMerges);
writer.close();
assertFNXFiles(dir, "2.fnx");
dir.close();
}
}
public void testGlobalFieldNumberFilesAcrossCommits() throws IOException {
int num = atLeast(3);
for (int i = 0; i < num; i++) {
Directory dir = newDirectory();
{
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(
new KeepAllDeletionPolicy()));
Document d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field"));
d.add(new Field("f2", TextField.TYPE_STORED, "d1 second field"));
writer.addDocument(d);
writer.commit();
assertFNXFiles(dir, "1.fnx");
d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field"));
d.add(new BinaryField("f3", new byte[] { 1, 2, 3 }));
writer.addDocument(d);
writer.commit();
writer.commit();
writer.commit();
assertFNXFiles(dir, "1.fnx", "2.fnx");
writer.close();
assertFNXFiles(dir, "1.fnx", "2.fnx");
}
{
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
Document d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d3 first field"));
d.add(new Field("f2", TextField.TYPE_STORED, "d3 second field"));
d.add(new BinaryField("f3", new byte[] { 1, 2, 3, 4, 5 }));
writer.addDocument(d);
writer.close();
assertFNXFiles(dir, "2.fnx");
}
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
writer.optimize();
assertFalse(" field numbers got mixed up", writer.anyNonBulkMerges);
writer.close();
assertFNXFiles(dir, "2.fnx");
dir.close();
}
}
public void testGlobalFieldNumberOnOldCommit() throws IOException {
int num = atLeast(3);
for (int i = 0; i < num; i++) {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(
new KeepAllDeletionPolicy()));
Document d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field"));
d.add(new Field("f2", TextField.TYPE_STORED, "d1 second field"));
writer.addDocument(d);
writer.commit();
assertFNXFiles(dir, "1.fnx");
d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field"));
d.add(new BinaryField("f3", new byte[] { 1, 2, 3 }));
writer.addDocument(d);
assertFNXFiles(dir, "1.fnx");
writer.close();
assertFNXFiles(dir, "1.fnx", "2.fnx");
// open first commit
List<IndexCommit> listCommits = IndexReader.listCommits(dir);
assertEquals(2, listCommits.size());
writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)).setIndexDeletionPolicy(
new KeepAllDeletionPolicy()).setIndexCommit(listCommits.get(0)));
d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field"));
d.add(new BinaryField("f3", new byte[] { 1, 2, 3 }));
writer.addDocument(d);
writer.commit();
// now we have 3 files since f3 is not present in the first commit
assertFNXFiles(dir, "1.fnx", "2.fnx", "3.fnx");
writer.close();
assertFNXFiles(dir, "1.fnx", "2.fnx", "3.fnx");
writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)));
writer.commit();
listCommits = IndexReader.listCommits(dir);
assertEquals(1, listCommits.size());
assertFNXFiles(dir, "3.fnx");
writer.close();
assertFNXFiles(dir, "3.fnx");
dir.close();
}
}
private final Directory buildRandomIndex(String[] fieldNames, int numDocs,
IndexWriterConfig conf) throws CorruptIndexException,
LockObtainFailedException, IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, conf);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
final int numFields = 1 + random.nextInt(fieldNames.length);
for (int j = 0; j < numFields; j++) {
FieldType customType = new FieldType();
customType.setIndexed(true);
customType.setTokenized(random.nextBoolean());
customType.setOmitNorms(random.nextBoolean());
doc.add(newField(fieldNames[random.nextInt(fieldNames.length)],
_TestUtil.randomRealisticUnicodeString(random),
customType));
}
writer.addDocument(doc);
if (random.nextInt(20) == 0) {
writer.commit();
}
}
writer.close();
return dir;
}
public void testOptimize() throws IOException {
for (int i = 0; i < 2*RANDOM_MULTIPLIER; i++) {
Set<String> fieldNames = new HashSet<String>();
final int numFields = 2 + (TEST_NIGHTLY ? random.nextInt(200) : random.nextInt(20));
for (int j = 0; j < numFields; j++) {
fieldNames.add("field_" + j);
}
Directory base = buildRandomIndex(fieldNames.toArray(new String[0]),
20 + random.nextInt(100),
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
IndexWriter writer = new IndexWriter(base, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
FieldNumberBiMap globalFieldMap = writer.segmentInfos
.getOrLoadGlobalFieldNumberMap(base);
Set<Entry<String, Integer>> entries = globalFieldMap.entries();
writer.optimize();
writer.commit();
writer.close();
Set<Entry<String, Integer>> afterOptmize = globalFieldMap.entries();
assertEquals(entries, afterOptmize);
base.close();
}
}
public void testAddIndexesStableFieldNumbers() throws IOException {
for (int i = 0; i < 2*RANDOM_MULTIPLIER; i++) {
Set<String> fieldNames = new HashSet<String>();
final int numFields = 2 + (TEST_NIGHTLY ? random.nextInt(50) : random.nextInt(10));
for (int j = 0; j < numFields; j++) {
fieldNames.add("field_" + j);
}
Directory base = newDirectory();
IndexWriter writer = new IndexWriter(base, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
Document doc = new Document();
for (String string : fieldNames) {
FieldType customType = new FieldType();
customType.setIndexed(true);
customType.setTokenized(random.nextBoolean());
customType.setOmitNorms(random.nextBoolean());
doc.add(newField(string,
_TestUtil.randomRealisticUnicodeString(random),
customType));
}
writer.addDocument(doc);
writer.commit();
FieldNumberBiMap globalFieldMap = writer.segmentInfos
.getOrLoadGlobalFieldNumberMap(base);
final Set<Entry<String, Integer>> entries = globalFieldMap.entries();
assertEquals(entries.size(), fieldNames.size());
for (Entry<String, Integer> entry : entries) {
// all fields are in this fieldMap
assertTrue(fieldNames.contains(entry.getKey()));
}
writer.close();
int numIndexes = 1 + random.nextInt(10);
for (int j = 0; j < numIndexes; j++) {
Directory toAdd = buildRandomIndex(fieldNames.toArray(new String[0]),
1 + random.nextInt(50),
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
IndexWriter w = new IndexWriter(base, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
if (random.nextBoolean()) {
IndexReader open = IndexReader.open(toAdd);
w.addIndexes(open);
open.close();
} else {
w.addIndexes(toAdd);
}
w.close();
FieldNumberBiMap map = w.segmentInfos
.getOrLoadGlobalFieldNumberMap(toAdd);
assertEquals(entries, map.entries());
toAdd.close();
}
IndexWriter w = new IndexWriter(base, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(
new LogByteSizeMergePolicy()));
w.optimize();
w.close();
SegmentInfos sis = new SegmentInfos();
sis.read(base);
SegmentInfo segmentInfo = sis.info(sis.size() - 1);// last segment must
// have all fields with
// consistent numbers
FieldInfos fieldInfos = segmentInfo.getFieldInfos();
assertEquals(fieldInfos.size(), entries.size());
for (Entry<String, Integer> entry : entries) {
assertEquals(entry.getValue(),
Integer.valueOf(fieldInfos.fieldNumber(entry.getKey())));
assertEquals(entry.getKey(), fieldInfos.fieldName(entry.getValue()));
}
base.close();
}
}
final String[] oldNames = { "30.cfs", "30.nocfs", "31.cfs", "31.nocfs", };
public void testAddOldIndex() throws IOException {
int i = random.nextInt(oldNames.length);
File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
Directory dir = null;
try {
_TestUtil
.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
dir = newFSDirectory(oldIndxeDir);
SegmentInfos infos = new SegmentInfos();
infos.read(dir);
SortedMap<Integer, String> sortedMap = new TreeMap<Integer, String>();
FieldNumberBiMap biMap = new FieldNumberBiMap();
int maxFieldNum = Integer.MIN_VALUE;
for (SegmentInfo segmentInfo : infos) {
for (FieldInfo fieldInfo : segmentInfo.getFieldInfos()) {
int globNumber = biMap.addOrGet(fieldInfo.name, fieldInfo.number);
maxFieldNum = Math.max(maxFieldNum, globNumber);
sortedMap.put(globNumber, fieldInfo.name);
}
}
Directory base = newDirectory();
IndexWriter writer = new IndexWriter(base, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(
NoMergePolicy.NO_COMPOUND_FILES));
SortedMap<Integer, String> copySortedMap = new TreeMap<Integer, String>(
sortedMap);
while (!sortedMap.isEmpty()) { // add every field at least once
Document doc = new Document();
int nextField = random.nextInt(maxFieldNum + 1);
sortedMap.remove(nextField);
String name = copySortedMap.get(nextField);
assertNotNull(name);
FieldType customType = new FieldType();
customType.setIndexed(true);
customType.setTokenized(random.nextBoolean());
customType.setOmitNorms(random.nextBoolean());
doc.add(newField(name, _TestUtil.randomRealisticUnicodeString(random),
customType));
writer.addDocument(doc);
if (random.nextInt(10) == 0) {
writer.commit();
}
}
Set<Entry<String, Integer>> expectedEntries = writer.segmentInfos
.getOrLoadGlobalFieldNumberMap(base).entries();
writer.addIndexes(dir); // add the old index
writer.close();
writer = new IndexWriter(base, newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES));
writer.commit(); // make sure the old index is the latest segment
writer.close();
// we don't merge here since we use NoMergePolicy
SegmentInfos sis = new SegmentInfos();
sis.read(base);
// check that the latest global field numbers are consistent and carried
// over from the 4.0 index
FieldNumberBiMap actualGlobalMap = sis
.getOrLoadGlobalFieldNumberMap(base);
assertEquals(expectedEntries, actualGlobalMap.entries());
base.close();
} finally {
if (dir != null)
dir.close();
_TestUtil.rmDir(oldIndxeDir);
}
}
public void testFilesOnOldIndex() throws IOException {
int i = random.nextInt(oldNames.length);
File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
Directory dir = null;
MergePolicy policy = random.nextBoolean() ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES;
try {
_TestUtil
.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
dir = newFSDirectory(oldIndxeDir);
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(policy));
SegmentInfos segmentInfos = writer.segmentInfos;
assertTrue(DefaultSegmentInfosWriter.FORMAT_4_0 < segmentInfos.getFormat());
assertEquals(0, segmentInfos.getGlobalFieldMapVersion());
for (String string : writer.getIndexFileNames()) {
assertFalse(string.endsWith(".fnx"));
}
writer.commit();
assertTrue(DefaultSegmentInfosWriter.FORMAT_4_0 < segmentInfos.getFormat());
assertEquals(0, segmentInfos.getGlobalFieldMapVersion());
Collection<String> files = writer.getIndexFileNames();
for (String string : files) {
assertFalse(string.endsWith(".fnx"));
}
Document d = new Document();
d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field"));
writer.addDocument(d);
writer.prepareCommit();
// the fnx file should still be under control of the SIS
assertTrue(DefaultSegmentInfosWriter.FORMAT_4_0 < segmentInfos.getFormat());
assertEquals(0, segmentInfos.getLastGlobalFieldMapVersion());
assertEquals(1, segmentInfos.getGlobalFieldMapVersion());
files = writer.getIndexFileNames();
for (String string : files) {
assertFalse(string.endsWith(".fnx"));
}
writer.commit();
// now we should see the fnx file even if this is a 3.x segment
assertTrue(DefaultSegmentInfosWriter.FORMAT_4_0 < segmentInfos.getFormat());
assertEquals(1, segmentInfos.getGlobalFieldMapVersion());
assertEquals(1, segmentInfos.getLastGlobalFieldMapVersion());
files = writer.getIndexFileNames();
assertTrue(files.remove("1.fnx"));
for (String string : files) {
assertFalse(string.endsWith(".fnx"));
}
writer.close();
} finally {
if (dir != null)
dir.close();
_TestUtil.rmDir(oldIndxeDir);
}
}
class KeepAllDeletionPolicy implements IndexDeletionPolicy {
public void onInit(List<? extends IndexCommit> commits) throws IOException {
}
public void onCommit(List<? extends IndexCommit> commits)
throws IOException {
}
}
public static void assertFNXFiles(Directory dir, String... expectedFnxFiles)
throws IOException {
String[] listAll = dir.listAll();
Set<String> fnxFiles = new HashSet<String>();
for (String string : listAll) {
if (string.endsWith(".fnx")) {
fnxFiles.add(string);
}
}
assertEquals("" + fnxFiles, expectedFnxFiles.length, fnxFiles.size());
for (String string : expectedFnxFiles) {
assertTrue(" missing fnx file: " + string, fnxFiles.contains(string));
}
}
}