/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy.directory;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.UTF8TaxonomyWriterCache;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;

public class TestDirectoryTaxonomyWriter extends FacetTestCase {
// A No-Op TaxonomyWriterCache which always discards all given categories, and
// always returns true in put(), to indicate some cache entries were cleared.
private static TaxonomyWriterCache NO_OP_CACHE = new TaxonomyWriterCache() {
@Override
public void close() {}
@Override
public int get(FacetLabel categoryPath) { return -1; }
@Override
public boolean put(FacetLabel categoryPath, int ordinal) { return true; }
@Override
public boolean isFull() { return true; }
@Override
public void clear() {}
@Override
public int size() { return 0; }
};
@Test
public void testCommit() throws Exception {
// Verifies that nothing is committed to the underlying Directory if
// commit() wasn't called.
Directory dir = newDirectory();
DirectoryTaxonomyWriter ltw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
assertFalse(DirectoryReader.indexExists(dir));
ltw.commit(); // first commit, so that an index will be created
ltw.addCategory(new FacetLabel("a"));
IndexReader r = DirectoryReader.open(dir);
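// only the root category, added by the initial commit(), should be visible to the reader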
assertEquals("No categories should have been committed to the underlying directory", 1, r.numDocs());
r.close();
ltw.close();
dir.close();
}
@Test
public void testCommitUserData() throws Exception {
// Verifies taxonomy commit data
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
assertTrue(taxoWriter.getCache() == NO_OP_CACHE);
taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.addCategory(new FacetLabel("b"));
Map<String, String> userCommitData = new HashMap<>();
userCommitData.put("testing", "1 2 3");
taxoWriter.setLiveCommitData(userCommitData.entrySet());
taxoWriter.close();
DirectoryReader r = DirectoryReader.open(dir);
assertEquals("2 categories plus root should have been committed to the underlying directory", 3, r.numDocs());
Map <String, String> readUserCommitData = r.getIndexCommit().getUserData();
assertTrue("wrong value extracted from commit data",
"1 2 3".equals(readUserCommitData.get("testing")));
assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH + " not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
r.close();
// open DirTaxoWriter again and commit; INDEX_EPOCH should still exist
// in the commit data, otherwise DirTaxoReader.refresh() might not detect
// that the taxonomy index has been recreated.
taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
taxoWriter.addCategory(new FacetLabel("c")); // add a category so that commit will happen
taxoWriter.setLiveCommitData(new HashMap<String, String>(){{
put("just", "data");
}}.entrySet());
taxoWriter.commit();
// verify taxoWriter.getLiveCommitData()
Map<String,String> data = new HashMap<>();
Iterable<Map.Entry<String,String>> iter = taxoWriter.getLiveCommitData();
if (iter != null) {
for(Map.Entry<String,String> ent : iter) {
data.put(ent.getKey(), ent.getValue());
}
}
assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH
+ " not found in taxoWriter.commitData", data.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
taxoWriter.close();
r = DirectoryReader.open(dir);
readUserCommitData = r.getIndexCommit().getUserData();
assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH + " not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
r.close();
dir.close();
}
@Test
public void testRollback() throws Exception {
// Verifies that if rollback is called, DTW is closed.
Directory dir = newDirectory();
DirectoryTaxonomyWriter dtw = new DirectoryTaxonomyWriter(dir);
assertTrue(dtw.getCache() instanceof UTF8TaxonomyWriterCache);
dtw.addCategory(new FacetLabel("a"));
dtw.rollback();
// adding a category after rollback should fail, since the writer is closed.
expectThrows(AlreadyClosedException.class, () -> {
dtw.addCategory(new FacetLabel("a"));
});
dir.close();
}
@Test
public void testRecreateRollback() throws Exception {
// Tests rollback with OpenMode.CREATE
Directory dir = newDirectory();
new DirectoryTaxonomyWriter(dir).close();
assertEquals(1, getEpoch(dir));
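// re-opening with OpenMode.CREATE bumps the index epoch, but rolling back must discard that change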
new DirectoryTaxonomyWriter(dir, OpenMode.CREATE).rollback();
assertEquals(1, getEpoch(dir));
dir.close();
}
@Test
public void testEnsureOpen() throws Exception {
// verifies that an exception is thrown if DTW was closed
Directory dir = newDirectory();
DirectoryTaxonomyWriter dtw = new DirectoryTaxonomyWriter(dir);
dtw.close();
// adding a category after close should fail.
expectThrows(AlreadyClosedException.class, () -> {
dtw.addCategory(new FacetLabel("a"));
});
dir.close();
}
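// Helper: adds a category, sets some commit user data and commits the taxonomy writer.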
private void touchTaxo(DirectoryTaxonomyWriter taxoWriter, FacetLabel cp) throws IOException {
taxoWriter.addCategory(cp);
taxoWriter.setLiveCommitData(new HashMap<String, String>(){{
put("just", "data");
}}.entrySet());
taxoWriter.commit();
}
@Test
public void testRecreateAndRefresh() throws Exception {
// DirTaxoWriter used to lose the INDEX_EPOCH property if it was opened in
// CREATE_OR_APPEND (or if commit(userData) was called twice), which could lead
// to DirTaxoReader incorrectly succeeding to refresh() after the taxonomy was
// recreated.
try (Directory dir = newDirectory()) {
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
touchTaxo(taxoWriter, new FacetLabel("a"));
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
touchTaxo(taxoWriter, new FacetLabel("b"));
TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader);
taxoReader.close();
taxoReader = newtr;
assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
// now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again.
taxoWriter.close();
taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
touchTaxo(taxoWriter, new FacetLabel("c"));
taxoWriter.close();
taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
touchTaxo(taxoWriter, new FacetLabel("d"));
taxoWriter.close();
newtr = TaxonomyReader.openIfChanged(taxoReader);
taxoReader.close();
taxoReader = newtr;
assertEquals(2, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
taxoReader.close();
}
}
@Test
public void testBackwardsCompatibility() throws Exception {
// Tests that everything still works if the taxonomy index doesn't have the
// INDEX_EPOCH property (i.e. pre-3.6 indexes are supported).
Directory dir = newDirectory();
// create an empty index first, so that DirTaxoWriter initializes indexEpoch to 1.
new IndexWriter(dir, new IndexWriterConfig(null)).close();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
taxoWriter.close();
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
assertNull(TaxonomyReader.openIfChanged(taxoReader));
taxoReader.close();
dir.close();
}
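// Adds many categories from several threads concurrently, then verifies that the taxonomy
// contains exactly the added categories and that every category's parent chain is consistent.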
public void testConcurrency() throws Exception {
final int ncats = TEST_NIGHTLY ? atLeast(100000) : atLeast(1000); // in nightly runs, add many categories
final int range = ncats * 3; // affects the categories selection
final AtomicInteger numCats = new AtomicInteger(ncats);
final Directory dir = newDirectory();
final ConcurrentHashMap<String,String> values = new ConcurrentHashMap<>();
final double d = random().nextDouble();
final TaxonomyWriterCache cache;
if (d < 0.7) {
// this is the fastest, but also the most memory-consuming
cache = new UTF8TaxonomyWriterCache();
} else if (TEST_NIGHTLY && d > 0.98) {
// this is the slowest, but tests the writer concurrency when no caching is done.
// only pick it during NIGHTLY tests, and even then, with very low probability.
cache = NO_OP_CACHE;
} else {
// this is slower than UTF8, but less memory-consuming, and exercises finding categories on disk too.
cache = new LruTaxonomyWriterCache(ncats / 10);
}
if (VERBOSE) {
System.out.println("TEST: use cache=" + cache);
}
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
Thread[] addThreads = new Thread[atLeast(4)];
for (int z = 0; z < addThreads.length; z++) {
addThreads[z] = new Thread() {
@Override
public void run() {
Random random = random();
while (numCats.decrementAndGet() > 0) {
try {
int value = random.nextInt(range);
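// build a 4-component path from the random value; different values share prefixes,
// so both cache hits and new-category insertions are exercised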
FacetLabel cp = new FacetLabel(Integer.toString(value / 1000), Integer.toString(value / 10000),
Integer.toString(value / 100000), Integer.toString(value));
int ord = tw.addCategory(cp);
assertTrue("invalid parent for ordinal " + ord + ", category " + cp, tw.getParent(ord) != -1);
String l1 = FacetsConfig.pathToString(cp.components, 1);
String l2 = FacetsConfig.pathToString(cp.components, 2);
String l3 = FacetsConfig.pathToString(cp.components, 3);
String l4 = FacetsConfig.pathToString(cp.components, 4);
values.put(l1, l1);
values.put(l2, l2);
values.put(l3, l3);
values.put(l4, l4);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
};
}
for (Thread t : addThreads) t.start();
for (Thread t : addThreads) t.join();
tw.close();
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
// +1 for root category
if (values.size() + 1 != dtr.getSize()) {
for(String value : values.keySet()) {
FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value));
if (dtr.getOrdinal(label) == -1) {
System.out.println("FAIL: path=" + label + " not recognized");
}
}
fail("mismatch number of categories");
}
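// walk each stored path from its first component down and verify that every prefix's
// parent ordinal is the ordinal of the preceding (shorter) prefix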
int[] parents = dtr.getParallelTaxonomyArrays().parents();
for (String cat : values.keySet()) {
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
assertTrue("category not found " + cp, dtr.getOrdinal(cp) > 0);
int level = cp.length;
int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
FacetLabel path = new FacetLabel();
for (int i = 0; i < level; i++) {
path = cp.subpath(i + 1);
int ord = dtr.getOrdinal(path);
assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
parentOrd = ord; // next level should have this parent
}
}
IOUtils.close(dtr, dir);
}
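// Reads the INDEX_EPOCH value from the user data of the latest commit in the given taxonomy directory.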
private long getEpoch(Directory taxoDir) throws IOException {
SegmentInfos infos = SegmentInfos.readLatestCommit(taxoDir);
return Long.parseLong(infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH));
}
@Test
public void testReplaceTaxonomy() throws Exception {
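// Builds a small source taxonomy ("a") and a target taxonomy ("b", "c"), then replaces the
// target with the source and verifies ordinals, taxonomy size and the index epoch.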
Directory input = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(input);
int ordA = taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.close();
Directory dir = newDirectory();
taxoWriter = new DirectoryTaxonomyWriter(dir);
int ordB = taxoWriter.addCategory(new FacetLabel("b"));
taxoWriter.addCategory(new FacetLabel("c"));
taxoWriter.commit();
long origEpoch = getEpoch(dir);
// replace the taxonomy with the input one
taxoWriter.replaceTaxonomy(input);
// LUCENE-4633: make sure that category "a" is not added again in any case
taxoWriter.addTaxonomy(input, new MemoryOrdinalMap());
assertEquals("no categories should have been added", 2, taxoWriter.getSize()); // root + 'a'
assertEquals("category 'a' received new ordinal?", ordA, taxoWriter.addCategory(new FacetLabel("a")));
// add the same category again -- it should not receive the same ordinal!
int newOrdB = taxoWriter.addCategory(new FacetLabel("b"));
assertNotSame("new ordinal cannot be the original ordinal", ordB, newOrdB);
assertEquals("ordinal should have been 2 since only one category was added by replaceTaxonomy", 2, newOrdB);
taxoWriter.close();
long newEpoch = getEpoch(dir);
assertTrue("index epoch should have been updated after replaceTaxonomy", origEpoch < newEpoch);
dir.close();
input.close();
}
@Test
public void testReaderFreshness() throws Exception {
// ensures that the internal index reader is always kept fresh. Previously,
// this simple scenario failed if the cache had just evicted the category that
// was being added.
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
int o1 = taxoWriter.addCategory(new FacetLabel("a"));
int o2 = taxoWriter.addCategory(new FacetLabel("a"));
assertTrue("ordinal for same category that is added twice should be the same !", o1 == o2);
taxoWriter.close();
dir.close();
}
@Test
public void testCommitNoEmptyCommits() throws Exception {
// LUCENE-4972: DTW used to create empty commits even if no changes were made
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.commit();
long gen1 = SegmentInfos.getLastCommitGeneration(dir);
taxoWriter.commit();
long gen2 = SegmentInfos.getLastCommitGeneration(dir);
assertEquals("empty commit should not have changed the index", gen1, gen2);
taxoWriter.close();
dir.close();
}
@Test
public void testCloseNoEmptyCommits() throws Exception {
// LUCENE-4972: DTW used to create empty commits even if no changes were made
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.commit();
long gen1 = SegmentInfos.getLastCommitGeneration(dir);
taxoWriter.close();
long gen2 = SegmentInfos.getLastCommitGeneration(dir);
assertEquals("empty commit should not have changed the index", gen1, gen2);
taxoWriter.close();
dir.close();
}
@Test
public void testPrepareCommitNoEmptyCommits() throws Exception {
// LUCENE-4972: DTW used to create empty commits even if no changes were made
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.prepareCommit();
taxoWriter.commit();
long gen1 = SegmentInfos.getLastCommitGeneration(dir);
taxoWriter.prepareCommit();
taxoWriter.commit();
long gen2 = SegmentInfos.getLastCommitGeneration(dir);
assertEquals("empty commit should not have changed the index", gen1, gen2);
taxoWriter.close();
dir.close();
}
// TODO: this test can hit pathological cases: it adds only a few docs, what is going on?
@Test @Nightly
public void testHugeLabel() throws Exception {
Directory indexDir = newDirectory(), taxoDir = newDirectory();
IndexWriter indexWriter = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new UTF8TaxonomyWriterCache());
FacetsConfig config = new FacetsConfig();
// Add one huge label:
String bigs = null;
int ordinal = -1;
int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator
bigs = TestUtil.randomSimpleString(random(), len, len);
FacetField ff = new FacetField("dim", bigs);
FacetLabel cp = new FacetLabel("dim", bigs);
ordinal = taxoWriter.addCategory(cp);
Document doc = new Document();
doc.add(ff);
indexWriter.addDocument(config.build(taxoWriter, doc));
// Add tiny ones to cause a re-hash
for (int i = 0; i < 3; i++) {
String s = TestUtil.randomSimpleString(random(), 1, 10);
taxoWriter.addCategory(new FacetLabel("dim", s));
doc = new Document();
doc.add(new FacetField("dim", s));
indexWriter.addDocument(config.build(taxoWriter, doc));
}
// when over-long components were (incorrectly) allowed to be added, this resulted
// in a new category being added instead of returning the existing ordinal
assertEquals(ordinal, taxoWriter.addCategory(cp));
indexWriter.close();
IOUtils.close(taxoWriter);
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig());
ddq.add("dim", bigs);
assertEquals(1, searcher.search(ddq, 10).totalHits.value);
IOUtils.close(indexReader, taxoReader, indexDir, taxoDir);
}
@Test
public void testReplaceTaxoWithLargeTaxonomy() throws Exception {
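// Replaces a small taxonomy with a larger one and verifies that getParent() works for
// ordinals beyond the size of the taxonomy that existed before the replace.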
Directory srcTaxoDir = newDirectory(), targetTaxoDir = newDirectory();
// build the source (large) taxonomy
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(srcTaxoDir);
int ord = taxoWriter.addCategory(new FacetLabel("A", "1", "1", "1", "1", "1", "1"));
taxoWriter.close();
taxoWriter = new DirectoryTaxonomyWriter(targetTaxoDir);
int ordinal = taxoWriter.addCategory(new FacetLabel("B", "1"));
assertEquals(1, taxoWriter.getParent(ordinal)); // call getParent to initialize taxoArrays
taxoWriter.commit();
taxoWriter.replaceTaxonomy(srcTaxoDir);
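// the writer now reflects the source taxonomy, in which each ancestor of the deep "A/1/.../1"
// path was added right before its child, so the parent of the deepest ordinal is ord - 1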
assertEquals(ord - 1, taxoWriter.getParent(ord));
taxoWriter.close();
srcTaxoDir.close();
targetTaxoDir.close();
}
}