blob: 2c1e7087a7ea0a4fd35c95655a2645375b843f0c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.SlowDirectory;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.junit.Test;
@SuppressCodecs("SimpleText")
public class TestTaxonomyCombined extends FacetTestCase {
/**
 * Input category paths. fillTaxonomy() and fillTaxonomyCheckPaths() add them to the taxonomy in
 * exactly this order, and row i here is checked against row i of expectedPaths[].
 *
 * <p>The order is deliberate: some rows add a deeper path before its parent is added explicitly
 * (e.g. Date/2006/05 comes before Date/2006), so the writer has to create ancestors on its own.
 */
private static final String[][] categories = {
{"Author", "Tom Clancy"},
{"Author", "Richard Dawkins"},
{"Author", "Richard Adams"},
{"Price", "10", "11"},
{"Price", "10", "12"},
{"Price", "20", "27"},
{"Date", "2006", "05"},
{"Date", "2005"},
{"Date", "2006"}, // already exists at this point as the parent of Date/2006/05
{"Subject", "Nonfiction", "Children", "Animals"},
{"Author", "Stephen Jay Gould"},
// a non-ASCII label written with unicode escapes (Hebrew, Hiragana and Arabic code points)
{"Author", "\u05e0\u05d3\u05d1\u3042\u0628"},
};
/**
 * Expected ordinal paths, parallel to categories[]: row i lists the ordinals of every component
 * of categories[i], from the top-level ancestor down to the category itself.
 *
 * <p>TaxonomyWriter.addCategory() returns only the ordinal of the added category, i.e. the last
 * entry of a row, so that is all fillTaxonomy() checks. The earlier entries of each row are
 * checked through getParent() by fillTaxonomyCheckPaths() and checkPaths().
 */
private static final int[][] expectedPaths = {
{1, 2}, // Author / Tom Clancy
{1, 3}, // Author / Richard Dawkins
{1, 4}, // Author / Richard Adams
{5, 6, 7}, // Price / 10 / 11
{5, 6, 8}, // Price / 10 / 12
{5, 9, 10}, // Price / 20 / 27
{11, 12, 13}, // Date / 2006 / 05
{11, 14}, // Date / 2005
{11, 12}, // Date / 2006 (ordinal was already assigned by the Date/2006/05 row)
{15, 16, 17, 18}, // Subject / Nonfiction / Children / Animals
{1, 19}, // Author / Stephen Jay Gould
{1, 20} // Author / the non-ASCII label
};
/**
 * The full contents the taxonomy is expected to have after categories[] was added, indexed by
 * ordinal: expectedCategories[n] is the path of ordinal n. Ordinals increase in creation order,
 * and parent categories are added automatically (just before the child) when a subcategory is
 * added whose ancestors do not exist yet.
 */
private static final String[][] expectedCategories = {
{}, // 0: the root category
{"Author"}, // 1
{"Author", "Tom Clancy"}, // 2
{"Author", "Richard Dawkins"}, // 3
{"Author", "Richard Adams"}, // 4
{"Price"}, // 5
{"Price", "10"}, // 6
{"Price", "10", "11"}, // 7
{"Price", "10", "12"}, // 8
{"Price", "20"}, // 9
{"Price", "20", "27"}, // 10
{"Date"}, // 11
{"Date", "2006"}, // 12
{"Date", "2006", "05"}, // 13
{"Date", "2005"}, // 14
{"Subject"}, // 15
{"Subject", "Nonfiction"}, // 16
{"Subject", "Nonfiction", "Children"}, // 17
{"Subject", "Nonfiction", "Children", "Animals"}, // 18
{"Author", "Stephen Jay Gould"}, // 19
{"Author", "\u05e0\u05d3\u05d1\u3042\u0628"}, // 20
};
/**
 * Adds every path of categories[] to the given writer, in array order, and fails if the ordinal
 * returned by addCategory() differs from the last entry of the matching expectedPaths[] row.
 * Only the ordinal of the added category is verified here, not those of its ancestors.
 *
 * <p>The expected ordinals assume the taxonomy held nothing but the root before the first call
 * (calling it again on the same, otherwise untouched taxonomy is fine, because existing
 * categories keep their ordinals). Any other pre-existing content makes this method fail.
 *
 * @param tw the taxonomy writer to populate
 * @throws IOException if adding a category fails
 */
public static void fillTaxonomy(TaxonomyWriter tw) throws IOException {
  int row = 0;
  for (String[] components : categories) {
    int actual = tw.addCategory(new FacetLabel(components));
    int[] expectedPath = expectedPaths[row];
    int wanted = expectedPath[expectedPath.length - 1];
    if (actual != wanted) {
      String message =
          "For category " + showcat(components) + " expected ordinal " + wanted + ", but got " + actual;
      fail(message);
    }
    row++;
  }
}
/**
 * Renders a category path, given as an array of components, for use in failure messages.
 *
 * <p>Because this is only ever called while building a diagnostic message, it never throws: a
 * null component is rendered as the text {@code null} instead of raising a NullPointerException
 * that would hide the real test failure.
 *
 * @param path the path components, may be null or empty
 * @return {@code <null>} for a null array, {@code <empty>} for an empty one, {@code <"">} for a
 *     single empty component, otherwise the components joined with '/'
 */
public static String showcat(String[] path) {
  if (path == null) {
    return "<null>";
  }
  if (path.length == 0) {
    return "<empty>";
  }
  // "".equals(...) rather than path[0].length() so that a null component cannot throw here.
  if (path.length == 1 && "".equals(path[0])) {
    return "<\"\">";
  }
  return String.join("/", path);
}
/**
 * Renders a FacetLabel for use in failure messages: {@code <null>} for a null label, {@code
 * <empty>} for a label with no components, otherwise the label's toString() wrapped in angle
 * brackets.
 */
private String showcat(FacetLabel path) {
  if (path != null && path.length != 0) {
    return "<" + path.toString() + ">";
  }
  return path == null ? "<null>" : "<empty>";
}
/**
 * Basic test for TaxonomyWriter: checks that TaxonomyWriter.addCategory() returns the expected
 * ordinals (done inside fillTaxonomy()) and that getSize() afterwards matches the number of
 * expected categories. Reading the taxonomy back is not covered here; later tests do that.
 */
@Test
public void testWriter() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(writer);
  // The size must account for the root and all implicitly created parents as well.
  int sizeAfterFill = writer.getSize();
  assertEquals(expectedCategories.length, sizeAfterFill);
  writer.close();
  dir.close();
}
/**
 * Like testWriter(), but the categories are added a second time through the same writer. The
 * second pass must return the same ordinals as the first and must not create any new category.
 */
@Test
public void testWriterTwice() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  // Two identical passes: fillTaxonomy() itself verifies the returned ordinals each time.
  for (int pass = 0; pass < 2; pass++) {
    fillTaxonomy(writer);
  }
  // No extra categories may have appeared because of the repeated additions.
  int sizeAfterBothPasses = writer.getSize();
  assertEquals(expectedCategories.length, sizeAfterBothPasses);
  writer.close();
  dir.close();
}
/**
 * Like testWriterTwice(), but the writer is closed and a new one is opened on the same directory
 * before the categories are added again. The second writer starts from what the first one left
 * in the directory, so this also exercises the writer's lookup of categories it did not add
 * itself.
 */
@Test
public void testWriterTwice2() throws Exception {
  Directory dir = newDirectory();

  TaxonomyWriter firstSession = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(firstSession);
  firstSession.close();

  // Re-adding through a fresh writer must yield the same ordinals (checked by fillTaxonomy())
  // and must leave the number of categories unchanged.
  TaxonomyWriter secondSession = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(secondSession);
  int sizeAfterSecondSession = secondSession.getSize();
  assertEquals(expectedCategories.length, sizeAfterSecondSession);
  secondSession.close();

  dir.close();
}
/**
 * Builds a taxonomy in two separate writing sessions and commits during the second one. This is
 * a regression test for a commit() bug described in the comments below.
 */
@Test
public void testWriterTwice3() throws Exception {
  Directory dir = newDirectory();

  // Session one: create the taxonomy and fill it.
  TaxonomyWriter firstSession = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(firstSession);
  firstSession.close();

  // Session two: add the very same categories again. Historical background: after a few
  // lookups the writer implementation stops consulting the on-disk data for each category,
  // loads everything into memory instead and closes its internal reader. The old bug was that
  // the reader reference was not cleared when that happened.
  TaxonomyWriter secondSession = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(secondSession);

  // One genuinely new category, so that the commit below has something to write.
  secondSession.addCategory(new FacetLabel("hi"));

  // With the stale (closed but non-null) reader reference, commit() used to try to reopen the
  // reader and failed with an AlreadyClosedException. It must simply succeed now.
  secondSession.commit();

  int expectedSize = expectedCategories.length + 1;
  assertEquals(expectedSize, secondSession.getSize());
  secondSession.close();
  dir.close();
}
/**
 * Step-by-step writer test that does not rely on the shared arrays. Each step isolates one case
 * (new top-level category, duplicate, nested category, implicit parents), so a failure points
 * directly at the broken case, which testWriter() cannot do.
 */
@Test
public void testWriterSimpler() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);

  // A brand-new taxonomy holds the root and nothing else.
  assertEquals(1, writer.getSize());

  // A new top-level category takes the next free ordinal.
  int firstA = writer.addCategory(new FacetLabel("a"));
  assertEquals(1, firstA);
  assertEquals(2, writer.getSize());

  // Adding it again returns the existing ordinal and creates nothing.
  int secondA = writer.addCategory(new FacetLabel("a"));
  assertEquals(1, secondA);
  assertEquals(2, writer.getSize());

  // A different top-level category gets a fresh ordinal.
  int ordB = writer.addCategory(new FacetLabel("b"));
  assertEquals(2, ordB);
  assertEquals(3, writer.getSize());

  // A child of an existing category adds exactly one ordinal.
  int firstAC = writer.addCategory(new FacetLabel("a", "c"));
  assertEquals(3, firstAC);
  assertEquals(4, writer.getSize());

  // Re-adding that second-level category changes nothing.
  int secondAC = writer.addCategory(new FacetLabel("a", "c"));
  assertEquals(3, secondAC);
  assertEquals(4, writer.getSize());

  // A two-component path whose components are both new adds two categories...
  int ordDE = writer.addCategory(new FacetLabel("d", "e"));
  assertEquals(5, ordDE);
  assertEquals(6, writer.getSize());

  // ...and the implicitly created parent received the lower of the two ordinals.
  int ordD = writer.addCategory(new FacetLabel("d"));
  assertEquals(4, ordD);

  // Two new components below an existing top-level category.
  int ordBDE = writer.addCategory(new FacetLabel("b", "d", "e"));
  assertEquals(7, ordBDE);
  assertEquals(8, writer.getSize());

  // One new component below two existing levels.
  int ordBDF = writer.addCategory(new FacetLabel("b", "d", "f"));
  assertEquals(8, ordBDF);
  assertEquals(9, writer.getSize());

  writer.close();
  dir.close();
}
/**
 * Writes an empty taxonomy, closes the writer, and checks that a reader sees the root category
 * and nothing else: size 1, an empty path for ordinal 0, no parent for the root, and ordinal 0
 * for the empty label.
 */
@Test
public void testRootOnly() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  // The root exists as soon as the writer has been opened.
  assertEquals(1, writer.getSize());
  writer.close();

  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  assertEquals(1, reader.getSize());
  FacetLabel rootPath = reader.getPath(0);
  assertEquals(0, rootPath.length);
  int[] parents = reader.getParallelTaxonomyArrays().parents();
  assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]);
  FacetLabel emptyLabel = new FacetLabel();
  assertEquals(0, reader.getOrdinal(emptyLabel));
  reader.close();
  dir.close();
}
/**
 * Same checks as testRootOnly(), but the writer is only committed, not closed, before the reader
 * is opened. The root must be visible to a reader while the writer that created it is still
 * open.
 */
@Test
public void testRootOnly2() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  writer.commit();

  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  assertEquals(1, reader.getSize());
  FacetLabel rootPath = reader.getPath(0);
  assertEquals(0, rootPath.length);
  int[] parents = reader.getParallelTaxonomyArrays().parents();
  assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]);
  FacetLabel emptyLabel = new FacetLabel();
  assertEquals(0, reader.getOrdinal(emptyLabel));

  writer.close();
  reader.close();
  dir.close();
}
/**
 * Basic test of TaxonomyReader's conversions between ordinals and category paths: getSize(),
 * getPath() and getOrdinal(). After the taxonomy has been written and the writer closed, every
 * category and ordinal must be readable exactly as listed in expectedCategories[], and invalid
 * ordinals or unknown categories must be reported as such.
 */
@Test
public void testReaderBasic() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(writer);
  writer.close();
  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);

  // getSize()
  assertEquals(expectedCategories.length, reader.getSize());

  // Round trip: ordinal to path and back to the same ordinal (root included).
  for (int ord = 0; ord < reader.getSize(); ord++) {
    FacetLabel path = reader.getPath(ord);
    assertEquals(ord, reader.getOrdinal(path));
  }

  // getPath() for every non-root ordinal.
  int next = 1;
  while (next < reader.getSize()) {
    FacetLabel wanted = new FacetLabel(expectedCategories[next]);
    FacetLabel actual = reader.getPath(next);
    if (!wanted.equals(actual)) {
      String message =
          "For ordinal " + next + " expected category " + showcat(wanted) + ", but got " + showcat(actual);
      fail(message);
    }
    next++;
  }

  // getPath() with ordinals that do not exist.
  assertNull(reader.getPath(-1));
  assertNull(reader.getPath(reader.getSize()));
  assertNull(reader.getPath(TaxonomyReader.INVALID_ORDINAL));

  // getOrdinal() for every non-root category.
  for (int wantedOrdinal = 1; wantedOrdinal < expectedCategories.length; wantedOrdinal++) {
    String[] components = expectedCategories[wantedOrdinal];
    int actualOrdinal = reader.getOrdinal(new FacetLabel(components));
    if (actualOrdinal != wantedOrdinal) {
      String message =
          "For category "
              + showcat(components)
              + " expected ordinal "
              + wantedOrdinal
              + ", but got "
              + actualOrdinal;
      fail(message);
    }
  }

  // getOrdinal() with categories that were never added.
  FacetLabel unknownTopLevel = new FacetLabel("non-existant");
  assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getOrdinal(unknownTopLevel));
  FacetLabel unknownChild = new FacetLabel("Author", "Jules Verne");
  assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getOrdinal(unknownChild));

  reader.close();
  dir.close();
}
/**
 * Tests the parents() array exposed by the reader's ParallelTaxonomyArrays. For every ordinal the
 * reported parent is compared with the parent implied by the category's own path (the path minus
 * its last component). This relies on the ordinal/path conversions verified by
 * testReaderBasic().
 */
@Test
public void testReaderParent() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(writer);
  writer.close();
  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);

  int[] parentOf = reader.getParallelTaxonomyArrays().parents();

  // The root has no parent.
  assertEquals(TaxonomyReader.INVALID_ORDINAL, parentOf[0]);

  // Every other ordinal must point at the category named by its path prefix.
  for (int child = 1; child < reader.getSize(); child++) {
    FacetLabel childPath = reader.getPath(child);
    int reportedParent = parentOf[child];
    FacetLabel parentPath = reader.getPath(reportedParent);
    if (parentPath == null) {
      fail("Parent of " + child + " is " + reportedParent + ", but this is not a valid category.");
    }
    FacetLabel impliedParent = childPath.subpath(childPath.length - 1);
    if (impliedParent.equals(parentPath)) {
      continue;
    }
    fail(
        "Got parent "
            + reportedParent
            + " for ordinal "
            + child
            + " but categories are "
            + showcat(parentPath)
            + " and "
            + showcat(childPath)
            + " respectively.");
  }

  reader.close();
  dir.close();
}
/**
 * Tests TaxonomyWriter.getParent() by comparing its answers with the parents implied by the
 * category paths, which are obtained through a TaxonomyReader (see checkWriterParent()).
 *
 * <p>In this variant the writer that filled the taxonomy is closed and a new writer is opened on
 * the same directory before getParent() is queried; testWriterParent2() queries the original
 * writer instead.
 */
@Test
public void testWriterParent1() throws Exception {
  Directory dir = newDirectory();

  TaxonomyWriter filler = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(filler);
  filler.close();

  TaxonomyWriter reopened = new DirectoryTaxonomyWriter(dir);
  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  checkWriterParent(reader, reopened);

  reopened.close();
  reader.close();
  dir.close();
}
/**
 * Variant of testWriterParent1() that keeps the original writer open: the taxonomy is filled and
 * committed, and getParent() is then queried on that same writer.
 */
@Test
public void testWriterParent2() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(writer);
  // Commit so that the reader opened next can see the categories.
  writer.commit();

  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  checkWriterParent(reader, writer);

  writer.close();
  reader.close();
  dir.close();
}
/**
 * Shared body of the testWriterParent tests. Verifies that tw.getParent() agrees, for every
 * ordinal known to the reader, with the parent implied by that ordinal's path, and that
 * out-of-range ordinals are rejected with an IndexOutOfBoundsException.
 *
 * @param tr reader used to translate ordinals to paths and to learn the taxonomy size
 * @param tw writer whose getParent() is under test
 */
private void checkWriterParent(TaxonomyReader tr, TaxonomyWriter tw) throws Exception {
  // The root has no parent.
  assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(0));

  // Every other ordinal must point at the category named by its path prefix.
  for (int child = 1; child < tr.getSize(); child++) {
    FacetLabel childPath = tr.getPath(child);
    int reportedParent = tw.getParent(child);
    FacetLabel parentPath = tr.getPath(reportedParent);
    if (parentPath == null) {
      fail("Parent of " + child + " is " + reportedParent + ", but this is not a valid category.");
    }
    FacetLabel impliedParent = childPath.subpath(childPath.length - 1);
    if (impliedParent.equals(parentPath)) {
      continue;
    }
    fail(
        "Got parent "
            + reportedParent
            + " for ordinal "
            + child
            + " but categories are "
            + showcat(parentPath)
            + " and "
            + showcat(childPath)
            + " respectively.");
  }

  // Ordinals outside the taxonomy: negative, the invalid marker, and one past the last ordinal.
  expectThrows(IndexOutOfBoundsException.class, () -> tw.getParent(-1));
  expectThrows(
      IndexOutOfBoundsException.class, () -> tw.getParent(TaxonomyReader.INVALID_ORDINAL));
  expectThrows(IndexOutOfBoundsException.class, () -> tw.getParent(tr.getSize()));
}
/**
 * Tests the children() and siblings() arrays of the reader's ParallelTaxonomyArrays against the
 * known example taxonomy. For every category, the expected children are derived from
 * expectedCategories[] and compared with the chain obtained by starting at children()[parent] and
 * following siblings() until it ends. Only correctness on this one example is covered; growth and
 * refresh are tested separately.
 */
@Test
public void testChildrenArrays() throws Exception {
  Directory indexDir = newDirectory();
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
  fillTaxonomy(tw);
  tw.close();
  TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
  ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
  int[] youngestChildArray = ca.children();
  assertEquals(tr.getSize(), youngestChildArray.length);
  int[] olderSiblingArray = ca.siblings();
  assertEquals(tr.getSize(), olderSiblingArray.length);
  for (int i = 0; i < expectedCategories.length; i++) {
    // Collect the expected children from highest to lowest ordinal, which is the order in
    // which the youngest-child / older-sibling chain is expected to visit them.
    ArrayList<Integer> expectedChildren = new ArrayList<>();
    for (int j = expectedCategories.length - 1; j >= 0; j--) {
      if (isDirectChild(expectedCategories[i], expectedCategories[j])) {
        expectedChildren.add(j);
      }
    }
    if (expectedChildren.isEmpty()) {
      assertEquals(
          "ordinal " + i + " should have no children",
          TaxonomyReader.INVALID_ORDINAL,
          youngestChildArray[i]);
    } else {
      int child = youngestChildArray[i];
      assertEquals(
          "youngest child of ordinal " + i, expectedChildren.get(0).intValue(), child);
      for (int j = 1; j < expectedChildren.size(); j++) {
        // If the chain ends early, child is INVALID_ORDINAL and the assertion below reports
        // it, so there is no need for a separate end-of-chain check inside the loop.
        child = olderSiblingArray[child];
        assertEquals(
            "child #" + j + " (youngest first) of ordinal " + i,
            expectedChildren.get(j).intValue(),
            child);
      }
      // The chain must end here; anything else means the arrays list too many children.
      assertEquals(
          "ordinal " + i + " has more children than expected",
          TaxonomyReader.INVALID_ORDINAL,
          olderSiblingArray[child]);
    }
  }
  tr.close();
  indexDir.close();
}

/** Returns true if candidate is exactly one component longer than parent and starts with it. */
private static boolean isDirectChild(String[] parent, String[] candidate) {
  if (candidate.length != parent.length + 1) {
    return false;
  }
  for (int k = 0; k < parent.length; k++) {
    if (!candidate[k].equals(parent[k])) {
      return false;
    }
  }
  return true;
}
/**
 * Like testChildrenArrays(), but instead of comparing with precomputed expectations it checks
 * relationships that must hold between the parents(), children() and siblings() arrays, for
 * example that the youngest child of a category has that category as its parent. Because no
 * expected list has to be built, this kind of check scales to larger example taxonomies.
 */
@Test
public void testChildrenArraysInvariants() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  fillTaxonomy(writer);
  writer.close();
  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  ParallelTaxonomyArrays arrays = reader.getParallelTaxonomyArrays();
  int[] youngestChild = arrays.children();
  assertEquals(reader.getSize(), youngestChild.length);
  int[] olderSibling = arrays.siblings();
  assertEquals(reader.getSize(), olderSibling.length);
  int[] parentOf = reader.getParallelTaxonomyArrays().parents();

  // The youngest child of a category, if there is one, has that category as its parent.
  for (int ord = 0; ord < reader.getSize(); ord++) {
    int child = youngestChild[ord];
    if (child == TaxonomyReader.INVALID_ORDINAL) {
      continue;
    }
    assertEquals(ord, parentOf[child]);
  }

  // An older sibling has a lower ordinal; INVALID_ORDINAL also satisfies this because it is
  // lower than every real ordinal.
  for (int ord = 0; ord < reader.getSize(); ord++) {
    boolean older = olderSibling[ord] < ord;
    assertTrue("olderSiblingArray[" + ord + "] should be <" + ord, older);
  }

  // An older sibling shares its parent with the category it is a sibling of.
  for (int ord = 0; ord < reader.getSize(); ord++) {
    int sibling = olderSibling[ord];
    if (sibling != TaxonomyReader.INVALID_ORDINAL) {
      assertEquals(parentOf[ord], parentOf[sibling]);
    }
  }

  // The remaining checks are less "invariant-like": they recompute the answers by brute force.

  // children() must name the highest-ordinal child, so that no child at the head of the chain
  // is skipped.
  for (int ord = 0; ord < reader.getSize(); ord++) {
    int wanted = TaxonomyReader.INVALID_ORDINAL;
    for (int candidate = reader.getSize() - 1; candidate > ord; candidate--) {
      if (parentOf[candidate] == ord) {
        wanted = candidate;
        break;
      }
    }
    assertEquals(wanted, youngestChild[ord]);
  }

  // siblings() must name the closest lower-ordinal category with the same parent, so that no
  // child in the middle or at the end of the chain is skipped.
  for (int ord = 0; ord < reader.getSize(); ord++) {
    int wanted = TaxonomyReader.INVALID_ORDINAL;
    for (int candidate = ord - 1; candidate >= 0; candidate--) {
      if (parentOf[candidate] == parentOf[ord]) {
        wanted = candidate;
        break;
      }
    }
    assertEquals(wanted, olderSibling[ord]);
  }

  reader.close();
  dir.close();
}
/**
 * Tests how the children() and siblings() arrays behave while the taxonomy grows: an open reader
 * keeps returning the very same arrays object until it is refreshed through openIfChanged(), and
 * the refreshed reader reflects the categories committed in the meantime.
 */
@Test
public void testChildrenArraysGrowth() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  writer.addCategory(new FacetLabel("hi", "there"));
  writer.commit();

  // Ordinals so far: 0 = root, 1 = hi, 2 = hi/there.
  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  ParallelTaxonomyArrays arrays = reader.getParallelTaxonomyArrays();
  assertEquals(3, reader.getSize());
  assertEquals(3, arrays.siblings().length);
  assertEquals(3, arrays.children().length);
  int[] childrenBefore = {1, 2, -1};
  int[] siblingsBefore = {-1, -1, -1};
  assertTrue(Arrays.equals(childrenBefore, arrays.children()));
  assertTrue(Arrays.equals(siblingsBefore, arrays.siblings()));

  // Grow the taxonomy: 3 = hi/ho, 4 = hello.
  writer.addCategory(new FacetLabel("hi", "ho"));
  writer.addCategory(new FacetLabel("hello"));
  writer.commit();

  // Without a refresh the reader still hands out the identical arrays object, unchanged.
  ParallelTaxonomyArrays sameArrays = reader.getParallelTaxonomyArrays();
  assertSame(sameArrays, arrays);
  assertEquals(3, reader.getSize());
  assertEquals(3, arrays.siblings().length);
  assertEquals(3, arrays.children().length);

  // A refreshed reader sees the new categories.
  TaxonomyReader refreshed = TaxonomyReader.openIfChanged(reader);
  assertNotNull(refreshed);
  reader.close();
  reader = refreshed;
  arrays = reader.getParallelTaxonomyArrays();
  assertEquals(5, reader.getSize());
  assertEquals(5, arrays.siblings().length);
  assertEquals(5, arrays.children().length);
  int[] childrenAfter = {4, 3, -1, -1, -1};
  int[] siblingsAfter = {-1, -1, -1, 2, 1};
  assertTrue(Arrays.equals(childrenAfter, arrays.children()));
  assertTrue(Arrays.equals(siblingsAfter, arrays.siblings()));

  writer.close();
  reader.close();
  dir.close();
}
/**
 * Tests that the taxonomy arrays handed out by a reader stay consistent while that reader is
 * being refreshed concurrently.
 *
 * <p>First, on a regular directory, the youngest child of category a/b is recorded at two points
 * in time: right after a/b was created, and after a batch of children was added below it. Then
 * assertConsistentYoungestChild() is run several times; each run repeats the scenario with a
 * concurrent verifier and accepts only one of those two recorded values.
 */
@Test
public void testTaxonomyReaderRefreshRaces() throws Exception {
  Directory baseDir = newDirectory();
  TaxonomyWriter baseWriter = new DirectoryTaxonomyWriter(baseDir);
  baseWriter.addCategory(new FacetLabel("a", "0"));
  final FacetLabel abPath = new FacetLabel("a", "b");
  baseWriter.addCategory(abPath);
  baseWriter.commit();

  // Snapshot 1: a/b exists but has no children yet.
  TaxonomyReader baseReader = new DirectoryTaxonomyReader(baseDir);
  final ParallelTaxonomyArrays arraysBefore = baseReader.getParallelTaxonomyArrays();
  final int abOrd = baseReader.getOrdinal(abPath);
  final int youngestBefore = arraysBefore.children()[abOrd];

  final int numCategories = atLeast(200);
  int added = 0;
  while (added < numCategories) {
    baseWriter.addCategory(new FacetLabel("a", "b", Integer.toString(added)));
    added++;
  }
  baseWriter.close();

  // Snapshot 2: all children of a/b are visible after the refresh.
  TaxonomyReader refreshed = TaxonomyReader.openIfChanged(baseReader);
  assertNotNull(refreshed);
  baseReader.close();
  baseReader = refreshed;
  final ParallelTaxonomyArrays arraysAfter = baseReader.getParallelTaxonomyArrays();
  final int youngestAfter = arraysAfter.children()[abOrd];

  int numRetries = atLeast(10);
  for (int attempt = 0; attempt < numRetries; attempt++) {
    assertConsistentYoungestChild(
        abPath, abOrd, youngestBefore, youngestAfter, attempt, numCategories);
  }

  baseReader.close();
  baseDir.close();
}
/**
 * Runs one round of the refresh-race check used by testTaxonomyReaderRefreshRaces().
 *
 * <p>A taxonomy with a/0 and a/b is committed and a reader is opened on it. Then numCategories
 * children are added below a/b and the writer is closed. While the main thread refreshes the
 * reader through openIfChanged() on a deliberately slowed-down directory, a background thread
 * keeps polling the not-yet-refreshed reader and checks that the youngest child it reports for
 * a/b is one of the two known consistent values. Any failure in the background thread is
 * captured and reported from the calling thread.
 *
 * @param abPath the label of category a/b
 * @param abOrd the ordinal of a/b
 * @param abYoungChildBase1 youngest child of a/b before its children were added
 * @param abYoungChildBase2 youngest child of a/b after all its children were added
 * @param retry index of this round, used in failure messages only
 * @param numCategories how many children to add below a/b
 */
private void assertConsistentYoungestChild(
    final FacetLabel abPath,
    final int abOrd,
    final int abYoungChildBase1,
    final int abYoungChildBase2,
    final int retry,
    int numCategories)
    throws Exception {
  SlowDirectory indexDir = new SlowDirectory(-1, null); // no slowness for initialization
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
  tw.addCategory(new FacetLabel("a", "0"));
  tw.addCategory(abPath);
  tw.commit();

  final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
  for (int i = 0; i < numCategories; i++) {
    final FacetLabel cp = new FacetLabel("a", "b", Integer.toString(i));
    tw.addCategory(cp);
    assertEquals(
        "Ordinal of " + cp + " must be invalid until Taxonomy Reader was refreshed",
        TaxonomyReader.INVALID_ORDINAL,
        tr.getOrdinal(cp));
  }
  tw.close();

  final AtomicBoolean stop = new AtomicBoolean(false);
  // One-element arrays so the anonymous thread class can write to them; they are read by this
  // thread only after join(), so no further synchronization is needed.
  final Throwable[] error = new Throwable[] {null};
  final int[] retrieval = {0};

  Thread thread =
      new Thread("Child Arrays Verifier") {
        @Override
        public void run() {
          // Clamp the raised priority: setPriority() rejects values above MAX_PRIORITY, and an
          // exception here would end the verifier without any check having run.
          setPriority(Math.min(Thread.MAX_PRIORITY, 1 + getPriority()));
          try {
            while (!stop.get()) {
              int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1;
              assertNotNull(
                  "path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
              assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
              sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
            }
          } catch (Throwable e) {
            error[0] = e;
            stop.set(true);
          }
        }

        private void assertChildrenArrays(
            ParallelTaxonomyArrays ca, int retryNo, int retrievalNo) {
          // Read the value once, so that the value checked is the value reported.
          final int abYoungChild = ca.children()[abOrd];
          assertTrue(
              "Retry "
                  + retryNo
                  + ": retrieval: "
                  + retrievalNo
                  + ": wrong youngest child for category "
                  + abPath
                  + " (ord="
                  + abOrd
                  + ") - must be either "
                  + abYoungChildBase1
                  + " or "
                  + abYoungChildBase2
                  + " but was: "
                  + abYoungChild,
              abYoungChildBase1 == abYoungChild || abYoungChildBase2 == abYoungChild);
        }
      };
  thread.start();

  try {
    indexDir.setSleepMillis(1); // some delay for refresh
    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
    if (newTaxoReader != null) {
      newTaxoReader.close();
    }
  } finally {
    // Always stop and wait for the verifier, even if the refresh itself failed; otherwise the
    // thread would keep polling after this method has returned.
    stop.set(true);
    thread.join();
  }

  assertNull(
      "Unexpected exception at retry "
          + retry
          + " retrieval "
          + retrieval[0]
          + ": \n"
          + stackTraceStr(error[0]),
      error[0]);

  tr.close();
  indexDir.close();
}
/**
 * Renders the stack trace of the given throwable as a string, or returns the empty string for
 * null. Used to carry a failure that happened in a background thread into the message of an
 * assertion made on the test thread.
 */
private String stackTraceStr(final Throwable error) {
  if (error != null) {
    StringWriter buffer = new StringWriter();
    try (PrintWriter out = new PrintWriter(buffer)) {
      error.printStackTrace(out);
    }
    return buffer.toString();
  }
  return "";
}
/**
 * Tests that, with separately opened reader and writer objects on the same directory, categories
 * added through the writer become visible to the reader only after the writer has committed AND
 * the reader has been refreshed through openIfChanged().
 *
 * <p>This mimics two independent processes, one indexing and one searching, that share nothing
 * but the files. It does not cover the two sides working at exactly the same time, nor
 * multi-threading, and it exercises only a few reader methods.
 */
@Test
public void testSeparateReaderAndWriter() throws Exception {
  Directory indexDir = newDirectory();
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
  tw.commit();
  TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);

  assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
  tw.addCategory(new FacetLabel("Author"));
  assertEquals(1, tr.getSize()); // still root only...
  // Refreshing alone is not enough, because tw.commit() hasn't been done yet:
  assertNull(TaxonomyReader.openIfChanged(tr));
  assertEquals(1, tr.getSize()); // still root only...
  tw.commit();
  // Committing alone is not enough either, the reader has to be refreshed:
  assertEquals(1, tr.getSize()); // still root only...
  TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
  assertNotNull(newTaxoReader);
  tr.close();
  tr = newTaxoReader;

  int author = 1;
  try {
    assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents()[author]);
    // ok
  } catch (ArrayIndexOutOfBoundsException e) {
    // Keep the original exception as the cause so its stack trace shows up in the report.
    throw new AssertionError(
        "After category addition, commit() and refresh(), getParent for "
            + author
            + " should NOT throw exception",
        e);
  }
  assertEquals(2, tr.getSize()); // finally, see there are two categories

  // Now add another category, and verify that after commit and refresh the parent of this
  // category is correct (the refreshed reader has to extend its parents array), and that the
  // previously known information is still intact:
  tw.addCategory(new FacetLabel("Author", "Richard Dawkins"));
  int dawkins = 2;
  tw.commit();
  newTaxoReader = TaxonomyReader.openIfChanged(tr);
  assertNotNull(newTaxoReader);
  tr.close();
  tr = newTaxoReader;
  int[] parents = tr.getParallelTaxonomyArrays().parents();
  assertEquals(author, parents[dawkins]);
  assertEquals(TaxonomyReader.ROOT_ORDINAL, parents[author]);
  assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]);
  assertEquals(3, tr.getSize());

  tw.close();
  tr.close();
  indexDir.close();
}
/**
 * Companion of testSeparateReaderAndWriter() that watches getOrdinal() as well as getSize(): a
 * category added through a separate writer stays unknown to the reader until the writer has
 * committed and the reader has been refreshed through openIfChanged().
 */
@Test
public void testSeparateReaderAndWriter2() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  writer.commit();
  TaxonomyReader reader = new DirectoryTaxonomyReader(dir);
  FacetLabel author = new FacetLabel("Author");

  // Initially only the root exists.
  assertEquals(1, reader.getSize());
  assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getOrdinal(author));

  // Added but neither committed nor refreshed: invisible.
  writer.addCategory(author);
  assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getOrdinal(author));
  assertEquals(1, reader.getSize());

  // A refresh without a prior commit finds nothing new.
  TaxonomyReader premature = TaxonomyReader.openIfChanged(reader);
  assertNull(premature);
  assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getOrdinal(author));
  assertEquals(1, reader.getSize());

  // Committed but not refreshed: still invisible to the old reader.
  writer.commit();
  assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getOrdinal(author));
  assertEquals(1, reader.getSize());

  // Committed and refreshed: visible.
  TaxonomyReader refreshed = TaxonomyReader.openIfChanged(reader);
  assertNotNull(refreshed);
  reader.close();
  reader = refreshed;
  assertEquals(1, reader.getOrdinal(author));
  assertEquals(2, reader.getSize());

  writer.close();
  reader.close();
  dir.close();
}
/**
 * Same as fillTaxonomy(), but in addition to the ordinal returned by addCategory() it verifies
 * the whole ancestor chain: right after each category is added, getParent() is followed upwards
 * and every ordinal on the way is compared with the matching expectedPaths[] row.
 *
 * <p>Like fillTaxonomy(), this assumes the taxonomy held nothing but the root before the first
 * call; other pre-existing content makes it fail.
 *
 * @param tw the taxonomy writer to populate and query
 * @throws IOException if adding a category or looking up a parent fails
 */
public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException {
  int row = 0;
  for (String[] components : categories) {
    int current = tw.addCategory(new FacetLabel(components));
    int[] expectedPath = expectedPaths[row];
    int leaf = expectedPath.length - 1;
    if (current != expectedPath[leaf]) {
      fail(
          "For category "
              + showcat(components)
              + " expected ordinal "
              + expectedPath[leaf]
              + ", but got "
              + current);
    }
    // Walk from the added category towards the top-level ancestor.
    for (int depth = leaf - 1; depth >= 0; depth--) {
      current = tw.getParent(current);
      int wanted = expectedPath[depth];
      if (current != wanted) {
        int levelsUp = leaf - depth;
        fail(
            "For category "
                + showcat(components)
                + " expected ancestor level "
                + levelsUp
                + " was "
                + wanted
                + ", but got "
                + current);
      }
    }
    row++;
  }
}
/**
 * To be called after fillTaxonomy() has succeeded: verifies, for every row of expectedPaths[],
 * that following getParent() upwards from the row's last ordinal yields the row's other ordinals
 * in reverse, that the row's first ordinal is a child of the root, and finally that the root
 * itself has no parent.
 *
 * @param tw the taxonomy writer to query
 * @throws IOException if looking up a parent fails
 */
public static void checkPaths(TaxonomyWriter tw) throws IOException {
  int row = 0;
  for (String[] components : categories) {
    int[] expectedPath = expectedPaths[row];
    int leaf = expectedPath.length - 1;
    int current = expectedPath[leaf];
    for (int depth = leaf - 1; depth >= 0; depth--) {
      current = tw.getParent(current);
      int wanted = expectedPath[depth];
      if (current != wanted) {
        int levelsUp = leaf - depth;
        fail(
            "For category "
                + showcat(components)
                + " expected ancestor level "
                + levelsUp
                + " was "
                + wanted
                + ", but got "
                + current);
      }
    }
    int topLevel = expectedPath[0];
    assertEquals(TaxonomyReader.ROOT_ORDINAL, tw.getParent(topLevel));
    row++;
  }
  assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(TaxonomyReader.ROOT_ORDINAL));
}
/**
 * Basic test for TaxonomyWriter.getParent(). Like testWriter(), but the parents of each added
 * category are verified too (through fillTaxonomyCheckPaths()), not just the category's own
 * ordinal.
 */
@Test
public void testWriterCheckPaths() throws Exception {
  Directory dir = newDirectory();
  TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  fillTaxonomyCheckPaths(writer);
  // The size must account for the root and all implicitly created parents as well.
  int sizeAfterFill = writer.getSize();
  assertEquals(expectedCategories.length, sizeAfterFill);
  writer.close();
  dir.close();
}
/**
 * Path-checking counterpart of testWriterTwice() and testWriterTwice2(). The ancestor chains are
 * verified after the first fill, after filling again through the same writer, right after
 * reopening the directory with a new writer, and once more after filling through that new
 * writer. The last two steps exercise parent lookups on data the writer did not add itself.
 */
@Test
public void testWriterCheckPaths2() throws Exception {
  Directory dir = newDirectory();

  TaxonomyWriter firstSession = new DirectoryTaxonomyWriter(dir);
  // Fill and verify twice through the same writer.
  for (int pass = 0; pass < 2; pass++) {
    fillTaxonomy(firstSession);
    checkPaths(firstSession);
  }
  firstSession.close();

  TaxonomyWriter secondSession = new DirectoryTaxonomyWriter(dir);
  // Before anything is added, the new writer must already know all the parents.
  checkPaths(secondSession);
  fillTaxonomy(secondSession);
  checkPaths(secondSession);
  secondSession.close();

  dir.close();
}
/**
 * Tests a reader opened directly on a writer: a category added through the writer must become
 * visible through openIfChanged() without the writer having committed.
 */
@Test
public void testNRT() throws Exception {
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);

  FacetLabel cp = new FacetLabel("a");
  writer.addCategory(cp);
  TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
  assertNotNull("expected a new instance", newReader);
  assertEquals(2, newReader.getSize());
  // Compare by value. The previous assertNotSame() compared two autoboxed Integer objects by
  // identity, which only gave the intended answer thanks to the Integer cache. With a size of 2
  // (root plus "a") the only valid ordinal for the new category is 1.
  assertEquals(
      "category " + cp + " must be visible to the refreshed reader",
      1,
      newReader.getOrdinal(cp));
  reader.close();
  reader = newReader;

  writer.close();
  reader.close();

  dir.close();
}
// TODO (Facet): test multiple readers, one writer. Have the multiple readers
// using the same object (simulating threads) or different objects
// (simulating processes).
}