| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.facet.taxonomy; |
| |
| import java.io.IOException; |
| import java.io.PrintWriter; |
| import java.io.StringWriter; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.concurrent.atomic.AtomicBoolean; |
| import org.apache.lucene.facet.FacetTestCase; |
| import org.apache.lucene.facet.SlowDirectory; |
| import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; |
| import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| import org.junit.Test; |
| |
| @SuppressCodecs("SimpleText") |
| public class TestTaxonomyCombined extends FacetTestCase { |
| |
| /** |
| * The following categories will be added to the taxonomy by fillTaxonomy(), and tested by all |
| * tests below: |
| */ |
| private static final String[][] categories = { |
| {"Author", "Tom Clancy"}, |
| {"Author", "Richard Dawkins"}, |
| {"Author", "Richard Adams"}, |
| {"Price", "10", "11"}, |
| {"Price", "10", "12"}, |
| {"Price", "20", "27"}, |
| {"Date", "2006", "05"}, |
| {"Date", "2005"}, |
| {"Date", "2006"}, |
| {"Subject", "Nonfiction", "Children", "Animals"}, |
| {"Author", "Stephen Jay Gould"}, |
| {"Author", "\u05e0\u05d3\u05d1\u3042\u0628"}, |
| }; |
| |
| /** |
| * When adding the above categories with TaxonomyWriter.addCategory(), the following paths are |
| * expected to be returned: (note that currently the full path is not returned, and therefore not |
| * tested - rather, just the last component, the ordinal, is returned and tested. |
| */ |
| private static final int[][] expectedPaths = { |
| {1, 2}, |
| {1, 3}, |
| {1, 4}, |
| {5, 6, 7}, |
| {5, 6, 8}, |
| {5, 9, 10}, |
| {11, 12, 13}, |
| {11, 14}, |
| {11, 12}, |
| {15, 16, 17, 18}, |
| {1, 19}, |
| {1, 20} |
| }; |
| |
| /** |
| * The taxonomy index is expected to then contain the following generated categories, with |
| * increasing ordinals (note how parent categories are be added automatically when subcategories |
| * are added). |
| */ |
| private static final String[][] expectedCategories = { |
| {}, // the root category |
| {"Author"}, |
| {"Author", "Tom Clancy"}, |
| {"Author", "Richard Dawkins"}, |
| {"Author", "Richard Adams"}, |
| {"Price"}, |
| {"Price", "10"}, |
| {"Price", "10", "11"}, |
| {"Price", "10", "12"}, |
| {"Price", "20"}, |
| {"Price", "20", "27"}, |
| {"Date"}, |
| {"Date", "2006"}, |
| {"Date", "2006", "05"}, |
| {"Date", "2005"}, |
| {"Subject"}, |
| {"Subject", "Nonfiction"}, |
| {"Subject", "Nonfiction", "Children"}, |
| {"Subject", "Nonfiction", "Children", "Animals"}, |
| {"Author", "Stephen Jay Gould"}, |
| {"Author", "\u05e0\u05d3\u05d1\u3042\u0628"}, |
| }; |
| |
| /** |
| * fillTaxonomy adds the categories in the categories[] array, and asserts that the additions |
| * return exactly the ordinals (in the past - paths) specified in expectedPaths[]. Note that this |
| * assumes that fillTaxonomy() is called on an empty taxonomy index. Calling it after something |
| * else was already added to the taxonomy index will surely have this method fail. |
| */ |
| public static void fillTaxonomy(TaxonomyWriter tw) throws IOException { |
| for (int i = 0; i < categories.length; i++) { |
| int ordinal = tw.addCategory(new FacetLabel(categories[i])); |
| int expectedOrdinal = expectedPaths[i][expectedPaths[i].length - 1]; |
| if (ordinal != expectedOrdinal) { |
| fail( |
| "For category " |
| + showcat(categories[i]) |
| + " expected ordinal " |
| + expectedOrdinal |
| + ", but got " |
| + ordinal); |
| } |
| } |
| } |
| |
| public static String showcat(String[] path) { |
| if (path == null) { |
| return "<null>"; |
| } |
| if (path.length == 0) { |
| return "<empty>"; |
| } |
| if (path.length == 1 && path[0].length() == 0) { |
| return "<\"\">"; |
| } |
| StringBuilder sb = new StringBuilder(path[0]); |
| for (int i = 1; i < path.length; i++) { |
| sb.append('/'); |
| sb.append(path[i]); |
| } |
| return sb.toString(); |
| } |
| |
| private String showcat(FacetLabel path) { |
| if (path == null) { |
| return "<null>"; |
| } |
| if (path.length == 0) { |
| return "<empty>"; |
| } |
| return "<" + path.toString() + ">"; |
| } |
| |
| /** |
| * Basic tests for TaxonomyWriter. Basically, we test that IndexWriter.addCategory works, i.e. |
| * returns the expected ordinals (this is tested by calling the fillTaxonomy() method above). We |
| * do not test here that after writing the index can be read - this will be done in more tests |
| * below. |
| */ |
| @Test |
| public void testWriter() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| // Also check TaxonomyWriter.getSize() - see that the taxonomy's size |
| // is what we expect it to be. |
| assertEquals(expectedCategories.length, tw.getSize()); |
| tw.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * testWriterTwice is exactly like testWriter, except that after adding all the categories, we add |
| * them again, and see that we get the same old ids again - not new categories. |
| */ |
| @Test |
| public void testWriterTwice() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| // run fillTaxonomy again - this will try to add the same categories |
| // again, and check that we see the same ordinal paths again, not |
| // different ones. |
| fillTaxonomy(tw); |
| // Let's check the number of categories again, to see that no |
| // extraneous categories were created: |
| assertEquals(expectedCategories.length, tw.getSize()); |
| tw.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * testWriterTwice2 is similar to testWriterTwice, except that the index is closed and reopened |
| * before attempting to write to it the same categories again. While testWriterTwice can get along |
| * with writing and reading correctly just to the cache, testWriterTwice2 checks also the actual |
| * disk read part of the writer: |
| */ |
| @Test |
| public void testWriterTwice2() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| tw.close(); |
| tw = new DirectoryTaxonomyWriter(indexDir); |
| // run fillTaxonomy again - this will try to add the same categories |
| // again, and check that we see the same ordinals again, not different |
| // ones, and that the number of categories hasn't grown by the new |
| // additions |
| fillTaxonomy(tw); |
| assertEquals(expectedCategories.length, tw.getSize()); |
| tw.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * testWriterTwice3 is yet another test which tests creating a taxonomy in two separate writing |
| * sessions. This test used to fail because of a bug involving commit(), explained below, and now |
| * should succeed. |
| */ |
| @Test |
| public void testWriterTwice3() throws Exception { |
| Directory indexDir = newDirectory(); |
| // First, create and fill the taxonomy |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| tw.close(); |
| // Now, open the same taxonomy and add the same categories again. |
| // After a few categories, the LuceneTaxonomyWriter implementation |
| // will stop looking for each category on disk, and rather read them |
| // all into memory and close its reader. The bug was that it closed |
| // the reader, but forgot that it did (because it didn't set the reader |
| // reference to null). |
| tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| // Add one new category, just to make commit() do something: |
| tw.addCategory(new FacetLabel("hi")); |
| // Do a commit(). Here was a bug - if tw had a reader open, it should |
| // be reopened after the commit. However, in our case the reader should |
| // not be open (as explained above) but because it was not set to null, |
| // we forgot that, tried to reopen it, and got an AlreadyClosedException. |
| tw.commit(); |
| assertEquals(expectedCategories.length + 1, tw.getSize()); |
| tw.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * Another set of tests for the writer, which don't use an array and try to distill the different |
| * cases, and therefore may be more helpful for debugging a problem than testWriter() which is |
| * hard to know why or where it failed. |
| */ |
| @Test |
| public void testWriterSimpler() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| assertEquals(1, tw.getSize()); // the root only |
| // Test that adding a new top-level category works |
| assertEquals(1, tw.addCategory(new FacetLabel("a"))); |
| assertEquals(2, tw.getSize()); |
| // Test that adding the same category again is noticed, and the |
| // same ordinal (and not a new one) is returned. |
| assertEquals(1, tw.addCategory(new FacetLabel("a"))); |
| assertEquals(2, tw.getSize()); |
| // Test that adding another top-level category returns a new ordinal, |
| // not the same one |
| assertEquals(2, tw.addCategory(new FacetLabel("b"))); |
| assertEquals(3, tw.getSize()); |
| // Test that adding a category inside one of the above adds just one |
| // new ordinal: |
| assertEquals(3, tw.addCategory(new FacetLabel("a", "c"))); |
| assertEquals(4, tw.getSize()); |
| // Test that adding the same second-level category doesn't do anything: |
| assertEquals(3, tw.addCategory(new FacetLabel("a", "c"))); |
| assertEquals(4, tw.getSize()); |
| // Test that adding a second-level category with two new components |
| // indeed adds two categories |
| assertEquals(5, tw.addCategory(new FacetLabel("d", "e"))); |
| assertEquals(6, tw.getSize()); |
| // Verify that the parents were added above in the order we expected |
| assertEquals(4, tw.addCategory(new FacetLabel("d"))); |
| // Similar, but inside a category that already exists: |
| assertEquals(7, tw.addCategory(new FacetLabel("b", "d", "e"))); |
| assertEquals(8, tw.getSize()); |
| // And now inside two levels of categories that already exist: |
| assertEquals(8, tw.addCategory(new FacetLabel("b", "d", "f"))); |
| assertEquals(9, tw.getSize()); |
| |
| tw.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * Test writing an empty index, and seeing that a reader finds in it the root category, and only |
| * it. We check all the methods on that root category return the expected results. |
| */ |
| @Test |
| public void testRootOnly() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| // right after opening the index, it should already contain the |
| // root, so have size 1: |
| assertEquals(1, tw.getSize()); |
| tw.close(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| assertEquals(1, tr.getSize()); |
| assertEquals(0, tr.getPath(0).length); |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]); |
| assertEquals(0, tr.getOrdinal(new FacetLabel())); |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * The following test is exactly the same as testRootOnly, except we do not close the writer |
| * before opening the reader. We want to see that the root is visible to the reader not only after |
| * the writer is closed, but immediately after it is created. |
| */ |
| @Test |
| public void testRootOnly2() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| tw.commit(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| assertEquals(1, tr.getSize()); |
| assertEquals(0, tr.getPath(0).length); |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]); |
| assertEquals(0, tr.getOrdinal(new FacetLabel())); |
| tw.close(); |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * Basic tests for TaxonomyReader's category <=> ordinal transformations (getSize(), |
| * getCategory() and getOrdinal()). We test that after writing the index, it can be read and all |
| * the categories and ordinals are there just as we expected them to be. |
| */ |
| @Test |
| public void testReaderBasic() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| tw.close(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| |
| // test TaxonomyReader.getSize(): |
| assertEquals(expectedCategories.length, tr.getSize()); |
| |
| // test round trips of ordinal => category => ordinal |
| for (int i = 0; i < tr.getSize(); i++) { |
| assertEquals(i, tr.getOrdinal(tr.getPath(i))); |
| } |
| |
| // test TaxonomyReader.getCategory(): |
| for (int i = 1; i < tr.getSize(); i++) { |
| FacetLabel expectedCategory = new FacetLabel(expectedCategories[i]); |
| FacetLabel category = tr.getPath(i); |
| if (!expectedCategory.equals(category)) { |
| fail( |
| "For ordinal " |
| + i |
| + " expected category " |
| + showcat(expectedCategory) |
| + ", but got " |
| + showcat(category)); |
| } |
| } |
| // (also test invalid ordinals:) |
| assertNull(tr.getPath(-1)); |
| assertNull(tr.getPath(tr.getSize())); |
| assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL)); |
| |
| // test TaxonomyReader.getOrdinal(): |
| for (int i = 1; i < expectedCategories.length; i++) { |
| int expectedOrdinal = i; |
| int ordinal = tr.getOrdinal(new FacetLabel(expectedCategories[i])); |
| if (expectedOrdinal != ordinal) { |
| fail( |
| "For category " |
| + showcat(expectedCategories[i]) |
| + " expected ordinal " |
| + expectedOrdinal |
| + ", but got " |
| + ordinal); |
| } |
| } |
| // (also test invalid categories:) |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new FacetLabel("non-existant"))); |
| assertEquals( |
| TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new FacetLabel("Author", "Jules Verne"))); |
| |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * Tests for TaxonomyReader's getParent() method. We check it by comparing its results to those we |
| * could have gotten by looking at the category string paths (where the parentage is obvious). |
| * Note that after testReaderBasic(), we already know we can trust the ordinal <=> category |
| * conversions. |
| * |
| * <p>Note: At the moment, the parent methods in the reader are deprecated, but this does not mean |
| * they should not be tested! Until they are removed (*if* they are removed), these tests should |
| * remain to see that they still work correctly. |
| */ |
| @Test |
| public void testReaderParent() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| tw.close(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| |
| // check that the parent of the root ordinal is the invalid ordinal: |
| int[] parents = tr.getParallelTaxonomyArrays().parents(); |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]); |
| |
| // check parent of non-root ordinals: |
| for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) { |
| FacetLabel me = tr.getPath(ordinal); |
| int parentOrdinal = parents[ordinal]; |
| FacetLabel parent = tr.getPath(parentOrdinal); |
| if (parent == null) { |
| fail( |
| "Parent of " |
| + ordinal |
| + " is " |
| + parentOrdinal |
| + ", but this is not a valid category."); |
| } |
| // verify that the parent is indeed my parent, according to the strings |
| if (!me.subpath(me.length - 1).equals(parent)) { |
| fail( |
| "Got parent " |
| + parentOrdinal |
| + " for ordinal " |
| + ordinal |
| + " but categories are " |
| + showcat(parent) |
| + " and " |
| + showcat(me) |
| + " respectively."); |
| } |
| } |
| |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * Tests for TaxonomyWriter's getParent() method. We check it by comparing its results to those we |
| * could have gotten by looking at the category string paths using a TaxonomyReader (where the |
| * parentage is obvious). Note that after testReaderBasic(), we already know we can trust the |
| * ordinal <=> category conversions from TaxonomyReader. |
| * |
| * <p>The difference between testWriterParent1 and testWriterParent2 is that the former closes the |
| * taxonomy writer before reopening it, while the latter does not. |
| * |
| * <p>This test code is virtually identical to that of testReaderParent(). |
| */ |
| @Test |
| public void testWriterParent1() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| tw.close(); |
| tw = new DirectoryTaxonomyWriter(indexDir); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| |
| checkWriterParent(tr, tw); |
| |
| tw.close(); |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| @Test |
| public void testWriterParent2() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| tw.commit(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| |
| checkWriterParent(tr, tw); |
| |
| tw.close(); |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| private void checkWriterParent(TaxonomyReader tr, TaxonomyWriter tw) throws Exception { |
| // check that the parent of the root ordinal is the invalid ordinal: |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(0)); |
| |
| // check parent of non-root ordinals: |
| for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) { |
| FacetLabel me = tr.getPath(ordinal); |
| int parentOrdinal = tw.getParent(ordinal); |
| FacetLabel parent = tr.getPath(parentOrdinal); |
| if (parent == null) { |
| fail( |
| "Parent of " |
| + ordinal |
| + " is " |
| + parentOrdinal |
| + ", but this is not a valid category."); |
| } |
| // verify that the parent is indeed my parent, according to the |
| // strings |
| if (!me.subpath(me.length - 1).equals(parent)) { |
| fail( |
| "Got parent " |
| + parentOrdinal |
| + " for ordinal " |
| + ordinal |
| + " but categories are " |
| + showcat(parent) |
| + " and " |
| + showcat(me) |
| + " respectively."); |
| } |
| } |
| |
| // check parent of of invalid ordinals: |
| expectThrows( |
| IndexOutOfBoundsException.class, |
| () -> { |
| tw.getParent(-1); |
| }); |
| expectThrows( |
| IndexOutOfBoundsException.class, |
| () -> { |
| tw.getParent(TaxonomyReader.INVALID_ORDINAL); |
| }); |
| expectThrows( |
| IndexOutOfBoundsException.class, |
| () -> { |
| tw.getParent(tr.getSize()); |
| }); |
| } |
| |
| /** |
| * Test TaxonomyReader's child browsing method, getChildrenArrays() This only tests for |
| * correctness of the data on one example - we have below further tests on data refresh etc. |
| */ |
| @Test |
| public void testChildrenArrays() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| tw.close(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); |
| int[] youngestChildArray = ca.children(); |
| assertEquals(tr.getSize(), youngestChildArray.length); |
| int[] olderSiblingArray = ca.siblings(); |
| assertEquals(tr.getSize(), olderSiblingArray.length); |
| for (int i = 0; i < expectedCategories.length; i++) { |
| // find expected children by looking at all expectedCategories |
| // for children |
| ArrayList<Integer> expectedChildren = new ArrayList<>(); |
| for (int j = expectedCategories.length - 1; j >= 0; j--) { |
| if (expectedCategories[j].length != expectedCategories[i].length + 1) { |
| continue; // not longer by 1, so can't be a child |
| } |
| boolean ischild = true; |
| for (int k = 0; k < expectedCategories[i].length; k++) { |
| if (!expectedCategories[j][k].equals(expectedCategories[i][k])) { |
| ischild = false; |
| break; |
| } |
| } |
| if (ischild) { |
| expectedChildren.add(j); |
| } |
| } |
| // check that children and expectedChildren are the same, with the |
| // correct reverse (youngest to oldest) order: |
| if (expectedChildren.size() == 0) { |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]); |
| } else { |
| int child = youngestChildArray[i]; |
| assertEquals(expectedChildren.get(0).intValue(), child); |
| for (int j = 1; j < expectedChildren.size(); j++) { |
| child = olderSiblingArray[child]; |
| assertEquals(expectedChildren.get(j).intValue(), child); |
| // if child is INVALID_ORDINAL we should stop, but |
| // assertEquals would fail in this case anyway. |
| } |
| // When we're done comparing, olderSiblingArray should now point |
| // to INVALID_ORDINAL, saying there are no more children. If it |
| // doesn't, we found too many children... |
| assertEquals(-1, olderSiblingArray[child]); |
| } |
| } |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * Similar to testChildrenArrays, except rather than look at expected results, we test for several |
| * "invariants" that the results should uphold, e.g., that a child of a category indeed has this |
| * category as its parent. This sort of test can more easily be extended to larger example |
| * taxonomies, because we do not need to build the expected list of categories like we did in the |
| * above test. |
| */ |
| @Test |
| public void testChildrenArraysInvariants() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| tw.close(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); |
| int[] children = ca.children(); |
| assertEquals(tr.getSize(), children.length); |
| int[] olderSiblingArray = ca.siblings(); |
| assertEquals(tr.getSize(), olderSiblingArray.length); |
| |
| // test that the "youngest child" of every category is indeed a child: |
| int[] parents = tr.getParallelTaxonomyArrays().parents(); |
| for (int i = 0; i < tr.getSize(); i++) { |
| int youngestChild = children[i]; |
| if (youngestChild != TaxonomyReader.INVALID_ORDINAL) { |
| assertEquals(i, parents[youngestChild]); |
| } |
| } |
| |
| // test that the "older sibling" of every category is indeed older (lower) |
| // (it can also be INVALID_ORDINAL, which is lower than any ordinal) |
| for (int i = 0; i < tr.getSize(); i++) { |
| assertTrue("olderSiblingArray[" + i + "] should be <" + i, olderSiblingArray[i] < i); |
| } |
| |
| // test that the "older sibling" of every category is indeed a sibling |
| // (they share the same parent) |
| for (int i = 0; i < tr.getSize(); i++) { |
| int sibling = olderSiblingArray[i]; |
| if (sibling == TaxonomyReader.INVALID_ORDINAL) { |
| continue; |
| } |
| assertEquals(parents[i], parents[sibling]); |
| } |
| |
| // And now for slightly more complex (and less "invariant-like"...) |
| // tests: |
| |
| // test that the "youngest child" is indeed the youngest (so we don't |
| // miss the first children in the chain) |
| for (int i = 0; i < tr.getSize(); i++) { |
| // Find the really youngest child: |
| int j; |
| for (j = tr.getSize() - 1; j > i; j--) { |
| if (parents[j] == i) { |
| break; // found youngest child |
| } |
| } |
| if (j == i) { // no child found |
| j = TaxonomyReader.INVALID_ORDINAL; |
| } |
| assertEquals(j, children[i]); |
| } |
| |
| // test that the "older sibling" is indeed the least oldest one - and |
| // not a too old one or -1 (so we didn't miss some children in the |
| // middle or the end of the chain). |
| for (int i = 0; i < tr.getSize(); i++) { |
| // Find the youngest older sibling: |
| int j; |
| for (j = i - 1; j >= 0; j--) { |
| if (parents[j] == parents[i]) { |
| break; // found youngest older sibling |
| } |
| } |
| if (j < 0) { // no sibling found |
| j = TaxonomyReader.INVALID_ORDINAL; |
| } |
| assertEquals(j, olderSiblingArray[i]); |
| } |
| |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| /** Test how getChildrenArrays() deals with the taxonomy's growth: */ |
| @Test |
| public void testChildrenArraysGrowth() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| tw.addCategory(new FacetLabel("hi", "there")); |
| tw.commit(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); |
| assertEquals(3, tr.getSize()); |
| assertEquals(3, ca.siblings().length); |
| assertEquals(3, ca.children().length); |
| assertTrue(Arrays.equals(new int[] {1, 2, -1}, ca.children())); |
| assertTrue(Arrays.equals(new int[] {-1, -1, -1}, ca.siblings())); |
| tw.addCategory(new FacetLabel("hi", "ho")); |
| tw.addCategory(new FacetLabel("hello")); |
| tw.commit(); |
| // Before refresh, nothing changed.. |
| ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays(); |
| assertSame(newca, ca); // we got exactly the same object |
| assertEquals(3, tr.getSize()); |
| assertEquals(3, ca.siblings().length); |
| assertEquals(3, ca.children().length); |
| // After the refresh, things change: |
| TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); |
| assertNotNull(newtr); |
| tr.close(); |
| tr = newtr; |
| ca = tr.getParallelTaxonomyArrays(); |
| assertEquals(5, tr.getSize()); |
| assertEquals(5, ca.siblings().length); |
| assertEquals(5, ca.children().length); |
| assertTrue(Arrays.equals(new int[] {4, 3, -1, -1, -1}, ca.children())); |
| assertTrue(Arrays.equals(new int[] {-1, -1, -1, 2, 1}, ca.siblings())); |
| tw.close(); |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| // Test that getParentArrays is valid when retrieved during refresh |
| @Test |
| public void testTaxonomyReaderRefreshRaces() throws Exception { |
| // compute base child arrays - after first chunk, and after the other |
| Directory indexDirBase = newDirectory(); |
| TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase); |
| twBase.addCategory(new FacetLabel("a", "0")); |
| final FacetLabel abPath = new FacetLabel("a", "b"); |
| twBase.addCategory(abPath); |
| twBase.commit(); |
| TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase); |
| |
| final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays(); |
| |
| final int abOrd = trBase.getOrdinal(abPath); |
| final int abYoungChildBase1 = ca1.children()[abOrd]; |
| |
| final int numCategories = atLeast(200); |
| for (int i = 0; i < numCategories; i++) { |
| twBase.addCategory(new FacetLabel("a", "b", Integer.toString(i))); |
| } |
| twBase.close(); |
| |
| TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase); |
| assertNotNull(newTaxoReader); |
| trBase.close(); |
| trBase = newTaxoReader; |
| |
| final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays(); |
| final int abYoungChildBase2 = ca2.children()[abOrd]; |
| |
| int numRetries = atLeast(10); |
| for (int retry = 0; retry < numRetries; retry++) { |
| assertConsistentYoungestChild( |
| abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories); |
| } |
| |
| trBase.close(); |
| indexDirBase.close(); |
| } |
| |
| private void assertConsistentYoungestChild( |
| final FacetLabel abPath, |
| final int abOrd, |
| final int abYoungChildBase1, |
| final int abYoungChildBase2, |
| final int retry, |
| int numCategories) |
| throws Exception { |
| SlowDirectory indexDir = new SlowDirectory(-1, null); // no slowness for initialization |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| tw.addCategory(new FacetLabel("a", "0")); |
| tw.addCategory(abPath); |
| tw.commit(); |
| |
| final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| for (int i = 0; i < numCategories; i++) { |
| final FacetLabel cp = new FacetLabel("a", "b", Integer.toString(i)); |
| tw.addCategory(cp); |
| assertEquals( |
| "Ordinal of " + cp + " must be invalid until Taxonomy Reader was refreshed", |
| TaxonomyReader.INVALID_ORDINAL, |
| tr.getOrdinal(cp)); |
| } |
| tw.close(); |
| |
| final AtomicBoolean stop = new AtomicBoolean(false); |
| final Throwable[] error = new Throwable[] {null}; |
| final int retrieval[] = {0}; |
| |
| Thread thread = |
| new Thread("Child Arrays Verifier") { |
| @Override |
| public void run() { |
| setPriority(1 + getPriority()); |
| try { |
| while (!stop.get()) { |
| int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1; |
| assertNotNull( |
| "path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd)); |
| assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++); |
| sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms |
| } |
| } catch (Throwable e) { |
| error[0] = e; |
| stop.set(true); |
| } |
| } |
| |
| private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) { |
| final int abYoungChild = ca.children()[abOrd]; |
| assertTrue( |
| "Retry " |
| + retry |
| + ": retrieval: " |
| + retrieval |
| + ": wrong youngest child for category " |
| + abPath |
| + " (ord=" |
| + abOrd |
| + ") - must be either " |
| + abYoungChildBase1 |
| + " or " |
| + abYoungChildBase2 |
| + " but was: " |
| + abYoungChild, |
| abYoungChildBase1 == abYoungChild || abYoungChildBase2 == ca.children()[abOrd]); |
| } |
| }; |
| thread.start(); |
| |
| indexDir.setSleepMillis(1); // some delay for refresh |
| TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); |
| if (newTaxoReader != null) { |
| newTaxoReader.close(); |
| } |
| |
| stop.set(true); |
| thread.join(); |
| assertNull( |
| "Unexpcted exception at retry " |
| + retry |
| + " retrieval " |
| + retrieval[0] |
| + ": \n" |
| + stackTraceStr(error[0]), |
| error[0]); |
| |
| tr.close(); |
| } |
| |
| /** |
| * Grab the stack trace into a string since the exception was thrown in a thread and we want the |
| * assert outside the thread to show the stack trace in case of failure. |
| */ |
| private String stackTraceStr(final Throwable error) { |
| if (error == null) { |
| return ""; |
| } |
| StringWriter sw = new StringWriter(); |
| PrintWriter pw = new PrintWriter(sw); |
| error.printStackTrace(pw); |
| pw.close(); |
| return sw.toString(); |
| } |
| |
| /** |
| * Test that if separate reader and writer objects are opened, new categories written into the |
| * writer are available to a reader only after a commit(). Note that this test obviously doesn't |
| * cover all the different concurrency scenarios, all different methods, and so on. We may want to |
| * write more tests of this sort. |
| * |
| * <p>This test simulates what would happen when there are two separate processes, one doing |
| * indexing, and the other searching, and each opens its own object (with obviously no connection |
| * between the objects) using the same disk files. Note, though, that this test does not test what |
| * happens when the two processes do their actual work at exactly the same time. It also doesn't |
| * test multi-threading. |
| */ |
| @Test |
| public void testSeparateReaderAndWriter() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| tw.commit(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| |
| assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root) |
| tw.addCategory(new FacetLabel("Author")); |
| assertEquals(1, tr.getSize()); // still root only... |
| assertNull( |
| TaxonomyReader.openIfChanged( |
| tr)); // this is not enough, because tw.commit() hasn't been done yet |
| assertEquals(1, tr.getSize()); // still root only... |
| tw.commit(); |
| assertEquals(1, tr.getSize()); // still root only... |
| TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); |
| assertNotNull(newTaxoReader); |
| tr.close(); |
| tr = newTaxoReader; |
| |
| int author = 1; |
| try { |
| assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents()[author]); |
| // ok |
| } catch (ArrayIndexOutOfBoundsException e) { |
| fail( |
| "After category addition, commit() and refresh(), getParent for " |
| + author |
| + " should NOT throw exception"); |
| } |
| assertEquals(2, tr.getSize()); // finally, see there are two categories |
| |
| // now, add another category, and verify that after commit and refresh |
| // the parent of this category is correct (this requires the reader |
| // to correctly update its prefetched parent vector), and that the |
| // old information also wasn't ruined: |
| tw.addCategory(new FacetLabel("Author", "Richard Dawkins")); |
| int dawkins = 2; |
| tw.commit(); |
| newTaxoReader = TaxonomyReader.openIfChanged(tr); |
| assertNotNull(newTaxoReader); |
| tr.close(); |
| tr = newTaxoReader; |
| int[] parents = tr.getParallelTaxonomyArrays().parents(); |
| assertEquals(author, parents[dawkins]); |
| assertEquals(TaxonomyReader.ROOT_ORDINAL, parents[author]); |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]); |
| assertEquals(3, tr.getSize()); |
| tw.close(); |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| @Test |
| public void testSeparateReaderAndWriter2() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| tw.commit(); |
| TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); |
| |
| // Test getOrdinal(): |
| FacetLabel author = new FacetLabel("Author"); |
| |
| assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root) |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); |
| tw.addCategory(author); |
| // before commit and refresh, no change: |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); |
| assertEquals(1, tr.getSize()); // still root only... |
| assertNull( |
| TaxonomyReader.openIfChanged( |
| tr)); // this is not enough, because tw.commit() hasn't been done yet |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); |
| assertEquals(1, tr.getSize()); // still root only... |
| tw.commit(); |
| // still not enough before refresh: |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); |
| assertEquals(1, tr.getSize()); // still root only... |
| TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); |
| assertNotNull(newTaxoReader); |
| tr.close(); |
| tr = newTaxoReader; |
| assertEquals(1, tr.getOrdinal(author)); |
| assertEquals(2, tr.getSize()); |
| tw.close(); |
| tr.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * fillTaxonomyCheckPaths adds the categories in the categories[] array, and asserts that the |
| * additions return exactly paths specified in expectedPaths[]. This is the same add |
| * fillTaxonomy() but also checks the correctness of getParent(), not just addCategory(). Note |
| * that this assumes that fillTaxonomyCheckPaths() is called on an empty taxonomy index. Calling |
| * it after something else was already added to the taxonomy index will surely have this method |
| * fail. |
| */ |
| public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException { |
| for (int i = 0; i < categories.length; i++) { |
| int ordinal = tw.addCategory(new FacetLabel(categories[i])); |
| int expectedOrdinal = expectedPaths[i][expectedPaths[i].length - 1]; |
| if (ordinal != expectedOrdinal) { |
| fail( |
| "For category " |
| + showcat(categories[i]) |
| + " expected ordinal " |
| + expectedOrdinal |
| + ", but got " |
| + ordinal); |
| } |
| for (int j = expectedPaths[i].length - 2; j >= 0; j--) { |
| ordinal = tw.getParent(ordinal); |
| expectedOrdinal = expectedPaths[i][j]; |
| if (ordinal != expectedOrdinal) { |
| fail( |
| "For category " |
| + showcat(categories[i]) |
| + " expected ancestor level " |
| + (expectedPaths[i].length - 1 - j) |
| + " was " |
| + expectedOrdinal |
| + ", but got " |
| + ordinal); |
| } |
| } |
| } |
| } |
| |
| // After fillTaxonomy returned successfully, checkPaths() checks that |
| // the getParent() calls return as expected, from the table |
| public static void checkPaths(TaxonomyWriter tw) throws IOException { |
| for (int i = 0; i < categories.length; i++) { |
| int ordinal = expectedPaths[i][expectedPaths[i].length - 1]; |
| for (int j = expectedPaths[i].length - 2; j >= 0; j--) { |
| ordinal = tw.getParent(ordinal); |
| int expectedOrdinal = expectedPaths[i][j]; |
| if (ordinal != expectedOrdinal) { |
| fail( |
| "For category " |
| + showcat(categories[i]) |
| + " expected ancestor level " |
| + (expectedPaths[i].length - 1 - j) |
| + " was " |
| + expectedOrdinal |
| + ", but got " |
| + ordinal); |
| } |
| } |
| assertEquals(TaxonomyReader.ROOT_ORDINAL, tw.getParent(expectedPaths[i][0])); |
| } |
| assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(TaxonomyReader.ROOT_ORDINAL)); |
| } |
| |
| /** |
| * Basic test for TaxonomyWriter.getParent(). This is similar to testWriter above, except we also |
| * check the parents of the added categories, not just the categories themselves. |
| */ |
| @Test |
| public void testWriterCheckPaths() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomyCheckPaths(tw); |
| // Also check TaxonomyWriter.getSize() - see that the taxonomy's size |
| // is what we expect it to be. |
| assertEquals(expectedCategories.length, tw.getSize()); |
| tw.close(); |
| indexDir.close(); |
| } |
| |
| /** |
| * testWriterCheckPaths2 is the path-checking variant of testWriterTwice and testWriterTwice2. |
| * After adding all the categories, we add them again, and see that we get the same old ids and |
| * paths. We repeat the path checking yet again after closing and opening the index for writing |
| * again - to see that the reading of existing data from disk works as well. |
| */ |
| @Test |
| public void testWriterCheckPaths2() throws Exception { |
| Directory indexDir = newDirectory(); |
| TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); |
| fillTaxonomy(tw); |
| checkPaths(tw); |
| fillTaxonomy(tw); |
| checkPaths(tw); |
| tw.close(); |
| |
| tw = new DirectoryTaxonomyWriter(indexDir); |
| checkPaths(tw); |
| fillTaxonomy(tw); |
| checkPaths(tw); |
| tw.close(); |
| indexDir.close(); |
| } |
| |
| @Test |
| public void testNRT() throws Exception { |
| Directory dir = newDirectory(); |
| DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir); |
| TaxonomyReader reader = new DirectoryTaxonomyReader(writer); |
| |
| FacetLabel cp = new FacetLabel("a"); |
| writer.addCategory(cp); |
| TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader); |
| assertNotNull("expected a new instance", newReader); |
| assertEquals(2, newReader.getSize()); |
| assertNotSame(TaxonomyReader.INVALID_ORDINAL, newReader.getOrdinal(cp)); |
| reader.close(); |
| reader = newReader; |
| |
| writer.close(); |
| reader.close(); |
| |
| dir.close(); |
| } |
| |
| // TODO (Facet): test multiple readers, one writer. Have the multiple readers |
| // using the same object (simulating threads) or different objects |
| // (simulating processes). |
| } |