blob: c08eaca5d28226c01f01f839e375e1d7ca8ce342 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.ConfigSetAdminRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.cloud.api.collections.CategoryRoutedAlias;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.UpdateCommand;
import org.apache.solr.util.LogLevel;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class CategoryRoutedAliasUpdateProcessorTest extends RoutedAliasUpdateProcessorTest {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// use this for example categories
private static final String[] SHIPS = {
"Constructor",
"Heart of Gold",
"Stunt Ship",
"B-ark",
"Bi$tromath"
};
private static final String categoryField = "ship_name_en";
private static final String intField = "integer_i";
private int lastDocId = 0;
private static CloudSolrClient solrClient;
private int numDocsDeletedOrFailed = 0;
@Before
public void doBefore() throws Exception {
configureCluster(4).configure();
solrClient = getCloudSolrClient(cluster);
//log this to help debug potential causes of problems
if (log.isInfoEnabled()) {
log.info("SolrClient: {}", solrClient);
log.info("ClusterStateProvider {}", solrClient.getClusterStateProvider()); // nowarn
}
}
@After
public void doAfter() throws Exception {
IOUtils.close(solrClient);
if (null != cluster) {
shutdownCluster();
}
}
@AfterClass
public static void cleanUpAfterClass() throws Exception {
solrClient = null;
}
public void testNonEnglish() throws Exception {
// test to document the expected behavior with non-english text for categories
// the present expectation is that non-latin text and many accented latin characters
// will get replaced with '_'. This is necessary to maintain collection naming
// conventions. The net effect is that documents get sorted by the number of characters
// in the category rather than the actual categories.
// This should be changed in an enhancement (wherein the category is RFC-4648 url-safe encoded).
// For now document it as an expected limitation.
String somethingInChinese = "中文的东西"; // 5 chars
String somethingInHebrew = "משהו בסינית"; // 11 chars
String somethingInThai = "บางอย่างในภาษาจีน"; // 17 chars
String somethingInArabic = "شيء في الصينية"; // 14 chars
String somethingInGreek = "κάτι κινεζικό"; // 13 chars
String somethingInGujarati = "િનીમાં કંઈક"; // 11 chars (same as hebrew)
String ONE_ = "_";
String TWO_ = "__";
String THREE_ = "___";
String FOUR_ = "____";
String FIVE_ = "_____";
String collectionChinese = getAlias() + "__CRA__" + FIVE_;
String collectionHebrew = getAlias() + "__CRA__" + FIVE_ + FIVE_ + ONE_;
String collectionThai = getAlias() + "__CRA__" + FIVE_ + FIVE_ + FIVE_ + TWO_;
String collectionArabic = getAlias() + "__CRA__" + FIVE_ + FIVE_ + FOUR_;
String collectionGreek = getAlias() + "__CRA__" + FIVE_ + FIVE_ + THREE_;
// Note Gujarati not listed, because it duplicates hebrew.
String configName = getSaferTestName();
createConfigSet(configName);
List<String> retrievedConfigSetNames = new ConfigSetAdminRequest.List().process(solrClient).getConfigSets();
List<String> expectedConfigSetNames = Arrays.asList("_default", configName);
// config sets leak between tests so we can't be any more specific than this on the next 2 asserts
assertTrue("We expect at least 2 configSets",
retrievedConfigSetNames.size() >= expectedConfigSetNames.size());
assertTrue("ConfigNames should include :" + expectedConfigSetNames, retrievedConfigSetNames.containsAll(expectedConfigSetNames));
CollectionAdminRequest.createCategoryRoutedAlias(getAlias(), categoryField, 20,
CollectionAdminRequest.createCollection("_unused_", configName, 1, 1)
.setMaxShardsPerNode(2))
.process(solrClient);
addDocsAndCommit(true,
newDoc(somethingInChinese),
newDoc(somethingInHebrew),
newDoc(somethingInThai),
newDoc(somethingInArabic),
newDoc(somethingInGreek),
newDoc(somethingInGujarati));
// Note Gujarati not listed, because it duplicates hebrew.
assertInvariants(collectionChinese, collectionHebrew, collectionThai, collectionArabic, collectionGreek);
assertColHasDocCount(collectionChinese, 1);
assertColHasDocCount(collectionHebrew, 2);
assertColHasDocCount(collectionThai, 1);
assertColHasDocCount(collectionArabic, 1);
assertColHasDocCount(collectionGreek, 1);
}
private void assertColHasDocCount(String collectionChinese, int expected) throws SolrServerException, IOException {
final QueryResponse colResponse = solrClient.query(collectionChinese, params(
"q", "*:*",
"rows", "0"));
long aliasNumFound = colResponse.getResults().getNumFound();
assertEquals(expected,aliasNumFound);
}
@Slow
@Test
public void test() throws Exception {
String configName = getSaferTestName();
createConfigSet(configName);
// Start with one collection manually created (and use higher numShards & replicas than we'll use for others)
// This tests we may pre-create the collection and it's acceptable.
final String colVogon = getAlias() + "__CRA__" + SHIPS[0];
// we expect changes ensuring a legal collection name.
final String colHoG = getAlias() + "__CRA__" + noSpaces(SHIPS[1]);
final String colStunt = getAlias() + "__CRA__" + noSpaces(SHIPS[2]);
final String colArk = getAlias() + "__CRA__" + noDashes(SHIPS[3]);
final String colBistro = getAlias() + "__CRA__" + noDollar(SHIPS[4]);
List<String> retrievedConfigSetNames = new ConfigSetAdminRequest.List().process(solrClient).getConfigSets();
List<String> expectedConfigSetNames = Arrays.asList("_default", configName);
// config sets leak between tests so we can't be any more specific than this on the next 2 asserts
assertTrue("We expect at least 2 configSets",
retrievedConfigSetNames.size() >= expectedConfigSetNames.size());
assertTrue("ConfigNames should include :" + expectedConfigSetNames, retrievedConfigSetNames.containsAll(expectedConfigSetNames));
CollectionAdminRequest.createCategoryRoutedAlias(getAlias(), categoryField, 20,
CollectionAdminRequest.createCollection("_unused_", configName, 1, 1)
.setMaxShardsPerNode(2))
.process(solrClient);
// now we index a document
addDocsAndCommit(true, newDoc(SHIPS[0]));
//assertDocRoutedToCol(lastDocId, col23rd);
String uninitialized = getAlias() + "__CRA__" + CategoryRoutedAlias.UNINITIALIZED;
// important to test that we don't try to delete the temp collection on the first document. If we did so
// we would be at risk of out of order execution of the deletion/creation which would leave a window
// of time where there were no collections in the alias. That would likely break all manner of other
// parts of solr.
assertInvariants(colVogon, uninitialized);
addDocsAndCommit(true,
newDoc(SHIPS[1]),
newDoc(SHIPS[2]),
newDoc(SHIPS[3]),
newDoc(SHIPS[4]));
// NOW the temp collection should be gone!
assertInvariants(colVogon, colHoG, colStunt, colArk, colBistro);
// make sure we fail if we have no value to route on.
testFailedDocument(newDoc(null), "Route value is null");
testFailedDocument(newDoc("foo__CRA__bar"), "7 character sequence __CRA__");
testFailedDocument(newDoc("fóóCRAóóbar"), "7 character sequence __CRA__");
}
private String noSpaces(String ship) {
return ship.replaceAll("\\s", "_");
}
private String noDashes(String ship) {
return ship.replaceAll("-", "_");
}
private String noDollar(String ship) {
return ship.replaceAll("\\$", "_");
}
@Slow
@Test
public void testMustMatch() throws Exception {
String configName = getSaferTestName();
createConfigSet(configName);
final String mustMatchRegex = "HHS\\s.+_solr";
final int maxCardinality = Integer.MAX_VALUE; // max cardinality for current test
// Start with one collection manually created (and use higher numShards & replicas than we'll use for others)
// This tests we may pre-create the collection and it's acceptable.
final String colVogon = getAlias() + "__CRA__" + noSpaces("HHS "+ SHIPS[0]) + "_solr";
// we expect changes ensuring a legal collection name.
final String colHoG = getAlias() + "__CRA__" + noSpaces("HHS "+ SHIPS[1]) + "_solr";
List<String> retrievedConfigSetNames = new ConfigSetAdminRequest.List().process(solrClient).getConfigSets();
List<String> expectedConfigSetNames = Arrays.asList("_default", configName);
// config sets leak between tests so we can't be any more specific than this on the next 2 asserts
assertTrue("We expect at least 2 configSets",
retrievedConfigSetNames.size() >= expectedConfigSetNames.size());
assertTrue("ConfigNames should include :" + expectedConfigSetNames, retrievedConfigSetNames.containsAll(expectedConfigSetNames));
CollectionAdminRequest.createCategoryRoutedAlias(getAlias(), categoryField, maxCardinality,
CollectionAdminRequest.createCollection("_unused_", configName, 1, 1)
.setMaxShardsPerNode(2))
.setMustMatch(mustMatchRegex)
.process(solrClient);
// now we index a document
addDocsAndCommit(true, newDoc("HHS " + SHIPS[0] + "_solr"));
//assertDocRoutedToCol(lastDocId, col23rd);
String uninitialized = getAlias() + "__CRA__" + CategoryRoutedAlias.UNINITIALIZED;
assertInvariants(colVogon, uninitialized);
addDocsAndCommit(true, newDoc("HHS "+ SHIPS[1] + "_solr"));
assertInvariants(colVogon, colHoG);
// should fail since max cardinality is reached
testFailedDocument(newDoc(SHIPS[2]), "does not match " + CategoryRoutedAlias.ROUTER_MUST_MATCH);
assertInvariants(colVogon, colHoG);
}
@Slow
@Test
public void testInvalidMustMatch() throws Exception {
String configName = getSaferTestName();
createConfigSet(configName);
// Not a valid regex
final String mustMatchRegex = "+_solr";
final int maxCardinality = Integer.MAX_VALUE; // max cardinality for current test
List<String> retrievedConfigSetNames = new ConfigSetAdminRequest.List().process(solrClient).getConfigSets();
List<String> expectedConfigSetNames = Arrays.asList("_default", configName);
// config sets leak between tests so we can't be any more specific than this on the next 2 asserts
assertTrue("We expect at least 2 configSets",
retrievedConfigSetNames.size() >= expectedConfigSetNames.size());
assertTrue("ConfigNames should include :" + expectedConfigSetNames, retrievedConfigSetNames.containsAll(expectedConfigSetNames));
SolrException e = expectThrows(SolrException.class, () -> CollectionAdminRequest.createCategoryRoutedAlias(getAlias(), categoryField, maxCardinality,
CollectionAdminRequest.createCollection("_unused_", configName, 1, 1)
.setMaxShardsPerNode(2))
.setMustMatch(mustMatchRegex)
.process(solrClient)
);
assertTrue("Create Alias should fail since router.mustMatch must be a valid regular expression",
e.getMessage().contains("router.mustMatch must be a valid regular expression"));
}
@Slow
@Test
public void testMaxCardinality() throws Exception {
String configName = getSaferTestName();
createConfigSet(configName);
final int maxCardinality = 2; // max cardinality for current test
// Start with one collection manually created (and use higher numShards & replicas than we'll use for others)
// This tests we may pre-create the collection and it's acceptable.
final String colVogon = getAlias() + "__CRA__" + SHIPS[0];
// we expect changes ensuring a legal collection name.
final String colHoG = getAlias() + "__CRA__" + SHIPS[1].replaceAll("\\s", "_");
List<String> retrievedConfigSetNames = new ConfigSetAdminRequest.List().process(solrClient).getConfigSets();
List<String> expectedConfigSetNames = Arrays.asList("_default", configName);
// config sets leak between tests so we can't be any more specific than this on the next 2 asserts
assertTrue("We expect at least 2 configSets",
retrievedConfigSetNames.size() >= expectedConfigSetNames.size());
assertTrue("ConfigNames should include :" + expectedConfigSetNames, retrievedConfigSetNames.containsAll(expectedConfigSetNames));
CollectionAdminRequest.createCategoryRoutedAlias(getAlias(), categoryField, maxCardinality,
CollectionAdminRequest.createCollection("_unused_", configName, 1, 1)
.setMaxShardsPerNode(2))
.process(solrClient);
// now we index a document
addDocsAndCommit(true, newDoc(SHIPS[0]));
//assertDocRoutedToCol(lastDocId, col23rd);
String uninitialized = getAlias() + "__CRA__" + CategoryRoutedAlias.UNINITIALIZED;
assertInvariants(colVogon, uninitialized);
addDocsAndCommit(true, newDoc(SHIPS[1]));
assertInvariants(colVogon, colHoG);
// should fail since max cardinality is reached
testFailedDocument(newDoc(SHIPS[2]), "Max cardinality");
assertInvariants(colVogon, colHoG);
}
/**
* Test that the Update Processor Factory routes documents to leader shards and thus
* avoids the possibility of introducing an extra hop to find the leader.
*
* @throws Exception when it blows up unexpectedly :)
*/
@Slow
@Test
@LogLevel("org.apache.solr.update.processor.TrackingUpdateProcessorFactory=DEBUG")
public void testSliceRouting() throws Exception {
String configName = getSaferTestName();
createConfigSet(configName);
// each collection has 4 shards with 3 replicas for 12 possible destinations
// 4 of which are leaders, and 8 of which should fail this test.
final int numShards = 1 + random().nextInt(4);
final int numReplicas = 1 + random().nextInt(3);
CollectionAdminRequest.createCategoryRoutedAlias(getAlias(), categoryField, 20,
CollectionAdminRequest.createCollection("_unused_", configName, numShards, numReplicas)
.setMaxShardsPerNode(numReplicas))
.process(solrClient);
// cause some collections to be created
assertUpdateResponse(solrClient.add(getAlias(), new SolrInputDocument("id","1",categoryField, SHIPS[0])));
assertUpdateResponse(solrClient.add(getAlias(), new SolrInputDocument("id","2",categoryField, SHIPS[1])));
assertUpdateResponse(solrClient.add(getAlias(), new SolrInputDocument("id","3",categoryField, SHIPS[2])));
assertUpdateResponse(solrClient.commit(getAlias()));
// wait for all the collections to exist...
waitColAndAlias(getAlias(), "__CRA__", SHIPS[0], numShards);
waitColAndAlias(getAlias(), "__CRA__", noSpaces(SHIPS[1]), numShards);
waitColAndAlias(getAlias(), "__CRA__", noSpaces(SHIPS[2]), numShards);
// at this point we now have 3 collections with 4 shards each, and 3 replicas per shard for a total of
// 36 total replicas, 1/3 of which are leaders. We will add 3 docs and each has a 33% chance of hitting a
// leader randomly and not causing a failure if the code is broken, but as a whole this test will therefore only have
// about a 3.6% false positive rate (0.33^3). If that's not good enough, add more docs or more replicas per shard :).
final String trackGroupName = getTrackUpdatesGroupName();
final List<UpdateCommand> updateCommands;
try {
TrackingUpdateProcessorFactory.startRecording(trackGroupName);
ModifiableSolrParams params = params("post-processor", "tracking-" + trackGroupName);
List<SolrInputDocument> list = Arrays.asList(
sdoc("id", "4", categoryField, SHIPS[0]),
sdoc("id", "5", categoryField, SHIPS[1]),
sdoc("id", "6", categoryField, SHIPS[2]));
Collections.shuffle(list, random()); // order should not matter here
assertUpdateResponse(add(getAlias(), list,
params));
} finally {
updateCommands = TrackingUpdateProcessorFactory.stopRecording(trackGroupName);
}
assertRouting(numShards, updateCommands);
}
private void assertInvariants(String... expectedColls) throws IOException, SolrServerException {
final int expectNumFound = lastDocId - numDocsDeletedOrFailed; //lastDocId is effectively # generated docs
List<String> cols = new CollectionAdminRequest.ListAliases().process(solrClient).getAliasesAsLists().get(getAlias());
cols = new ArrayList<>(cols);
cols.sort(String::compareTo); // don't really care about the order here.
assert !cols.isEmpty();
int totalNumFound = 0;
for (String col : cols) {
final QueryResponse colResponse = solrClient.query(col, params(
"q", "*:*",
"rows", "0"));
long numFound = colResponse.getResults().getNumFound();
if (numFound > 0) {
totalNumFound += numFound;
}
}
final QueryResponse colResponse = solrClient.query(getAlias(), params(
"q", "*:*",
"rows", "0"));
long aliasNumFound = colResponse.getResults().getNumFound();
List<String> actual = Arrays.asList(expectedColls);
actual.sort(String::compareTo);
assertArrayEquals("Expected " + expectedColls.length + " collections, found " + cols.size() + ":\n" +
cols + " vs \n" + actual, expectedColls, cols.toArray());
assertEquals("Expected collections and alias to have same number of documents",
aliasNumFound, totalNumFound);
assertEquals("Expected to find " + expectNumFound + " docs but found " + aliasNumFound,
expectNumFound, aliasNumFound);
}
private SolrInputDocument newDoc(String routedValue) {
if (routedValue != null) {
return sdoc("id", Integer.toString(++lastDocId),
categoryField, routedValue,
intField, "0"); // always 0
} else {
return sdoc("id", Integer.toString(++lastDocId),
intField, "0"); // always 0
}
}
@Override
public String getAlias() {
return "myAlias";
}
@Override
public CloudSolrClient getSolrClient() {
return solrClient;
}
public static class IncrementURPFactory extends FieldMutatingUpdateProcessorFactory {
@Override
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
return FieldValueMutatingUpdateProcessor.valueMutator(getSelector(), next,
(src) -> Integer.valueOf(src.toString()) + 1);
}
}
private void testFailedDocument(SolrInputDocument sdoc, String errorMsg) throws SolrServerException, IOException {
try {
final UpdateResponse resp = solrClient.add(getAlias(), sdoc);
// if we have a TolerantUpdateProcessor then we see it there)
final Object errors = resp.getResponseHeader().get("errors"); // Tolerant URP
assertNotNull(errors);
assertTrue("Expected to find " + errorMsg + " in errors: " + errors.toString(),errors.toString().contains(errorMsg));
} catch (SolrException e) {
assertTrue(e.getMessage().contains(errorMsg));
}
++numDocsDeletedOrFailed;
}
}