blob: 3f289d00a7267e524158a55626c95fb254e0be1c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.testing.randomwalk.shard;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Random;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.IntersectingIterator;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.testing.randomwalk.RandWalkEnv;
import org.apache.accumulo.testing.randomwalk.State;
import org.apache.accumulo.testing.randomwalk.Test;
import org.apache.hadoop.io.Text;
/**
 * Random walk node that cross-checks the shard index table against the document table.
 *
 * <p>
 * A handful of random words is generated. The index table is queried for those words with an
 * {@link IntersectingIterator}, and the document table is scanned with one {@link RegExFilter} per
 * word. The two resulting sets of document identifiers must be equal, otherwise the node fails.
 */
public class Grep extends Test {

  /**
   * Runs one index-versus-grep consistency check.
   *
   * @param state
   *          walk state; read for {@code "indexTableName"}, {@code "docTableName"} and
   *          {@code "rand"}
   * @param env
   *          environment supplying the Accumulo client used to create the batch scanners
   * @param props
   *          node properties (not used by this node)
   * @throws Exception
   *           if the set of documents found through the index differs from the set found by
   *           grepping the document table, or if a scan fails
   */
  @Override
  public void visit(State state, RandWalkEnv env, Properties props) throws Exception {
    // pick a few random words... grep for those words and search the index
    // ensure both return the same set of documents
    String indexTableName = (String) state.get("indexTableName");
    String dataTableName = (String) state.get("docTableName");
    Random rand = (Random) state.get("rand");

    // between 2 and 5 words (nextInt(4) yields 0..3)
    Text[] words = new Text[rand.nextInt(4) + 2];
    for (int i = 0; i < words.length; i++) {
      words[i] = new Text(Insert.generateRandomWord(rand));
    }

    // Index lookup: the words are handed to the intersecting iterator as column families and the
    // column qualifier of every returned key is collected as a document identifier.
    HashSet<Text> documentsFoundInIndex = new HashSet<>();
    // try-with-resources so the scanner is released even when the scan throws
    try (BatchScanner bs = env.getAccumuloClient().createBatchScanner(indexTableName,
        Authorizations.EMPTY, 16)) {
      IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class.getName());
      IntersectingIterator.setColumnFamilies(ii, words);
      bs.addScanIterator(ii);
      bs.setRanges(Collections.singleton(new Range()));

      for (Entry<Key,Value> entry2 : bs) {
        documentsFoundInIndex.add(entry2.getKey().getColumnQualifier());
      }
    }

    // Grep: stack one regex filter per word on a scan of the document table; the row of every
    // surviving entry is collected as a document identifier.
    HashSet<Text> documentsFoundByGrep = new HashSet<>();
    try (BatchScanner bs = env.getAccumuloClient().createBatchScanner(dataTableName,
        Authorizations.EMPTY, 16)) {
      for (int i = 0; i < words.length; i++) {
        // each filter needs its own priority and name
        IteratorSetting more = new IteratorSetting(20 + i, "ii" + i, RegExFilter.class);
        // the pattern requires the word to be delimited by whitespace or the start/end of the
        // matched text
        RegExFilter.setRegexs(more, null, null, null, "(^|(.*\\s))" + words[i] + "($|(\\s.*))",
            false);
        bs.addScanIterator(more);
      }
      bs.setRanges(Collections.singleton(new Range()));

      for (Entry<Key,Value> entry2 : bs) {
        documentsFoundByGrep.add(entry2.getKey().getRow());
      }
    }

    if (!documentsFoundInIndex.equals(documentsFoundByGrep)) {
      throw new Exception("Set of documents found not equal for words " + Arrays.asList(words)
          + " " + documentsFoundInIndex + " " + documentsFoundByGrep);
    }

    log.debug(
        "Grep and index agree " + Arrays.asList(words) + " " + documentsFoundInIndex.size());
  }
}