/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.examples.wikisearch.logic;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import org.junit.Assert;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
import org.apache.accumulo.examples.wikisearch.sample.Document;
import org.apache.accumulo.examples.wikisearch.sample.Field;
import org.apache.accumulo.examples.wikisearch.sample.Results;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.conf.Configuration.IntegerRanges;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.security.Credentials;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.Before;
import org.junit.Test;
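/**
 * Runs WikipediaMapper over a sample enwiki XML split, loading its output into a
 * MockInstance, and then exercises QueryLogic queries against the resulting wiki tables.
 */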
public class TestQueryLogic {
private static final String METADATA_TABLE_NAME = "wikiMetadata";
private static final String TABLE_NAME = "wiki";
private static final String INDEX_TABLE_NAME = "wikiIndex";
private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
private static final String[] TABLE_NAMES = {METADATA_TABLE_NAME, TABLE_NAME, RINDEX_TABLE_NAME, INDEX_TABLE_NAME};
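/**
 * RecordWriter stand-in that routes each mutation to the matching per-table
 * BatchWriter in writerMap rather than to a real MapReduce output.
 */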
private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
@Override
public void write(Text key, Mutation value) throws IOException, InterruptedException {
try {
writerMap.get(key).addMutation(value);
} catch (MutationsRejectedException e) {
throw new IOException("Error adding mutation", e);
}
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
try {
for (BatchWriter w : writerMap.values()) {
w.flush();
w.close();
}
} catch (MutationsRejectedException e) {
throw new IOException("Error closing Batch Writer", e);
}
}
}
private Connector c = null;
private Configuration conf = new Configuration();
private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
private QueryLogic table = null;
@Before
public void setup() throws Exception {
Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);
conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
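// An in-memory MockInstance lets the test run without a live Accumulo cluster.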
MockInstance i = new MockInstance();
c = i.getConnector("root", new PasswordToken(""));
WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
for (String table : TABLE_NAMES) {
writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
}
TaskAttemptID id = new TaskAttemptID("fake", 1, TaskType.MAP, 1, 1);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, id);
RawLocalFileSystem fs = new RawLocalFileSystem();
fs.setConf(conf);
URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
Assert.assertNotNull(url);
File data = new File(url.toURI());
Path tmpFile = new Path(data.getAbsolutePath());
// Setup the Mapper
WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
AggregatingRecordReader rr = new AggregatingRecordReader();
Path ocPath = new Path(tmpFile, "oc");
OutputCommitter oc = new FileOutputCommitter(ocPath, context);
fs.deleteOnExit(ocPath);
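// Stand-in status reporter and record writer so the mapper can run outside a real MapReduce job.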
StandaloneStatusReporter sr = new StandaloneStatusReporter();
rr.initialize(split, context);
MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
WikipediaMapper mapper = new WikipediaMapper();
// Hadoop's Mapper.Context cannot be instantiated directly, so wrap a MapContextImpl in an anonymous Context that delegates every call.
final MapContextImpl<LongWritable,Text,Text,Mutation> mapContext = new MapContextImpl<LongWritable,Text,Text,Mutation>(conf, id, rr, rw, oc, sr, split);
// Load data into Mock Accumulo
Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context() {
/**
* Get the input split for this map.
*/
public InputSplit getInputSplit() {
return mapContext.getInputSplit();
}
@Override
public LongWritable getCurrentKey() throws IOException, InterruptedException {
return mapContext.getCurrentKey();
}
@Override
public Text getCurrentValue() throws IOException, InterruptedException {
return mapContext.getCurrentValue();
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
return mapContext.nextKeyValue();
}
@Override
public Counter getCounter(Enum<?> counterName) {
return mapContext.getCounter(counterName);
}
@Override
public Counter getCounter(String groupName, String counterName) {
return mapContext.getCounter(groupName, counterName);
}
@Override
public OutputCommitter getOutputCommitter() {
return mapContext.getOutputCommitter();
}
@Override
public void write(Text key, Mutation value) throws IOException,
InterruptedException {
mapContext.write(key, value);
}
@Override
public String getStatus() {
return mapContext.getStatus();
}
@Override
public TaskAttemptID getTaskAttemptID() {
return mapContext.getTaskAttemptID();
}
@Override
public void setStatus(String msg) {
mapContext.setStatus(msg);
}
@Override
public Path[] getArchiveClassPaths() {
return mapContext.getArchiveClassPaths();
}
@Override
public String[] getArchiveTimestamps() {
return mapContext.getArchiveTimestamps();
}
@Override
public URI[] getCacheArchives() throws IOException {
return mapContext.getCacheArchives();
}
@Override
public URI[] getCacheFiles() throws IOException {
return mapContext.getCacheFiles();
}
@Override
public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass()
throws ClassNotFoundException {
return mapContext.getCombinerClass();
}
@Override
public Configuration getConfiguration() {
return mapContext.getConfiguration();
}
@Override
public Path[] getFileClassPaths() {
return mapContext.getFileClassPaths();
}
@Override
public String[] getFileTimestamps() {
return mapContext.getFileTimestamps();
}
@Override
public RawComparator<?> getGroupingComparator() {
return mapContext.getGroupingComparator();
}
@Override
public Class<? extends InputFormat<?, ?>> getInputFormatClass()
throws ClassNotFoundException {
return mapContext.getInputFormatClass();
}
@Override
public String getJar() {
return mapContext.getJar();
}
@Override
public JobID getJobID() {
return mapContext.getJobID();
}
@Override
public String getJobName() {
return mapContext.getJobName();
}
/*@Override
public boolean userClassesTakesPrecedence() {
return mapContext.userClassesTakesPrecedence();
}*/
@Override
public boolean getJobSetupCleanupNeeded() {
return mapContext.getJobSetupCleanupNeeded();
}
@Override
public boolean getTaskCleanupNeeded() {
return mapContext.getTaskCleanupNeeded();
}
@Override
public Path[] getLocalCacheArchives() throws IOException {
return mapContext.getLocalCacheArchives();
}
@Override
public Path[] getLocalCacheFiles() throws IOException {
return mapContext.getLocalCacheFiles();
}
@Override
public Class<?> getMapOutputKeyClass() {
return mapContext.getMapOutputKeyClass();
}
@Override
public Class<?> getMapOutputValueClass() {
return mapContext.getMapOutputValueClass();
}
@Override
public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass()
throws ClassNotFoundException {
return mapContext.getMapperClass();
}
@Override
public int getMaxMapAttempts() {
return mapContext.getMaxMapAttempts();
}
@Override
public int getMaxReduceAttempts() {
return mapContext.getMaxReduceAttempts();
}
@Override
public int getNumReduceTasks() {
return mapContext.getNumReduceTasks();
}
@Override
public Class<? extends OutputFormat<?, ?>> getOutputFormatClass()
throws ClassNotFoundException {
return mapContext.getOutputFormatClass();
}
@Override
public Class<?> getOutputKeyClass() {
return mapContext.getOutputKeyClass();
}
@Override
public Class<?> getOutputValueClass() {
return mapContext.getOutputValueClass();
}
@Override
public Class<? extends Partitioner<?, ?>> getPartitionerClass()
throws ClassNotFoundException {
return mapContext.getPartitionerClass();
}
@Override
public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass()
throws ClassNotFoundException {
return mapContext.getReducerClass();
}
@Override
public RawComparator<?> getSortComparator() {
return mapContext.getSortComparator();
}
@Override
public boolean getSymlink() {
return mapContext.getSymlink();
}
@Override
public Path getWorkingDirectory() throws IOException {
return mapContext.getWorkingDirectory();
}
@Override
public void progress() {
mapContext.progress();
}
@Override
public boolean getProfileEnabled() {
return mapContext.getProfileEnabled();
}
@Override
public String getProfileParams() {
return mapContext.getProfileParams();
}
@Override
public IntegerRanges getProfileTaskRange(boolean isMap) {
return mapContext.getProfileTaskRange(isMap);
}
@Override
public String getUser() {
return mapContext.getUser();
}
@Override
public Credentials getCredentials() {
return mapContext.getCredentials();
}
@Override
public float getProgress() {
return mapContext.getProgress();
}
};
mapper.run(con);
// Flush and close record writers.
rw.close(context);
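// Point the query logic at the tables populated above.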
table = new QueryLogic();
table.setMetadataTableName(METADATA_TABLE_NAME);
table.setTableName(TABLE_NAME);
table.setIndexTableName(INDEX_TABLE_NAME);
table.setReverseIndexTableName(RINDEX_TABLE_NAME);
table.setUseReadAheadIterator(false);
table.setUnevaluatedFields(Collections.singletonList("TEXT"));
}
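/**
 * Dumps every entry in the named table; useful when a query assertion fails.
 */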
void debugQuery(String tableName) throws Exception {
Scanner s = c.createScanner(tableName, new Authorizations("all"));
Range r = new Range();
s.setRange(r);
for (Entry<Key,Value> entry : s)
System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
}
@Test
public void testTitle() throws Exception {
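// Silence the query-planning loggers that setup() turned up to DEBUG.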
Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
List<String> auths = new ArrayList<String>();
auths.add("enwiki");
Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
List<Document> docs = results.getResults();
assertEquals(4, docs.size());
results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null);
docs = results.getResults();
assertEquals(1, docs.size());
for (Document doc : docs) {
System.out.println("id: " + doc.getId());
for (Field field : doc.getFields())
System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
}
}
}