blob: 938f01bbed0a0394a1801b00833bee388bb47fdb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.examples.wikisearch.logic;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import junit.framework.Assert;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.util.ContextFactory;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
import org.apache.accumulo.examples.wikisearch.sample.Document;
import org.apache.accumulo.examples.wikisearch.sample.Field;
import org.apache.accumulo.examples.wikisearch.sample.Results;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.Before;
import org.junit.Test;
/**
 * Loads the sample Wikipedia dump {@code enwiki-20110901-001.xml} into a {@link MockInstance} by running {@link WikipediaMapper} over it, then
 * executes a query against the resulting tables through {@link QueryLogic}.
 */
public class TestQueryLogic {

  private static final String METADATA_TABLE_NAME = "wikiMetadata";
  private static final String TABLE_NAME = "wiki";
  private static final String INDEX_TABLE_NAME = "wikiIndex";
  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";

  /** Every table that {@link #setup()} creates and registers a {@link BatchWriter} for. */
  private static final String[] TABLE_NAMES = {METADATA_TABLE_NAME, TABLE_NAME, RINDEX_TABLE_NAME, INDEX_TABLE_NAME};

  /**
   * Record writer handed to the mapper. Each mutation is forwarded to the {@link BatchWriter} that {@link #setup()} registered in {@code writerMap}
   * under the table name supplied as the key.
   */
  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {

    /**
     * Adds {@code value} to the batch writer registered for the table named by {@code key}.
     *
     * @param key
     *          name of the destination table
     * @param value
     *          mutation to add
     * @throws IOException
     *           if no writer is registered for {@code key}, or if the writer rejects the mutation
     */
    @Override
    public void write(Text key, Mutation value) throws IOException, InterruptedException {
      BatchWriter writer = writerMap.get(key);
      if (writer == null) {
        // Fail with the offending table name instead of an anonymous NullPointerException.
        throw new IOException("No batch writer registered for table " + key);
      }
      try {
        writer.addMutation(value);
      } catch (MutationsRejectedException e) {
        throw new IOException("Error adding mutation", e);
      }
    }

    /**
     * Flushes and closes every registered batch writer.
     *
     * @param context
     *          unused
     * @throws IOException
     *           if any writer reports rejected mutations
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      try {
        for (BatchWriter w : writerMap.values()) {
          w.flush();
          w.close();
        }
      } catch (MutationsRejectedException e) {
        throw new IOException("Error closing Batch Writer", e);
      }
    }
  }

  /** Connector to the mock instance; assigned in {@link #setup()}. */
  private Connector c = null;
  private final Configuration conf = new Configuration();
  /** Batch writers keyed by table name, populated in {@link #setup()}. */
  private final HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
  /** Query logic under test; assigned in {@link #setup()}. */
  private QueryLogic table = null;

  /**
   * Creates the tables in a fresh {@link MockInstance}, ingests the sample dump through {@link WikipediaMapper}, and points a new {@link QueryLogic}
   * at the populated tables.
   *
   * @throws Exception
   *           if table setup, resource lookup, or ingest fails
   */
  @Before
  public void setup() throws Exception {
    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
    Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
    Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);

    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");

    MockInstance i = new MockInstance();
    c = i.getConnector("root", "");
    for (String tableName : TABLE_NAMES) {
      // Only delete when the table is present, so that a genuine delete failure is not masked.
      if (c.tableOperations().exists(tableName)) {
        c.tableOperations().delete(tableName);
      }
      c.tableOperations().create(tableName);
      writerMap.put(new Text(tableName), c.createBatchWriter(tableName, 1000L, 1000L, 1));
    }

    TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);

    RawLocalFileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);

    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
    Assert.assertNotNull("Test resource enwiki-20110901-001.xml not found on the classpath", url);
    File data = new File(url.toURI());
    Path tmpFile = new Path(data.getAbsolutePath());

    // Setup the Mapper
    // The split covers the whole data file, from offset 0 to the file's length.
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
    AggregatingRecordReader rr = new AggregatingRecordReader();
    Path ocPath = new Path(tmpFile, "oc");
    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
    fs.deleteOnExit(ocPath);
    StandaloneStatusReporter sr = new StandaloneStatusReporter();
    rr.initialize(split, context);
    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
    WikipediaMapper mapper = new WikipediaMapper();

    // Load data into Mock Accumulo
    Mapper<LongWritable,Text,Text,Mutation>.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
    mapper.run(con);

    // Flush and close record writers.
    rw.close(context);

    table = new QueryLogic();
    table.setMetadataTableName(METADATA_TABLE_NAME);
    table.setTableName(TABLE_NAME);
    table.setIndexTableName(INDEX_TABLE_NAME);
    table.setReverseIndexTableName(RINDEX_TABLE_NAME);
    table.setUseReadAheadIterator(false);
  }

  /**
   * Debugging aid: prints every key/value pair of {@code tableName} to standard output, scanning the full range with empty authorizations.
   *
   * @param tableName
   *          table to dump
   * @throws Exception
   *           if the scanner cannot be created
   */
  void debugQuery(String tableName) throws Exception {
    Scanner s = c.createScanner(tableName, new Authorizations());
    Range r = new Range();
    s.setRange(r);
    for (Entry<Key,Value> entry : s) {
      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
    }
  }

  /**
   * Runs a {@code TITLE} equality query with the {@code enwiki} authorization and prints every returned document and its fields. Only the presence of
   * a result object is asserted; the returned documents are printed, not checked.
   */
  @Test
  public void testTitle() {
    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
    Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);

    List<String> auths = new ArrayList<String>();
    auths.add("enwiki");

    Results results = table.runQuery(c, auths, "TITLE == 'afghanistanhistory'", null, null, null);
    // Report a missing result as a test failure rather than a NullPointerException in the loop below.
    Assert.assertNotNull("runQuery returned null", results);
    for (Document doc : results.getResults()) {
      System.out.println("id: " + doc.getId());
      for (Field field : doc.getFields()) {
        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
      }
    }
  }
}