Merge remote-tracking branch 'origin/1.4.5-SNAPSHOT' into 1.5.0
Conflicts:
ingest/bin/ingest_parallel.sh
ingest/pom.xml
pom.xml
query/pom.xml
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0f31ce3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+**/target
+.idea
+**/*.iml
+**/lib
\ No newline at end of file
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index acdcbf8..73d582d 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -38,7 +38,7 @@
#
# Map/Reduce job
#
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.5-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.5.0.jar
CONF=$SCRIPT_DIR/../conf/wikipedia.xml
HDFS_DATA_DIR=$1
export HADOOP_CLASSPATH=$CLASSPATH
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index 62e79db..1619603 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -38,7 +38,7 @@
#
# Map/Reduce job
#
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.5-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.5.0.jar
CONF=$SCRIPT_DIR/../conf/wikipedia_parallel.xml
HDFS_DATA_DIR=$1
export HADOOP_CLASSPATH=$CLASSPATH
diff --git a/ingest/pom.xml b/ingest/pom.xml
index cd8df15..a6f3d70 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -1,5 +1,5 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <!--
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
@@ -15,42 +15,43 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <artifactId>accumulo-wikisearch</artifactId>
<groupId>org.apache.accumulo</groupId>
- <version>1.4.5-SNAPSHOT</version>
- <relativePath>../</relativePath>
+ <artifactId>accumulo-wikisearch</artifactId>
+ <version>1.5.0</version>
</parent>
-
<artifactId>wikisearch-ingest</artifactId>
<name>wikisearch-ingest</name>
-
<dependencies>
<dependency>
- <groupId>org.apache.zookeeper</groupId>
- <artifactId>zookeeper</artifactId>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
</dependency>
- <dependency>
-   <groupId>org.apache.accumulo</groupId>
-   <artifactId>accumulo-core</artifactId>
- </dependency>
<dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-start</artifactId>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
</dependency>
<dependency>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
</dependency>
<dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
@@ -58,36 +59,19 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-analyzers</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
<artifactId>lucene-wikipedia</artifactId>
</dependency>
<dependency>
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- </dependency>
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>cloudtrace</artifactId>
+ <groupId>org.apache.zookeeper</groupId>
+ <artifactId>zookeeper</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>commons-codec</groupId>
- <artifactId>commons-codec</artifactId>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
</dependency>
</dependencies>
-
<build>
<plugins>
<plugin>
@@ -96,15 +80,15 @@
<executions>
<execution>
<id>copy-dependencies</id>
- <phase>prepare-package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
+ <phase>prepare-package</phase>
<configuration>
<outputDirectory>lib</outputDirectory>
<!-- just grab the non-provided runtime dependencies -->
<!-- XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 -->
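+            <!-- 1.5 replaces cloudtrace with accumulo-fate and accumulo-trace -->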
- <includeArtifactIds>commons-lang,guava,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,cloudtrace,zookeeper,commons-codec</includeArtifactIds>
+ <includeArtifactIds>commons-lang,guava,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,zookeeper,commons-codec,accumulo-fate,accumulo-trace</includeArtifactIds>
<excludeTransitive>false</excludeTransitive>
</configuration>
</execution>
@@ -121,7 +105,6 @@
</plugin>
</plugins>
</build>
-
<profiles>
<!-- profile for building against Hadoop 1.0.x
Activate by not specifying hadoop.profile -->
@@ -136,6 +119,7 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
+ <version>${hadoop.version}</version>
</dependency>
</dependencies>
</profile>
@@ -153,9 +137,15 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
</dependencies>
</profile>
</profiles>
-
</project>
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
index 90b8308..59035dc 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
@@ -39,7 +39,6 @@
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.iterators.user.SummingCombiner;
-import org.apache.accumulo.core.tabletserver.thrift.MutationLogger.log_args;
import org.apache.accumulo.examples.wikisearch.ingest.ArticleExtractor.Article;
import org.apache.accumulo.examples.wikisearch.iterator.GlobalIndexUidCombiner;
import org.apache.accumulo.examples.wikisearch.iterator.TextIndexCombiner;
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java b/ingest/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
similarity index 98%
rename from ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
rename to ingest/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
index c842da7..f79221d 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
+++ b/ingest/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
@@ -31,12 +31,12 @@
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
-import org.apache.accumulo.core.util.ContextFactory;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.junit.Before;
import org.junit.Test;
@@ -97,7 +97,8 @@
conf.set(AggregatingRecordReader.START_TOKEN, "<doc");
conf.set(AggregatingRecordReader.END_TOKEN, "</doc>");
conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));
- ctx = ContextFactory.createTaskAttemptContext(conf);
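+    // Hadoop 1 ships TaskAttemptContext as a concrete class, so construct it directly.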
+ TaskAttemptID id = new TaskAttemptID();
+ ctx = new TaskAttemptContext(conf, id);
XPath xp = xpFactory.newXPath();
EXPR_A = xp.compile("/doc/a");
EXPR_B = xp.compile("/doc/b");
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java b/ingest/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
similarity index 97%
copy from ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
copy to ingest/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
index c842da7..d9443bc 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
+++ b/ingest/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
@@ -31,12 +31,13 @@
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
-import org.apache.accumulo.core.util.ContextFactory;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.junit.Before;
import org.junit.Test;
@@ -97,7 +98,8 @@
conf.set(AggregatingRecordReader.START_TOKEN, "<doc");
conf.set(AggregatingRecordReader.END_TOKEN, "</doc>");
conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));
- ctx = ContextFactory.createTaskAttemptContext(conf);
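+    // Hadoop 2 makes TaskAttemptContext an interface; TaskAttemptContextImpl is its concrete implementation.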
+ TaskAttemptID id = new TaskAttemptID();
+ ctx = new TaskAttemptContextImpl(conf, id);
XPath xp = xpFactory.newXPath();
EXPR_A = xp.compile("/doc/a");
EXPR_B = xp.compile("/doc/b");
diff --git a/pom.xml b/pom.xml
index 6be16f0..c62609a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -18,42 +18,168 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <artifactId>accumulo-examples</artifactId>
- <groupId>org.apache.accumulo</groupId>
- <version>1.4.5-SNAPSHOT</version>
- <relativePath>../</relativePath>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-examples</artifactId>
+ <version>1.5.0</version>
</parent>
<artifactId>accumulo-wikisearch</artifactId>
<packaging>pom</packaging>
<name>accumulo-wikisearch</name>
-
<modules>
<module>ingest</module>
<module>query</module>
<module>query-war</module>
</modules>
-
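+  <!-- Versions shared across the wikisearch modules; managed below so the child poms omit them -->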
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <version.accumulo>${project.version}</version.accumulo>
+ <version.commons-codec>1.5</version.commons-codec>
+ <version.commons-jexl>2.0.1</version.commons-jexl>
+ <version.commons-lang>2.4</version.commons-lang>
+ <version.ejb-spec-api>1.0.1.Final</version.ejb-spec-api>
+ <version.guava>11.0.2</version.guava>
+ <version.jaxrs>2.1.0.GA</version.jaxrs>
+ <version.kryo>1.04</version.kryo>
+ <version.log4j>1.2.16</version.log4j>
+ <version.log4j-extras>1.0</version.log4j-extras>
+ <version.lucene>3.0.2</version.lucene>
+ <version.lucene-analyzers>3.0.2</version.lucene-analyzers>
+ <version.lucene-wikipedia>3.0.2</version.lucene-wikipedia>
+ <version.minlog>1.2</version.minlog>
+ <version.protobuf>2.3.0</version.protobuf>
+ <version.thrift>0.9.0</version.thrift>
+ <version.zookeeper>3.3.1</version.zookeeper>
+ </properties>
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>${version.guava}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <version>${version.protobuf}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.googlecode</groupId>
+ <artifactId>kryo</artifactId>
+ <version>${version.kryo}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.googlecode</groupId>
+ <artifactId>minlog</artifactId>
+ <version>${version.minlog}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ <version>1.11</version>
+ </dependency>
+ <dependency>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ <version>1.11</version>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>${version.commons-codec}</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>${version.commons-lang}</version>
+ </dependency>
+ <!-- XXX This is just to fix the dependency conflict in Hadoop 1 -->
+ <dependency>
+ <groupId>net.java.dev.jets3t</groupId>
+ <artifactId>jets3t</artifactId>
+ <version>0.7.1</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-httpclient</groupId>
+ <artifactId>commons-httpclient</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-core</artifactId>
+ <version>${version.accumulo}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-trace</artifactId>
+ <version>${version.accumulo}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-jexl</artifactId>
+ <version>${version.commons-jexl}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers</artifactId>
+ <version>${version.lucene-analyzers}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-core</artifactId>
+ <version>${version.lucene}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-wikipedia</artifactId>
+ <version>${version.lucene-wikipedia}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-digester</groupId>
+ <artifactId>commons-digester</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ <version>${version.thrift}</version>
+ <exclusions>
+ <!-- excluded to make the enforcer plug in happy-->
+ <exclusion>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpcore</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
<repositories>
<repository>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
<id>central</id>
<name>Maven Repository Switchboard</name>
- <layout>default</layout>
<url>http://repo1.maven.org/maven2</url>
- <snapshots>
- <enabled>false</enabled>
- </snapshots>
+ <layout>default</layout>
</repository>
<repository>
- <id>java.net</id>
- <name>java.net</name>
- <layout>default</layout>
- <url>https://maven.java.net/content/groups/public</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
+ <id>java.net</id>
+ <name>java.net</name>
+ <url>https://maven.java.net/content/groups/public</url>
+ <layout>default</layout>
</repository>
</repositories>
-
<build>
<defaultGoal>package</defaultGoal>
<plugins>
@@ -63,14 +189,14 @@
<executions>
<execution>
<id>enforce-mvn</id>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
<configuration>
<rules>
<DependencyConvergence/>
</rules>
</configuration>
- <goals>
- <goal>enforce</goal>
- </goals>
</execution>
</executions>
</plugin>
@@ -88,6 +214,10 @@
</configuration>
</plugin>
<plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>2.8</version>
+ </plugin>
+ <plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<outputDirectory>lib</outputDirectory>
@@ -132,20 +262,20 @@
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
<plugin>
- <inherited>false</inherited>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
+ <inherited>false</inherited>
<executions>
<execution>
<id>copy-dependencies</id>
- <phase>prepare-package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
+ <phase>prepare-package</phase>
<configuration>
<outputDirectory>../../lib</outputDirectory>
<!-- just grab the non-provided runtime dependencies -->
- <includeArtifactIds>commons-collections,commons-configuration,commons-io,commons-lang,jline,log4j,libthrift,commons-jci-core,commons-jci-fam,commons-logging,commons-logging-api,cloudtrace</includeArtifactIds>
+ <includeArtifactIds>commons-collections,commons-configuration,commons-io,commons-lang,jline,log4j,libthrift,commons-jci-core,commons-jci-fam,commons-logging,commons-logging-api</includeArtifactIds>
<excludeGroupIds>accumulo</excludeGroupIds>
<excludeTransitive>true</excludeTransitive>
</configuration>
@@ -154,129 +284,73 @@
</plugin>
</plugins>
</build>
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <version.commons-lang>2.4</version.commons-lang>
- <version.accumulo>${project.version}</version.accumulo>
- <version.commons-jexl>2.0.1</version.commons-jexl>
- <version.commons-codec>1.5</version.commons-codec>
- <version.ejb-spec-api>1.0.1.Final</version.ejb-spec-api>
- <version.jaxrs>2.1.0.GA</version.jaxrs>
- <version.kryo>1.04</version.kryo>
- <version.log4j>1.2.16</version.log4j>
- <version.log4j-extras>1.0</version.log4j-extras>
- <version.lucene>3.0.2</version.lucene>
- <version.lucene-analyzers>3.0.2</version.lucene-analyzers>
- <version.lucene-wikipedia>3.0.2</version.lucene-wikipedia>
- <version.protobuf>2.3.0</version.protobuf>
- <version.guava>11.0.2</version.guava>
- <version.libthrift>0.6.1</version.libthrift>
- <version.zookeeper>3.3.1</version.zookeeper>
- <version.minlog>1.2</version.minlog>
- </properties>
-
- <dependencyManagement>
- <dependencies>
- <dependency>
- <groupId>commons-codec</groupId>
- <artifactId>commons-codec</artifactId>
- <version>${version.commons-codec}</version>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>${version.commons-lang}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-core</artifactId>
- <version>${version.accumulo}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>zookeeper</artifactId>
- <version>${version.zookeeper}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <version>${version.libthrift}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>cloudtrace</artifactId>
- <version>${version.accumulo}</version>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>${version.guava}</version>
- </dependency>
- <dependency>
- <groupId>com.googlecode</groupId>
- <artifactId>kryo</artifactId>
- <version>${version.kryo}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-core</artifactId>
- <version>${version.lucene}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-analyzers</artifactId>
- <version>${version.lucene-analyzers}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-wikipedia</artifactId>
- <version>${version.lucene-wikipedia}</version>
- <exclusions>
- <exclusion>
- <groupId>commons-digester</groupId>
- <artifactId>commons-digester</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- <version>${version.protobuf}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-jexl</artifactId>
- <version>${version.commons-jexl}</version>
- </dependency>
- <dependency>
- <groupId>com.googlecode</groupId>
- <artifactId>minlog</artifactId>
- <version>${version.minlog}</version>
- </dependency>
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- <version>1.11</version>
- <exclusions>
- <exclusion>
- <groupId>asm</groupId>
- <artifactId>asm</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <!-- XXX This is just to fix the dependency conflict in Hadoop 1 -->
- <dependency>
- <groupId>net.java.dev.jets3t</groupId>
- <artifactId>jets3t</artifactId>
- <version>0.7.1</version>
- <exclusions>
- <exclusion>
- <groupId>commons-httpclient</groupId>
- <artifactId>commons-httpclient</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- </dependencies>
- </dependencyManagement>
-
+ <profiles>
+ <profile>
+ <!-- profile for building against Hadoop 1.0.x
+ Activate by not specifying hadoop.profile -->
+ <id>hadoop-1.0</id>
+ <activation>
+ <property>
+ <name>!hadoop.profile</name>
+ </property>
+ </activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <version>1.8</version>
+ <executions>
+ <execution>
+ <id>add-test-source</id>
+ <goals>
+ <goal>add-test-source</goal>
+ </goals>
+ <phase>generate-test-sources</phase>
+ <configuration>
+ <sources>
+ <source>src/test/hadoop1</source>
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ <profile>
+ <!-- profile for building against Hadoop 2.0.x
+ Activate using: mvn -Dhadoop.profile=2.0 -->
+ <id>hadoop-2.0</id>
+ <activation>
+ <property>
+ <name>hadoop.profile</name>
+ <value>2.0</value>
+ </property>
+ </activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <version>1.8</version>
+ <executions>
+ <execution>
+ <id>add-test-source</id>
+ <goals>
+ <goal>add-test-source</goal>
+ </goals>
+ <phase>generate-test-sources</phase>
+ <configuration>
+ <sources>
+ <source>src/test/hadoop2</source>
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
</project>
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 485d584..be6e807 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -18,30 +18,14 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <artifactId>accumulo-wikisearch</artifactId>
<groupId>org.apache.accumulo</groupId>
- <version>1.4.5-SNAPSHOT</version>
+ <artifactId>accumulo-wikisearch</artifactId>
+ <version>1.5.0</version>
</parent>
<artifactId>wikisearch-query-war</artifactId>
<packaging>war</packaging>
<name>wikisearch-query-war</name>
-
- <dependencies>
- <dependency>
- <groupId>javax</groupId>
- <artifactId>javaee-web-api</artifactId>
- <version>6.0</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
-
- </dependencies>
-
<build>
<plugins>
<plugin>
diff --git a/query/pom.xml b/query/pom.xml
index c8192f6..be6f6b2 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -16,35 +16,92 @@
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
<parent>
- <artifactId>accumulo-wikisearch</artifactId>
<groupId>org.apache.accumulo</groupId>
- <version>1.4.5-SNAPSHOT</version>
- <relativePath>../</relativePath>
+ <artifactId>accumulo-wikisearch</artifactId>
+ <version>1.5.0</version>
</parent>
-
<artifactId>wikisearch-query</artifactId>
<packaging>ejb</packaging>
<name>wikisearch-query</name>
<dependencies>
<dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.googlecode</groupId>
+ <artifactId>kryo</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>wikisearch-ingest</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-jexl</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ </dependency>
+ <dependency>
<groupId>javaee</groupId>
<artifactId>javaee-api</artifactId>
<version>5</version>
<scope>provided</scope>
</dependency>
<dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-core</artifactId>
+ <groupId>com.googlecode</groupId>
+ <artifactId>minlog</artifactId>
+ <scope>runtime</scope>
</dependency>
<dependency>
- <groupId>org.apache.zookeeper</groupId>
- <artifactId>zookeeper</artifactId>
+ <groupId>commons-configuration</groupId>
+ <artifactId>commons-configuration</artifactId>
+ <version>1.6</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.1</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.accumulo</groupId>
- <artifactId>cloudtrace</artifactId>
+ <artifactId>accumulo-fate</artifactId>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-trace</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
@@ -53,43 +110,15 @@
<scope>runtime</scope>
</dependency>
<dependency>
+ <groupId>org.apache.zookeeper</groupId>
+ <artifactId>zookeeper</artifactId>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-jexl</artifactId>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- </dependency>
- <dependency>
- <groupId>com.googlecode</groupId>
- <artifactId>kryo</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>wikisearch-ingest</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>com.googlecode</groupId>
- <artifactId>minlog</artifactId>
- </dependency>
- <dependency>
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-codec</groupId>
- <artifactId>commons-codec</artifactId>
- </dependency>
</dependencies>
<build>
<plugins>
@@ -99,15 +128,15 @@
<executions>
<execution>
<id>copy-dependencies</id>
- <phase>prepare-package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
+ <phase>prepare-package</phase>
<configuration>
<outputDirectory>lib</outputDirectory>
<!-- just grab the non-provided runtime dependencies -->
<!-- XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 -->
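+            <!-- commons-io and commons-configuration join the copied runtime deps; accumulo-fate and accumulo-trace replace cloudtrace -->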
- <includeArtifactIds>commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-core,commons-jexl,guava,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,cloudtrace</includeArtifactIds>
+ <includeArtifactIds>commons-io,commons-configuration,commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-core,commons-jexl,guava,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,accumulo-fate,accumulo-trace</includeArtifactIds>
<excludeTransitive>true</excludeTransitive>
</configuration>
</execution>
@@ -140,8 +169,6 @@
</plugin>
</plugins>
</build>
- <modelVersion>4.0.0</modelVersion>
-
<profiles>
<!-- profile for building against Hadoop 1.0.x
Activate by not specifying hadoop.profile -->
@@ -156,6 +183,7 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
+ <version>${hadoop.version}</version>
</dependency>
</dependencies>
</profile>
@@ -173,6 +201,13 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
</dependencies>
<build>
@@ -183,14 +218,14 @@
<executions>
<execution>
<id>copy-dependencies</id>
- <phase>prepare-package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
+ <phase>prepare-package</phase>
<configuration>
<outputDirectory>lib</outputDirectory>
<!-- just grab the non-provided runtime dependencies -->
- <includeArtifactIds>commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-client,hadoop-common,hadoop-hdfs,commons-jexl,guava,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,cloudtrace</includeArtifactIds>
+ <includeArtifactIds>commons-io,commons-configuration,commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-client,hadoop-common,hadoop-hdfs,commons-jexl,guava,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,accumulo-fate,accumulo-trace</includeArtifactIds>
<excludeTransitive>false</excludeTransitive>
</configuration>
</execution>
@@ -200,5 +235,4 @@
</build>
</profile>
</profiles>
-
</project>
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
index 734d423..47a55e1 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
@@ -47,8 +47,8 @@
private Text currentRow = null;
private Text currentTerm = new Text(emptyByteArray);
private Text currentDocID = new Text(emptyByteArray);
- private Text parentEndRow;
private static boolean SEEK_INCLUSIVE = true;
+ private Text parentEndRow;
/**
* Used in representing a Term that is intersected on.
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
index 09ad8d3..d9f3c94 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
@@ -1654,7 +1654,7 @@
node.setValid(false);
if (testTreeState()) {
// it's valid set nextKey and make sure it's not the same as topKey.
- if (topKey.compareTo(this.root.getTopKey()) != 0) {
+ if (!topKey.equals(this.root.getTopKey())) {
// topKey = this.root.getTopKey();
if (this.overallRange != null) {
if (this.overallRange.contains(root.getTopKey())) {
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java
index 353ce79..6b58d08 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java
@@ -1,4 +1,4 @@
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -21,36 +21,52 @@
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.file.map.MyMapFile;
-import org.apache.accumulo.core.file.map.MyMapFile.Reader;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.iterators.system.MapFileIterator;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
-@SuppressWarnings("deprecation")
+/**
+ * A simple IteratorEnvironment backed by a supplied (or the default) AccumuloConfiguration.
+ */
public class DefaultIteratorEnvironment implements IteratorEnvironment {
- public Reader reserveMapFileReader(String mapFileName) throws IOException {
+ AccumuloConfiguration conf;
+
+ public DefaultIteratorEnvironment(AccumuloConfiguration conf) {
+ this.conf = conf;
+ }
+
+ public DefaultIteratorEnvironment() {
+ this.conf = AccumuloConfiguration.getDefaultConfiguration();
+ }
+
+ @Override
+ public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
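+    // MapFileIterator takes over from the MyMapFile.Reader API used in 1.4.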
Configuration conf = CachedConfiguration.getInstance();
FileSystem fs = FileSystem.get(conf);
- return new MyMapFile.Reader(fs, mapFileName, conf);
+ return new MapFileIterator(this.conf, fs, mapFileName, conf);
}
+ @Override
public AccumuloConfiguration getConfig() {
- return AccumuloConfiguration.getDefaultConfiguration();
+ return conf;
}
+ @Override
public IteratorScope getIteratorScope() {
throw new UnsupportedOperationException();
}
+ @Override
public boolean isFullMajorCompaction() {
throw new UnsupportedOperationException();
}
+ @Override
public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {
throw new UnsupportedOperationException();
}
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
index d51023c..b2a0c83 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
@@ -85,7 +85,7 @@
/**
* @param key
- * @return
+ * @return The column visibility
*/
public ColumnVisibility getColumnVisibility(Key key) {
ColumnVisibility result = (ColumnVisibility) visibilityMap.get(key.getColumnVisibility());
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
index d7dab3a..024a865 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
@@ -175,11 +175,10 @@
Connector connector = null;
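+    // Keep the password out of exception messages and connection logs.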
if (null == instanceName || null == zooKeepers || null == username || null == password)
throw new EJBException("Required parameters not set. [instanceName = " + this.instanceName + ", zookeepers = " + this.zooKeepers + ", username = "
- + this.username + ", password = " + this.password + "]. Check values in ejb-jar.xml");
+          + this.username + (password == null ? ", password = null" : "") + "]. Check values in ejb-jar.xml");
Instance instance = new ZooKeeperInstance(this.instanceName, this.zooKeepers);
try {
- log.info("Connecting to [instanceName = " + this.instanceName + ", zookeepers = " + this.zooKeepers + ", username = " + this.username + ", password = "
- + this.password + "].");
+ log.info("Connecting to [instanceName = " + this.instanceName + ", zookeepers = " + this.zooKeepers + ", username = " + this.username + "].");
connector = instance.getConnector(this.username, this.password.getBytes());
} catch (Exception e) {
throw new EJBException("Error getting connector from instance", e);
@@ -209,11 +208,10 @@
Connector connector = null;
if (null == instanceName || null == zooKeepers || null == username || null == password)
throw new EJBException("Required parameters not set. [instanceName = " + this.instanceName + ", zookeepers = " + this.zooKeepers + ", username = "
- + this.username + ", password = " + this.password + "]. Check values in ejb-jar.xml");
+          + this.username + (password == null ? ", password = null" : "") + "]. Check values in ejb-jar.xml");
Instance instance = new ZooKeeperInstance(this.instanceName, this.zooKeepers);
try {
- log.info("Connecting to [instanceName = " + this.instanceName + ", zookeepers = " + this.zooKeepers + ", username = " + this.username + ", password = "
- + this.password + "].");
+ log.info("Connecting to [instanceName = " + this.instanceName + ", zookeepers = " + this.zooKeepers + ", username = " + this.username + "].");
connector = instance.getConnector(this.username, this.password.getBytes());
} catch (Exception e) {
throw new EJBException("Error getting connector from instance", e);
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
similarity index 94%
rename from query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
rename to query/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
index 24e7379..ac8241e 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ b/query/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -34,12 +34,12 @@
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.ContextFactory;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
@@ -58,6 +58,7 @@
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.log4j.Level;
@@ -120,13 +121,14 @@
conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
MockInstance i = new MockInstance();
- c = i.getConnector("root", "");
+ c = i.getConnector("root", new PasswordToken(""));
WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
for (String table : TABLE_NAMES) {
writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
}
- TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);
+ TaskAttemptID id = new TaskAttemptID();
+ TaskAttemptContext context = new TaskAttemptContext(conf, id);
RawLocalFileSystem fs = new RawLocalFileSystem();
fs.setConf(conf);
@@ -148,7 +150,7 @@
WikipediaMapper mapper = new WikipediaMapper();
// Load data into Mock Accumulo
- Mapper<LongWritable,Text,Text,Mutation>.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
+ Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context(conf, id, rr, rw, oc, sr, split);
mapper.run(con);
// Flush and close record writers.
diff --git a/query/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
new file mode 100644
index 0000000..cbeefd9
--- /dev/null
+++ b/query/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.logic;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+
+import junit.framework.Assert;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
+import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
+import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
+import org.apache.accumulo.examples.wikisearch.sample.Document;
+import org.apache.accumulo.examples.wikisearch.sample.Field;
+import org.apache.accumulo.examples.wikisearch.sample.Results;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.task.MapContextImpl;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.hadoop.conf.Configuration.IntegerRanges;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.security.Credentials;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestQueryLogic {
+
+ private static final String METADATA_TABLE_NAME = "wikiMetadata";
+
+ private static final String TABLE_NAME = "wiki";
+
+ private static final String INDEX_TABLE_NAME = "wikiIndex";
+
+ private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
+
+ private static final String TABLE_NAMES[] = {METADATA_TABLE_NAME, TABLE_NAME, RINDEX_TABLE_NAME, INDEX_TABLE_NAME};
+
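+  // Routes mutations from the mapper into the BatchWriters that back the MockInstance tables.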
+ private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
+ @Override
+ public void write(Text key, Mutation value) throws IOException, InterruptedException {
+ try {
+ writerMap.get(key).addMutation(value);
+ } catch (MutationsRejectedException e) {
+ throw new IOException("Error adding mutation", e);
+ }
+ }
+
+ @Override
+ public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+ try {
+ for (BatchWriter w : writerMap.values()) {
+ w.flush();
+ w.close();
+ }
+ } catch (MutationsRejectedException e) {
+ throw new IOException("Error closing Batch Writer", e);
+ }
+ }
+
+ }
+
+ private Connector c = null;
+ private Configuration conf = new Configuration();
+ private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
+ private QueryLogic table = null;
+
+ @Before
+ public void setup() throws Exception {
+
+ Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
+ Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
+ Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);
+
+ conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
+ conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
+ conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
+ conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
+ conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
+
+ MockInstance i = new MockInstance();
+ c = i.getConnector("root", new PasswordToken(""));
+ WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
+ for (String table : TABLE_NAMES) {
+ writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
+ }
+
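+    // The Accumulo ContextFactory helper is gone; build the Hadoop 2 task attempt context directly.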
+    TaskAttemptID id = new TaskAttemptID("fake", 1, TaskType.MAP, 1, 1);
+ TaskAttemptContext context = new TaskAttemptContextImpl(conf, id);
+
+ RawLocalFileSystem fs = new RawLocalFileSystem();
+ fs.setConf(conf);
+
+ URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
+ Assert.assertNotNull(url);
+ File data = new File(url.toURI());
+ Path tmpFile = new Path(data.getAbsolutePath());
+
+ // Setup the Mapper
+ WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
+ AggregatingRecordReader rr = new AggregatingRecordReader();
+ Path ocPath = new Path(tmpFile, "oc");
+ OutputCommitter oc = new FileOutputCommitter(ocPath, context);
+ fs.deleteOnExit(ocPath);
+ StandaloneStatusReporter sr = new StandaloneStatusReporter();
+ rr.initialize(split, context);
+ MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
+ WikipediaMapper mapper = new WikipediaMapper();
+
+    // Hadoop 2's Mapper.Context is abstract, so wrap the MapContextImpl in an anonymous Context that delegates every call to it.
+ final MapContextImpl<LongWritable,Text,Text,Mutation> mapContext = new MapContextImpl<LongWritable,Text,Text,Mutation>(conf, id, rr, rw, oc, sr, split);
+ // Load data into Mock Accumulo
+ Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context() {
+ /**
+ * Get the input split for this map.
+ */
+ public InputSplit getInputSplit() {
+ return mapContext.getInputSplit();
+ }
+
+ @Override
+ public LongWritable getCurrentKey() throws IOException, InterruptedException {
+ return mapContext.getCurrentKey();
+ }
+
+ @Override
+ public Text getCurrentValue() throws IOException, InterruptedException {
+ return mapContext.getCurrentValue();
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ return mapContext.nextKeyValue();
+ }
+
+ @Override
+ public Counter getCounter(Enum<?> counterName) {
+ return mapContext.getCounter(counterName);
+ }
+
+ @Override
+ public Counter getCounter(String groupName, String counterName) {
+ return mapContext.getCounter(groupName, counterName);
+ }
+
+ @Override
+ public OutputCommitter getOutputCommitter() {
+ return mapContext.getOutputCommitter();
+ }
+
+ @Override
+ public void write(Text key, Mutation value) throws IOException,
+ InterruptedException {
+ mapContext.write(key, value);
+ }
+
+ @Override
+ public String getStatus() {
+ return mapContext.getStatus();
+ }
+
+ @Override
+ public TaskAttemptID getTaskAttemptID() {
+ return mapContext.getTaskAttemptID();
+ }
+
+ @Override
+ public void setStatus(String msg) {
+ mapContext.setStatus(msg);
+ }
+
+ @Override
+ public Path[] getArchiveClassPaths() {
+ return mapContext.getArchiveClassPaths();
+ }
+
+ @Override
+ public String[] getArchiveTimestamps() {
+ return mapContext.getArchiveTimestamps();
+ }
+
+ @Override
+ public URI[] getCacheArchives() throws IOException {
+ return mapContext.getCacheArchives();
+ }
+
+ @Override
+ public URI[] getCacheFiles() throws IOException {
+      return mapContext.getCacheFiles();
+ }
+
+ @Override
+ public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass()
+ throws ClassNotFoundException {
+ return mapContext.getCombinerClass();
+ }
+
+ @Override
+ public Configuration getConfiguration() {
+ return mapContext.getConfiguration();
+ }
+
+ @Override
+ public Path[] getFileClassPaths() {
+ return mapContext.getFileClassPaths();
+ }
+
+ @Override
+ public String[] getFileTimestamps() {
+ return mapContext.getFileTimestamps();
+ }
+
+ @Override
+ public RawComparator<?> getGroupingComparator() {
+ return mapContext.getGroupingComparator();
+ }
+
+ @Override
+ public Class<? extends InputFormat<?, ?>> getInputFormatClass()
+ throws ClassNotFoundException {
+ return mapContext.getInputFormatClass();
+ }
+
+ @Override
+ public String getJar() {
+ return mapContext.getJar();
+ }
+
+ @Override
+ public JobID getJobID() {
+ return mapContext.getJobID();
+ }
+
+ @Override
+ public String getJobName() {
+ return mapContext.getJobName();
+ }
+
+ /*@Override
+ public boolean userClassesTakesPrecedence() {
+ return mapContext.userClassesTakesPrecedence();
+ }*/
+
+ @Override
+ public boolean getJobSetupCleanupNeeded() {
+ return mapContext.getJobSetupCleanupNeeded();
+ }
+
+ @Override
+ public boolean getTaskCleanupNeeded() {
+ return mapContext.getTaskCleanupNeeded();
+ }
+
+ @Override
+ public Path[] getLocalCacheArchives() throws IOException {
+ return mapContext.getLocalCacheArchives();
+ }
+
+ @Override
+ public Path[] getLocalCacheFiles() throws IOException {
+ return mapContext.getLocalCacheFiles();
+ }
+
+ @Override
+ public Class<?> getMapOutputKeyClass() {
+ return mapContext.getMapOutputKeyClass();
+ }
+
+ @Override
+ public Class<?> getMapOutputValueClass() {
+ return mapContext.getMapOutputValueClass();
+ }
+
+ @Override
+ public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass()
+ throws ClassNotFoundException {
+ return mapContext.getMapperClass();
+ }
+
+ @Override
+ public int getMaxMapAttempts() {
+ return mapContext.getMaxMapAttempts();
+ }
+
+ @Override
+ public int getMaxReduceAttempts() {
+ return mapContext.getMaxReduceAttempts();
+ }
+
+ @Override
+ public int getNumReduceTasks() {
+ return mapContext.getNumReduceTasks();
+ }
+
+ @Override
+ public Class<? extends OutputFormat<?, ?>> getOutputFormatClass()
+ throws ClassNotFoundException {
+ return mapContext.getOutputFormatClass();
+ }
+
+ @Override
+ public Class<?> getOutputKeyClass() {
+ return mapContext.getOutputKeyClass();
+ }
+
+ @Override
+ public Class<?> getOutputValueClass() {
+ return mapContext.getOutputValueClass();
+ }
+
+ @Override
+ public Class<? extends Partitioner<?, ?>> getPartitionerClass()
+ throws ClassNotFoundException {
+ return mapContext.getPartitionerClass();
+ }
+
+ @Override
+ public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass()
+ throws ClassNotFoundException {
+ return mapContext.getReducerClass();
+ }
+
+ @Override
+ public RawComparator<?> getSortComparator() {
+ return mapContext.getSortComparator();
+ }
+
+ @Override
+ public boolean getSymlink() {
+ return mapContext.getSymlink();
+ }
+
+ @Override
+ public Path getWorkingDirectory() throws IOException {
+ return mapContext.getWorkingDirectory();
+ }
+
+ @Override
+ public void progress() {
+ mapContext.progress();
+ }
+
+ @Override
+ public boolean getProfileEnabled() {
+ return mapContext.getProfileEnabled();
+ }
+
+ @Override
+ public String getProfileParams() {
+ return mapContext.getProfileParams();
+ }
+
+ @Override
+ public IntegerRanges getProfileTaskRange(boolean isMap) {
+ return mapContext.getProfileTaskRange(isMap);
+ }
+
+ @Override
+ public String getUser() {
+ return mapContext.getUser();
+ }
+
+ @Override
+ public Credentials getCredentials() {
+ return mapContext.getCredentials();
+ }
+
+ @Override
+ public float getProgress() {
+ return mapContext.getProgress();
+ }
+ };
+
+ mapper.run(con);
+
+ // Flush and close record writers.
+ rw.close(context);
+
+ table = new QueryLogic();
+ table.setMetadataTableName(METADATA_TABLE_NAME);
+ table.setTableName(TABLE_NAME);
+ table.setIndexTableName(INDEX_TABLE_NAME);
+ table.setReverseIndexTableName(RINDEX_TABLE_NAME);
+ table.setUseReadAheadIterator(false);
+ table.setUnevaluatedFields(Collections.singletonList("TEXT"));
+ }
+
+ void debugQuery(String tableName) throws Exception {
+ Scanner s = c.createScanner(tableName, new Authorizations("all"));
+ Range r = new Range();
+ s.setRange(r);
+ for (Entry<Key,Value> entry : s)
+ System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
+ }
+
+ @Test
+ public void testTitle() throws Exception {
+ Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
+ Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
+ List<String> auths = new ArrayList<String>();
+ auths.add("enwiki");
+
+ Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
+ List<Document> docs = results.getResults();
+ assertEquals(4, docs.size());
+
+ results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null);
+ docs = results.getResults();
+ assertEquals(1, docs.size());
+ for (Document doc : docs) {
+ System.out.println("id: " + doc.getId());
+ for (Field field : doc.getFields())
+ System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
+ }
+ }
+
+}