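Fix malformed XML comment delimiters in pom.xml files; replace the token loop in WikipediaMapper.getTokens with tok.toString()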
diff --git a/ingest/pom.xml b/ingest/pom.xml
index 1f6bc99..a8a0a6c 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -84,8 +84,8 @@
<phase>prepare-package</phase>
<configuration>
<outputDirectory>lib</outputDirectory>
- <!– just grab the non-provided runtime dependencies –>
- <!– XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 –>
+ <!-- just grab the non-provided runtime dependencies -->
+ <!-- XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 -->
<includeArtifactIds>commons-lang,guava,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,zookeeper,commons-codec,accumulo-fate,accumulo-trace</includeArtifactIds>
<excludeTransitive>false</excludeTransitive>
</configuration>
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
index c2fed03..63f1d42 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
@@ -226,13 +226,7 @@
static Set<String> getTokens(Article article) {
Set<String> tokenList = new HashSet<>();
try (WikipediaTokenizer tok = new WikipediaTokenizer(new StringReader(article.getText()))) {
- Attribute term = tok.addAttribute(Attribute.class);
- while (tok.incrementToken()) {
- String token = term.toString();
- if (!StringUtils.isEmpty(token)) {
- tokenList.add(token);
- }
- }
+ tokenList.add(tok.toString());
} catch (IOException e) {
log.error("Error tokenizing text", e);
}
diff --git a/pom.xml b/pom.xml
index ba62cf0..e997c9f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -375,7 +375,7 @@
<phase>prepare-package</phase>
<configuration>
<outputDirectory>../../lib</outputDirectory>
- <!– just grab the non-provided runtime dependencies –>
+ <!-- just grab the non-provided runtime dependencies -->
<includeArtifactIds>commons-collections,commons-configuration,commons-io,commons-lang,jline,log4j,libthrift,commons-jci-core,commons-jci-fam,commons-logging,commons-logging-api</includeArtifactIds>
<excludeGroupIds>accumulo</excludeGroupIds>
<excludeTransitive>true</excludeTransitive>