[HIVEMALL-304] Updated lucene version from 5.5.5 (java7) to 8.8.2 (java8)

## What changes were proposed in this pull request?

Updated lucene version from 5.5.5 (java7) to 8.8.2 (java8)

## What type of PR is it?

Improvement

## What is the Jira issue?

https://issues.apache.org/jira/browse/HIVEMALL-304

## How was this patch tested?

unit tests

## How to use this feature?

## Checklist

(Please remove this section if not needed; check `x` for YES, blank for NO)

- [x] Did you apply source code formatter, i.e., `./bin/format_code.sh`, for your commit?
- [ ] Did you run system tests on Hive (or Spark)?

Author: Makoto Yui <myui@apache.org>

Closes #234 from myui/lucene_version_up.

commit: dc461c2c7d1f7702659acab60c0b1334990a7b17 [log] [tgz]
author: Makoto Yui <myui@apache.org> Mon Apr 19 15:39:03 2021 +0900
committer: Makoto Yui <myui@apache.org> Mon Apr 19 15:39:03 2021 +0900
tree: fa57d47477486be32a41f11c91af4496d01df7b7
parent: 68f1d880d4c6cf919d9d62eb405fc989cc1c0985 [diff]
diff --git a/nlp/pom.xml b/nlp/pom.xml
index c163fea..0324ca1 100644
--- a/nlp/pom.xml
+++ b/nlp/pom.xml

@@ -32,6 +32,7 @@
 
 	<properties>
 		<main.basedir>${project.parent.basedir}</main.basedir>
+		<lucene.version>8.8.2</lucene.version>
 	</properties>
 
 	<dependencies>
@@ -99,13 +100,13 @@
 		<dependency>
 			<groupId>org.apache.lucene</groupId>
 			<artifactId>lucene-analyzers-kuromoji</artifactId>
-			<version>5.5.5</version>
+			<version>${lucene.version}</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.lucene</groupId>
 			<artifactId>lucene-analyzers-smartcn</artifactId>
-			<version>5.5.5</version>
+			<version>${lucene.version}</version>
 			<scope>compile</scope>
 		</dependency>
 

diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
index 4a58bae..879c1a5 100644
--- a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
+++ b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java

@@ -55,6 +55,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.Text;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
 import org.apache.lucene.analysis.ja.JapaneseTokenizer;
@@ -62,7 +63,6 @@
 import org.apache.lucene.analysis.ja.dict.UserDictionary;
 import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 @Description(name = "tokenize_ja",
         value = "_FUNC_(String line [, const string mode = \"normal\", const array<string> stopWords, const array<string> stopTags, const array<string> userDict (or string userDictURL)])"

diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java
index cf6249f..93c8620 100644
--- a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java
+++ b/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java

@@ -41,7 +41,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 
 @Description(name = "tokenize_cn", value = "_FUNC_(String line [, const list<string> stopWords])"
         + " - returns tokenized strings in array<string>")
commit	dc461c2c7d1f7702659acab60c0b1334990a7b17	[log] [tgz]
author	Makoto Yui <myui@apache.org>	Mon Apr 19 15:39:03 2021 +0900
committer	Makoto Yui <myui@apache.org>	Mon Apr 19 15:39:03 2021 +0900
tree	fa57d47477486be32a41f11c91af4496d01df7b7
parent	68f1d880d4c6cf919d9d62eb405fc989cc1c0985 [diff]