[HIVEMALL-289] Add str_contains(string query, array<string> searchTerms [, boolean orQuery=false]) UDF

## What changes were proposed in this pull request?

Add str_contains(string query, array<string> searchTerms [, boolean orQuery=false]) UDF

## What type of PR is it?

Feature

## What is the Jira issue?

https://issues.apache.org/jira/browse/HIVEMALL-289

## How was this patch tested?

manual tests on EMR

## How to use this feature?

```sql
select
  str_contains('There are apple and orange', array('apple')),
  str_contains('There are apple and orange', array('apple', 'banana'), true),
  str_contains('There are apple and orange', array('apple', 'banana'), false);
> true, true, false
```

## Checklist

- [x] Did you apply source code formatter, i.e., `./bin/format_code.sh`, for your commit?
- [x] Did you run system tests on Hive (or Spark)?

Author: Makoto Yui <myui@apache.org>

Closes #225 from myui/HIVEMALL-289.
diff --git a/ChangeLog.md b/ChangeLog.md
index 244c9c5..2e2ef31 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -1,3 +1,22 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
 # v0.6.0 - Dec 19, 2019
 
 Major new features in this release includes:
diff --git a/core/src/main/java/hivemall/tools/strings/StrContainsUDF.java b/core/src/main/java/hivemall/tools/strings/StrContainsUDF.java
new file mode 100644
index 0000000..d7468a4
--- /dev/null
+++ b/core/src/main/java/hivemall/tools/strings/StrContainsUDF.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.strings;
+
+import hivemall.utils.hadoop.HiveUtils;
+
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+
+/**
+ * UDF that tests whether a query string contains the given search terms.
+ *
+ * <p>
+ * By default every search term must occur in the query (AND semantics); when the optional
+ * {@code orQuery} argument is true, a single matching term is sufficient (OR semantics).
+ */
+//@formatter:off
+@Description(name = "str_contains",
+        value = "_FUNC_(string query, array<string> searchTerms [, boolean orQuery=false])"
+                + " - Returns true if the given query contains search terms",
+        extended = "select\n" +
+                "  str_contains('There are apple and orange', array('apple')), -- or=false\n" +
+                "  str_contains('There are apple and orange', array('apple', 'banana'), true), -- or=true\n" +
+                "  str_contains('There are apple and orange', array('apple', 'banana'), false); -- or=false\n" +
+                "> true, true, false")
+//@formatter:on
+@UDFType(deterministic = true, stateful = false)
+public final class StrContainsUDF extends GenericUDF {
+
+    private StringObjectInspector queryOI;
+    private ListObjectInspector searchTermsOI;
+    // Assigned in initialize() only when a third argument is supplied.
+    private BooleanObjectInspector orQueryOI;
+
+    /**
+     * Validates the argument count and types and captures their object inspectors.
+     *
+     * @param argOIs inspectors for (string query, array&lt;string&gt; searchTerms [, boolean orQuery])
+     * @return the Java boolean object inspector describing the result
+     * @throws UDFArgumentException if the number or the types of the arguments are invalid
+     */
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
+        if (argOIs.length != 2 && argOIs.length != 3) {
+            throw new UDFArgumentLengthException("str_contains expects two or three arguments");
+        }
+
+        this.queryOI = HiveUtils.asStringOI(argOIs, 0);
+        if (!HiveUtils.isStringListOI(argOIs[1])) {
+            throw new UDFArgumentTypeException(1,
+                "Expected array<string> for the second argument but got "
+                        + argOIs[1].getTypeName());
+        }
+        this.searchTermsOI = HiveUtils.asListOI(argOIs, 1);
+
+        if (argOIs.length == 3) {
+            this.orQueryOI = HiveUtils.asBooleanOI(argOIs, 2);
+        }
+
+        return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
+    }
+
+    /**
+     * Evaluates the containment check for one row.
+     *
+     * @param args deferred values for (query, searchTerms [, orQuery])
+     * @return {@code null} if the query is null; {@code false} if the search terms are null or
+     *         empty; otherwise whether all (AND, the default) or any (OR) of the terms occur
+     *         in the query. A null term never matches.
+     * @throws HiveException if evaluating a deferred argument fails
+     */
+    @Override
+    public Boolean evaluate(DeferredObject[] args) throws HiveException {
+        final String query = queryOI.getPrimitiveJavaObject(args[0].get());
+        if (query == null) {
+            return null;
+        }
+
+        final List<String> searchTerms = HiveUtils.asStringList(args[1], searchTermsOI);
+        if (searchTerms == null || searchTerms.isEmpty()) {
+            return Boolean.FALSE;
+        }
+
+        boolean orQuery = false;
+        if (args.length == 3) {
+            // BooleanObjectInspector#get returns a primitive boolean and cannot represent NULL,
+            // so a NULL flag falls back to the default (AND) instead of being passed through.
+            final Object orQueryArg = args[2].get();
+            if (orQueryArg != null) {
+                orQuery = orQueryOI.get(orQueryArg);
+            }
+        }
+
+        // String#contains(null) throws NullPointerException, hence the explicit null checks:
+        // a null term can never satisfy OR and always fails AND.
+        if (orQuery) {
+            for (String term : searchTerms) {
+                if (term != null && query.contains(term)) {
+                    return Boolean.TRUE;
+                }
+            }
+            return Boolean.FALSE;
+        } else {
+            for (String term : searchTerms) {
+                if (term == null || !query.contains(term)) {
+                    return Boolean.FALSE;
+                }
+            }
+            return Boolean.TRUE;
+        }
+    }
+
+    @Override
+    public String getDisplayString(String[] args) {
+        return "str_contains(" + StringUtils.join(args, ',') + ')';
+    }
+}
diff --git a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
index 293d236..b82f6d4 100644
--- a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
+++ b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
@@ -1066,6 +1066,17 @@
     }
 
     @Nonnull
+    public static BooleanObjectInspector asBooleanOI(@Nonnull final ObjectInspector[] argOIs,
+            final int argIndex) throws UDFArgumentException {
+        ObjectInspector argOI = getObjectInspector(argOIs, argIndex);
+        if (!BOOLEAN_TYPE_NAME.equals(argOI.getTypeName())) {
+            throw new UDFArgumentTypeException(argIndex,
+                "Argument type must be Boolean: " + argOI.getTypeName());
+        }
+        return (BooleanObjectInspector) argOI;
+    }
+
+    @Nonnull
     public static IntObjectInspector asIntOI(@Nonnull final ObjectInspector argOI)
             throws UDFArgumentException {
         if (!INT_TYPE_NAME.equals(argOI.getTypeName())) {
diff --git a/resources/ddl/define-all-as-permanent.hive b/resources/ddl/define-all-as-permanent.hive
index 006bef9..f995d55 100644
--- a/resources/ddl/define-all-as-permanent.hive
+++ b/resources/ddl/define-all-as-permanent.hive
@@ -676,6 +676,9 @@
 DROP FUNCTION IF EXISTS word_ngrams;
 CREATE FUNCTION word_ngrams as 'hivemall.tools.text.WordNgramsUDF' USING JAR '${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS str_contains;
+CREATE FUNCTION str_contains as 'hivemall.tools.strings.StrContainsUDF' USING JAR '${hivemall_jar}';
+
 ---------------------------------
 -- Dataset generator functions --
 ---------------------------------
diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive
index c0e319c..bf9bc7c 100644
--- a/resources/ddl/define-all.hive
+++ b/resources/ddl/define-all.hive
@@ -668,6 +668,9 @@
 drop temporary function if exists word_ngrams;
 create temporary function word_ngrams as 'hivemall.tools.text.WordNgramsUDF';
 
+drop temporary function if exists str_contains;
+create temporary function str_contains as 'hivemall.tools.strings.StrContainsUDF';
+
 ---------------------------------
 -- Dataset generator functions --
 ---------------------------------
diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark
index 84c1b9c..8529134 100644
--- a/resources/ddl/define-all.spark
+++ b/resources/ddl/define-all.spark
@@ -652,6 +652,9 @@
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS word_ngrams")
 sqlContext.sql("CREATE TEMPORARY FUNCTION word_ngrams AS 'hivemall.tools.text.WordNgramsUDF'")
 
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS str_contains")
+sqlContext.sql("CREATE TEMPORARY FUNCTION str_contains AS 'hivemall.tools.strings.StrContainsUDF'")
+
 /**
  * Dataset generator functions
  */
diff --git a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
index 54d7d4e..c383bd9 100644
--- a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
+++ b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
@@ -96,7 +96,7 @@
         genericFuncsHeaders.put("# Sanity Checks",
             Collections.singletonList("hivemall.tools.sanity"));
         genericFuncsHeaders.put("# Text processing",
-            Collections.singletonList("hivemall.tools.text"));
+            Arrays.asList("hivemall.tools.text", "hivemall.tools.strings"));
         genericFuncsHeaders.put("# Timeseries",
             Collections.singletonList("hivemall.tools.timeseries"));
         genericFuncsHeaders.put("# Others", Collections.singletonList("hivemall.tools"));