TEZ-4428: Use protoc-jar-maven-plugin to generate protobuf classes (#218) (Mark Bathori reviewed by Martin Tzvetanov Grigorov, Sylwester Lachiewicz, Laszlo Bodor)

diff --git a/.travis.yml b/.travis.yml
index 3637a0d..65eaf77 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,9 +22,6 @@
 
 env: MAVEN_OPTS="-Xmx2G -XX:MaxPermSize=512M"
 
-before_install:
-  - ./build-tools/install-protobuf.sh
-
 script:
   - jdk_switcher use oraclejdk8
   - mvn -B clean install package -DskipTests=true -Dmaven.javadoc.skip=true
diff --git a/BUILDING.txt b/BUILDING.txt
index ae81d9c..8b89a55 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -122,10 +122,11 @@
 
 The default version is defined in the root pom.xml.
 
-If you have multiple versions of protoc in your system, you can set in your 
-build shell the PROTOC_PATH environment variable to point to the one you 
-want to use for the Tez build. If you don't define this environment variable,
-protoc is looked up in the PATH.
+If you have multiple versions of protoc in your system, you can set in your
+build shell the PROTOC_PATH environment variable to point to the one you
+want to use for the Tez build. If you don't define this environment variable, the
+protoc-jar-maven-plugin uses its embedded protoc compiler, in the version defined by
+${protobuf.version}: it detects the platform and runs the matching protoc binary at build time.
 
 You can also specify the path to protoc while building using -Dprotoc.path
 
diff --git a/build-tools/docker/Dockerfile b/build-tools/docker/Dockerfile
index 3e79260..f51cc3b 100644
--- a/build-tools/docker/Dockerfile
+++ b/build-tools/docker/Dockerfile
@@ -55,8 +55,6 @@
     rsync \
     software-properties-common \
     ssh-client \
-    sudo \
-    wget \
     xz-utils \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
diff --git a/build-tools/install-protobuf.sh b/build-tools/install-protobuf.sh
deleted file mode 100755
index c28729a..0000000
--- a/build-tools/install-protobuf.sh
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script attempts to install an arbitrary version of protobuf if needed.
-# The desired version should be the first parameter: $1.
-# Typical usage: the script is automatically called from tez-api (by maven) during the build process.
-
-# This script runs from build-tools folder. The user can remove
-# the dynamically installed protobuf anytime like:
-# rm -rf ./build-tools/protobuf/ #from root folder
-
-set -x
-PROTOBUF_VERSION=${1:-2.5.0}
-PROTOBUF_MAJOR_VERSION=$(echo "$PROTOBUF_VERSION" | cut -d. -f1)
-if [ -n "$ZSH_VERSION" ]; then
-   SCRIPT_DIR="${0:a:h}"
-else
-   SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-fi
-
-function install_protobuf {
-    # before protobuf 3, there is no pre-compiled executables are host on github, let's try to build and make it
-    if (( PROTOBUF_MAJOR_VERSION < 3 )); then
-        wget "https://github.com/google/protobuf/releases/download/v$PROTOBUF_VERSION/protobuf-$PROTOBUF_VERSION.tar.gz"
-        tar -xzvf "protobuf-$PROTOBUF_VERSION.tar.gz"
-        rm "protobuf-$PROTOBUF_VERSION.tar.gz"
-        cd "protobuf-$PROTOBUF_VERSION" && ./configure --prefix=/usr && make && sudo make install
-    # since protobuf 3, there are precompiled protoc executables on github, let's quickly download and use it
-    else
-        ARCH=`uname -m`
-        case "$(uname -s)" in
-            Darwin)
-                FILE_NAME="protoc-$PROTOBUF_VERSION-osx-$ARCH"
-                ;;
-            Linux)
-                if test $ARCH = "aarch64"; then
-                    ARCH="aarch_64"
-                fi
-                FILE_NAME="protoc-$PROTOBUF_VERSION-linux-$ARCH"
-                ;;
-            *)
-                echo "Unsupported OS returned by uname -s, you'll have to install protobuf 3.x manually"
-                exit 1
-                ;;
-        esac
-        rm -f "$FILE_NAME.zip" #cleanup unfinished file if any
-        wget "https://github.com/google/protobuf/releases/download/v$PROTOBUF_VERSION/$FILE_NAME.zip"
-        mkdir "$SCRIPT_DIR/protobuf"
-        unzip -o "$FILE_NAME.zip" -d "$SCRIPT_DIR/protobuf"
-        rm "$FILE_NAME.zip"
-    fi
-}
-
-if test -f "$SCRIPT_DIR/protobuf/bin/protoc"; then
-    PROTOBUF_INSTALLED_VERSION=$("$SCRIPT_DIR/protobuf/bin/protoc" --version)
-else
-    PROTOBUF_INSTALLED_VERSION=$(protoc --version)
-fi
-
-PROTOC_EXIT_CODE=$?
-
-if [ $PROTOC_EXIT_CODE -eq 0 ]; then
-    PROTOBUF_INSTALLED_VERSION=$(echo "$PROTOBUF_INSTALLED_VERSION" | tr -s ' ' | cut -d ' ' -f 2)
-    if [ "$PROTOBUF_INSTALLED_VERSION" == "$PROTOBUF_VERSION" ]; then
-        echo "Current protobuf version is equal to the requested ($PROTOBUF_INSTALLED_VERSION), exiting..."
-    else
-        echo "Current protobuf version ($PROTOBUF_INSTALLED_VERSION) is not equal to the requested ($PROTOBUF_VERSION), installing $PROTOBUF_VERSION"
-        install_protobuf
-    fi
-else
-    echo "protoc --version command had non-zero return value, need to install probuf"
-    install_protobuf
-fi
diff --git a/build-tools/protocw b/build-tools/protocw
deleted file mode 100755
index 6196071..0000000
--- a/build-tools/protocw
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-### This is a protoc wrapper for tez, which can dinamically call protoc from a downloaded protobuf.
-
-if [ -n "$ZSH_VERSION" ]; then
-   SCRIPT_DIR="${0:a:h}"
-else
-   SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-fi
-
-if test -f "$SCRIPT_DIR/protobuf/bin/protoc"; then
-    "$SCRIPT_DIR/protobuf/bin/protoc" "$@"
-else
-    protoc "$@"
-fi
-exit $?
diff --git a/pom.xml b/pom.xml
index ebc1198..5f381cd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1310,28 +1310,6 @@
         </dependency>
       </dependencies>
     </profile>
-    <!-- This is because <protoc.path>${env.PROTOC_PATH}</protoc.path> above
-         doesn't let us define a default value in the absence of env.PROTOC_PATH.
-         By defining this profile, the following order is considered:
-
-         0. protoc.path == env.PROTOC_PATH by pom.xml, if protoc.path is not defined
-         1. -Dprotoc.path: if defined, it wins
-         2. env.PROTOC_PATH: if protoc.path is not defined, but env.PROTOC_PATH is defined, env.PROTOC_PATH wins
-            (because protoc.path ==> env.PROTOC_PATH)
-         3. if neither -Dprotoc.path, nor PROTOC_PATH is defined, protocw script will run
-            (which can run protoc from the PATH, or an automatically installed version from build-tools/protobuf)
-    -->
-    <profile>
-        <id>protoc-path-env-variable-not-defined</id>
-        <activation>
-            <property>
-                <name>!env.PROTOC_PATH</name>
-            </property>
-        </activation>
-        <properties>
-            <protoc.path>${basedir}/../build-tools/protocw</protoc.path>
-        </properties>
-    </profile>
   </profiles>
 
   <reporting>
diff --git a/tez-api/pom.xml b/tez-api/pom.xml
index cd5abdb..1d10bc1 100644
--- a/tez-api/pom.xml
+++ b/tez-api/pom.xml
@@ -145,53 +145,28 @@
         <groupId>org.apache.rat</groupId>
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
-      <!-- This plugin takes care of on-the-fly installation of the needed protobuf version.
-           The needed version is always what's defined as protobuf.version in the pom,
-           so if user wants to change protobuf version quickly in development time,
-           supposed to change only protobuf.version and then rebuild tez-api. -->
       <plugin>
-        <artifactId>exec-maven-plugin</artifactId>
-        <groupId>org.codehaus.mojo</groupId>
-        <version>1.6.0</version>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
+        <version>3.11.4</version>
         <executions>
           <execution>
-            <id>Install protobuf</id>
-            <phase>initialize</phase>
-            <goals>
-              <goal>exec</goal>
-            </goals>
-            <configuration>
-              <executable>${basedir}/../build-tools/install-protobuf.sh</executable>
-              <arguments>${protobuf.version}</arguments>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
-        <executions>
-          <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
             </goals>
             <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/main/proto</param>
-              </imports>
-              <source>
-                <directory>${basedir}/src/main/proto</directory>
-                <includes>
-                  <include>DAGApiRecords.proto</include>
-                  <include>DAGClientAMProtocol.proto</include>
-                  <include>Events.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-sources/java</output>
+              <addSources>none</addSources>
+              <inputDirectories>
+                <include>${basedir}/src/main/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <outputDirectory>${project.build.directory}/generated-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>
diff --git a/tez-dag/pom.xml b/tez-dag/pom.xml
index 356ed2c..fda9fb2 100644
--- a/tez-dag/pom.xml
+++ b/tez-dag/pom.xml
@@ -211,30 +211,31 @@
         </configuration>
       </plugin>
       <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
+        <version>3.11.4</version>
         <executions>
           <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
             </goals>
             <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/main/proto</param>
-                <param>${basedir}/../tez-api/src/main/proto</param>
-                <param>${basedir}/../tez-runtime-internals/src/main/proto</param>
-              </imports>
-              <source>
-                <directory>${basedir}/src/main/proto</directory>
-                <includes>
-                  <include>HistoryEvents.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-sources/java</output>
+              <addSources>none</addSources>
+              <includeDirectories>
+                <include>${basedir}/../tez-api/src/main/proto</include>
+                <include>${basedir}/../tez-runtime-internals/src/main/proto</include>
+              </includeDirectories>
+              <inputDirectories>
+                <include>${basedir}/src/main/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <outputDirectory>${project.build.directory}/generated-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>
diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml
index 609b896..a0bcbe3 100644
--- a/tez-ext-service-tests/pom.xml
+++ b/tez-ext-service-tests/pom.xml
@@ -165,29 +165,30 @@
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
       <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
+        <version>3.11.4</version>
         <executions>
           <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
             </goals>
             <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/test/proto</param>
+              <addSources>none</addSources>
+              <includeDirectories>
-                <param>${basedir}/../tez-api/src/main/proto</param>
+                <include>${basedir}/../tez-api/src/main/proto</include>
-              </imports>
-              <source>
-                <directory>${basedir}/src/test/proto</directory>
-                <includes>
-                  <include>TezDaemonProtocol.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-test-sources/java</output>
+              </includeDirectories>
+              <inputDirectories>
+                <include>${basedir}/src/test/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <outputDirectory>${project.build.directory}/generated-test-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>
diff --git a/tez-mapreduce/pom.xml b/tez-mapreduce/pom.xml
index a22870f..1e35d2e 100644
--- a/tez-mapreduce/pom.xml
+++ b/tez-mapreduce/pom.xml
@@ -137,28 +137,27 @@
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
       <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
+        <version>3.11.4</version>
         <executions>
           <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
             </goals>
             <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/main/proto</param>
-              </imports>
-              <source>
-                <directory>${basedir}/src/main/proto</directory>
-                <includes>
-                  <include>MRRuntimeProtos.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-sources/java</output>
+              <addSources>none</addSources>
+              <inputDirectories>
+                <include>${basedir}/src/main/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <outputDirectory>${project.build.directory}/generated-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>
diff --git a/tez-plugins/tez-protobuf-history-plugin/pom.xml b/tez-plugins/tez-protobuf-history-plugin/pom.xml
index 607a31f..0fa5264 100644
--- a/tez-plugins/tez-protobuf-history-plugin/pom.xml
+++ b/tez-plugins/tez-protobuf-history-plugin/pom.xml
@@ -59,47 +59,31 @@
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
       <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
+        <version>3.11.4</version>
         <executions>
           <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
             </goals>
             <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/main/proto</param>
-              </imports>
-              <source>
-                <directory>${basedir}/src/main/proto</directory>
-                <includes>
-                  <include>HistoryLogger.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-sources/java</output>
+              <addSources>none</addSources>
+              <inputDirectories>
+                <include>${basedir}/src/main/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <outputDirectory>${project.build.directory}/generated-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>
       </plugin>
     </plugins>
   </build>
-
-  <profiles>
-    <!-- For further details, please refer to the profile definition in root pom.xml -->
-    <profile>
-        <id>protoc-path-env-variable-not-defined</id>
-        <activation>
-            <property>
-                <name>!env.PROTOC_PATH</name>
-            </property>
-        </activation>
-        <properties>
-            <protoc.path>${basedir}/../../build-tools/protocw</protoc.path>
-        </properties>
-    </profile>
-  </profiles>
 </project>
diff --git a/tez-runtime-internals/pom.xml b/tez-runtime-internals/pom.xml
index 0619ca1..efe01e0 100644
--- a/tez-runtime-internals/pom.xml
+++ b/tez-runtime-internals/pom.xml
@@ -91,28 +91,27 @@
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
       <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
+        <version>3.11.4</version>
         <executions>
           <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
             </goals>
             <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/main/proto</param>
-              </imports>
-              <source>
-                <directory>${basedir}/src/main/proto</directory>
-                <includes>
-                  <include>RuntimeEvents.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-sources/java</output>
+              <addSources>none</addSources>
+              <inputDirectories>
+                <include>${basedir}/src/main/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <outputDirectory>${project.build.directory}/generated-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>
diff --git a/tez-runtime-library/pom.xml b/tez-runtime-library/pom.xml
index c913e87..e6db47e 100644
--- a/tez-runtime-library/pom.xml
+++ b/tez-runtime-library/pom.xml
@@ -114,30 +114,27 @@
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
       <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
+        <version>3.11.4</version>
         <executions>
           <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
             </goals>
             <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/main/proto</param>
-              </imports>
-              <source>
-                <directory>${basedir}/src/main/proto</directory>
-                <includes>
-                  <include>ShufflePayloads.proto</include>
-                  <include>CartesianProductPayload.proto</include>
-                  <include>FairShufflePayloads.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-sources/java</output>
+              <addSources>none</addSources>
+              <inputDirectories>
+                <include>${basedir}/src/main/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <outputDirectory>${project.build.directory}/generated-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>