ARROW-7518: [Python] Use PYARROW_WITH_HDFS when building wheels, conda packages

Closes #6203 from kszucs/ARROW-7518 and squashes the following commits:

1e3f09195 <Krisztián Szűcs> test dataset import
6dd82a6d3 <Krisztián Szűcs> fix shebang
be3974826 <Krisztián Szűcs> fix flags
7d3668bb7 <Krisztián Szűcs> enable hdfs in the conda recipes and the wheels

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
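
The net effect across both packaging paths is the same: the C++ builds gain -DARROW_HDFS=ON, the Cython builds gain PYARROW_WITH_HDFS=1, and the wheel/conda smoke tests now import the resulting modules. For reference, a minimal standalone version of that smoke test, with the module list taken from the hunks below; it assumes an installed pyarrow built with these flags (plasma/dataset availability depends on the platform and Python-version guards in the recipes):

    # Sketch of the import smoke test this patch extends in the wheel/conda scripts.
    # Module names are copied from the diff; this standalone file is illustrative only.
    import sys

    import pyarrow
    import pyarrow.parquet
    import pyarrow.plasma    # built on the unix packaging paths in these recipes
    import pyarrow.fs        # new filesystem layer
    import pyarrow._hdfs     # only importable when PYARROW_WITH_HDFS=1 was set at build time

    if sys.version_info.major > 2:
        import pyarrow.dataset   # dataset module is Python 3 only in these recipes
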
diff --git a/dev/tasks/conda-recipes/arrow-cpp/bld.bat b/dev/tasks/conda-recipes/arrow-cpp/bld.bat
index fd87e0c..0588961 100644
--- a/dev/tasks/conda-recipes/arrow-cpp/bld.bat
+++ b/dev/tasks/conda-recipes/arrow-cpp/bld.bat
@@ -22,6 +22,7 @@
       -DARROW_MIMALLOC:BOOL=ON ^
       -DARROW_DATASET:BOOL=ON ^
       -DARROW_FLIGHT:BOOL=ON ^
+      -DARROW_HDFS:BOOL=ON ^
       -DARROW_PARQUET:BOOL=ON ^
       -DARROW_GANDIVA:BOOL=ON ^
       -DARROW_ORC:BOOL=ON ^
diff --git a/dev/tasks/conda-recipes/arrow-cpp/build.sh b/dev/tasks/conda-recipes/arrow-cpp/build.sh
index 70f4894..251187f 100644
--- a/dev/tasks/conda-recipes/arrow-cpp/build.sh
+++ b/dev/tasks/conda-recipes/arrow-cpp/build.sh
@@ -40,6 +40,7 @@
     -DARROW_PYTHON=ON \
     -DARROW_PARQUET=ON \
     -DARROW_GANDIVA=ON \
+    -DARROW_HDFS=ON \
     -DARROW_ORC=ON \
     -DARROW_S3=ON \
     -DCMAKE_AR=${AR} \
diff --git a/dev/tasks/conda-recipes/pyarrow/bld.bat b/dev/tasks/conda-recipes/pyarrow/bld.bat
index f06eb21..dbea193 100644
--- a/dev/tasks/conda-recipes/pyarrow/bld.bat
+++ b/dev/tasks/conda-recipes/pyarrow/bld.bat
@@ -16,6 +16,7 @@
 SET SETUPTOOLS_SCM_PRETEND_VERSION=%PKG_VERSION%
 SET PYARROW_BUILD_TYPE=release
 SET PYARROW_WITH_S3=1
+SET PYARROW_WITH_HDFS=1
 SET PYARROW_WITH_DATASET=1
 SET PYARROW_WITH_FLIGHT=1
 SET PYARROW_WITH_GANDIVA=1
diff --git a/dev/tasks/conda-recipes/pyarrow/build.sh b/dev/tasks/conda-recipes/pyarrow/build.sh
index 1242f4b..a095dc2 100644
--- a/dev/tasks/conda-recipes/pyarrow/build.sh
+++ b/dev/tasks/conda-recipes/pyarrow/build.sh
@@ -11,6 +11,7 @@
 export PYARROW_WITH_DATASET=1
 export PYARROW_WITH_FLIGHT=1
 export PYARROW_WITH_GANDIVA=1
+export PYARROW_WITH_HDFS=1
 export PYARROW_WITH_ORC=1
 export PYARROW_WITH_PARQUET=1
 export PYARROW_WITH_PLASMA=1
diff --git a/dev/tasks/conda-recipes/pyarrow/meta.yaml b/dev/tasks/conda-recipes/pyarrow/meta.yaml
index 6ea3d64..abc1720 100644
--- a/dev/tasks/conda-recipes/pyarrow/meta.yaml
+++ b/dev/tasks/conda-recipes/pyarrow/meta.yaml
@@ -44,13 +44,15 @@
 test:
   imports:
     - pyarrow
-    - pyarrow.fs
     - pyarrow.dataset  # [not py==27]
     - pyarrow.flight   # [not py==27]
     - pyarrow.gandiva  # [not py==27]
     - pyarrow.orc      # [unix]
     - pyarrow.parquet
     - pyarrow.plasma   # [unix]
+    - pyarrow.fs
+    - pyarrow._s3fs
+    - pyarrow._hdfs
 
   requires:
     - pytest
diff --git a/dev/tasks/python-wheels/azure.linux.yml b/dev/tasks/python-wheels/azure.linux.yml
index 23ebfd5..87510b8 100644
--- a/dev/tasks/python-wheels/azure.linux.yml
+++ b/dev/tasks/python-wheels/azure.linux.yml
@@ -44,7 +44,7 @@
         # TODO(kou): Uncomment this when we resolve "ADD never use cache" problem.
         # docker-compose build $BUILD_IMAGE
         docker-compose run \
-          -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} \
+          -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \
           -e PYTHON_VERSION="{{ python_version }}" \
           -e UNICODE_WIDTH="{{ unicode_width }}" \
           $BUILD_IMAGE
diff --git a/dev/tasks/python-wheels/manylinux-test.sh b/dev/tasks/python-wheels/manylinux-test.sh
index 6656801..4142791 100755
--- a/dev/tasks/python-wheels/manylinux-test.sh
+++ b/dev/tasks/python-wheels/manylinux-test.sh
@@ -41,6 +41,8 @@
 import pyarrow
 import pyarrow.parquet
 import pyarrow.plasma
+import pyarrow.fs
+import pyarrow._hdfs
 
 if sys.version_info.major > 2:
     import pyarrow.dataset
diff --git a/dev/tasks/python-wheels/osx-build.sh b/dev/tasks/python-wheels/osx-build.sh
index c896ef5..ffaf076 100755
--- a/dev/tasks/python-wheels/osx-build.sh
+++ b/dev/tasks/python-wheels/osx-build.sh
@@ -124,6 +124,7 @@
           -DARROW_BUILD_TESTS=OFF \
           -DARROW_DATASET=ON \
           -DARROW_DEPENDENCY_SOURCE=BUNDLED \
+          -DARROW_HDFS=ON \
           -DARROW_FLIGHT=ON \
           -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \
           -DARROW_JEMALLOC=ON \
@@ -166,6 +167,7 @@
 
     export PYARROW_WITH_DATASET=1
     export PYARROW_WITH_FLIGHT=1
+    export PYARROW_WITH_HDFS=1
     export PYARROW_WITH_PLASMA=1
     export PYARROW_WITH_PARQUET=1
     export PYARROW_WITH_ORC=0
@@ -220,6 +222,8 @@
 import pyarrow
 import pyarrow.parquet
 import pyarrow.plasma
+import pyarrow.fs
+import pyarrow._hdfs
 
 if sys.version_info.major > 2:
     import pyarrow.dataset
diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh
index bdc303f..52825fd 100755
--- a/python/manylinux1/build_arrow.sh
+++ b/python/manylinux1/build_arrow.sh
@@ -45,7 +45,7 @@
 # ARROW-6860: Disabling ORC in wheels until Protobuf static linking issues
 # across projects is resolved
 export PYARROW_WITH_ORC=0
-
+export PYARROW_WITH_HDFS=1
 export PYARROW_WITH_PARQUET=1
 export PYARROW_WITH_PLASMA=1
 export PYARROW_BUNDLE_ARROW_CPP=1
@@ -97,36 +97,38 @@
 ARROW_BUILD_DIR=/tmp/build-PY${PYTHON_VERSION}-${UNICODE_WIDTH}
 mkdir -p "${ARROW_BUILD_DIR}"
 pushd "${ARROW_BUILD_DIR}"
-cmake -DCMAKE_BUILD_TYPE=Release \
-    -DARROW_DEPENDENCY_SOURCE="SYSTEM" \
-    -DCMAKE_INSTALL_PREFIX=/arrow-dist \
-    -DCMAKE_INSTALL_LIBDIR=lib \
-    -DARROW_BUILD_TESTS=OFF \
-    -DARROW_BUILD_SHARED=ON \
+cmake \
+    -DCMAKE_BUILD_TYPE=Release \
     -DARROW_BOOST_USE_SHARED=ON \
+    -DARROW_BUILD_SHARED=ON \
+    -DARROW_BUILD_TESTS=OFF \
+    -DARROW_DATASET=${BUILD_ARROW_DATASET} \
+    -DARROW_DEPENDENCY_SOURCE="SYSTEM" \
+    -DARROW_FLIGHT=${BUILD_ARROW_FLIGHT} \
+    -DARROW_GANDIVA_JAVA=OFF \
     -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2;-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2/x86_64-CentOS-linux/" \
+    -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \
+    -DARROW_HDFS=ON \
     -DARROW_JEMALLOC=ON \
-    -DARROW_RPATH_ORIGIN=ON \
-    -DARROW_PYTHON=ON \
-    -DARROW_PARQUET=ON \
-    -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \
-    -DARROW_PLASMA=ON \
-    -DARROW_TENSORFLOW=ON \
     -DARROW_ORC=OFF \
-    -DORC_SOURCE=BUNDLED \
+    -DARROW_PARQUET=ON \
+    -DARROW_PLASMA=ON \
+    -DARROW_PYTHON=ON \
+    -DARROW_RPATH_ORIGIN=ON \
+    -DARROW_TENSORFLOW=ON \
+    -DARROW_WITH_BROTLI=ON \
     -DARROW_WITH_BZ2=ON \
-    -DARROW_WITH_ZLIB=ON \
-    -DARROW_WITH_ZSTD=ON \
     -DARROW_WITH_LZ4=ON \
     -DARROW_WITH_SNAPPY=ON \
-    -DARROW_WITH_BROTLI=ON \
-    -DARROW_DATASET=${BUILD_ARROW_DATASET} \
-    -DARROW_FLIGHT=${BUILD_ARROW_FLIGHT} \
-    -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \
-    -DARROW_GANDIVA_JAVA=OFF \
+    -DARROW_WITH_ZLIB=ON \
+    -DARROW_WITH_ZSTD=ON \
     -DBoost_NAMESPACE=arrow_boost \
     -DBOOST_ROOT=/arrow_boost_dist \
+    -DCMAKE_INSTALL_LIBDIR=lib \
+    -DCMAKE_INSTALL_PREFIX=/arrow-dist \
     -DOPENSSL_USE_STATIC_LIBS=ON \
+    -DORC_SOURCE=BUNDLED \
+    -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \
     -GNinja /arrow/cpp
 ninja
 ninja install
@@ -164,6 +166,8 @@
 import pyarrow
 import pyarrow.parquet
 import pyarrow.plasma
+import pyarrow.fs
+import pyarrow._hdfs
 
 if sys.version_info.major > 2:
     import pyarrow.dataset
diff --git a/python/manylinux201x/build_arrow.sh b/python/manylinux201x/build_arrow.sh
index 661dac1..7ac9e98 100755
--- a/python/manylinux201x/build_arrow.sh
+++ b/python/manylinux201x/build_arrow.sh
@@ -46,7 +46,7 @@
 # ARROW-6860: Disabling ORC in wheels until Protobuf static linking issues
 # across projects is resolved
 export PYARROW_WITH_ORC=0
-
+export PYARROW_WITH_HDFS=1
 export PYARROW_WITH_PARQUET=1
 export PYARROW_WITH_PLASMA=1
 export PYARROW_BUNDLE_ARROW_CPP=1
@@ -98,38 +98,40 @@
 ARROW_BUILD_DIR=/tmp/build-PY${PYTHON_VERSION}-${UNICODE_WIDTH}
 mkdir -p "${ARROW_BUILD_DIR}"
 pushd "${ARROW_BUILD_DIR}"
-PATH="${CPYTHON_PATH}/bin:${PATH}" cmake -DCMAKE_BUILD_TYPE=Release \
-    -DARROW_DEPENDENCY_SOURCE="SYSTEM" \
-    -DZLIB_ROOT=/usr/local \
-    -DCMAKE_INSTALL_PREFIX=/arrow-dist \
-    -DCMAKE_INSTALL_LIBDIR=lib \
-    -DARROW_BUILD_TESTS=OFF \
+PATH="${CPYTHON_PATH}/bin:${PATH}" cmake \
+    -DARROW_BOOST_USE_SHARED=ON \
     -DARROW_BUILD_SHARED=ON \
     -DARROW_BUILD_STATIC=OFF \
-    -DARROW_BOOST_USE_SHARED=ON \
+    -DARROW_BUILD_TESTS=OFF \
+    -DARROW_DATASET=${BUILD_ARROW_DATASET} \
+    -DARROW_DEPENDENCY_SOURCE="SYSTEM" \
+    -DARROW_FLIGHT=${BUILD_ARROW_FLIGHT} \
+    -DARROW_GANDIVA_JAVA=OFF \
     -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-8/root/usr/include/c++/8/;-isystem;/opt/rh/devtoolset-8/root/usr/include/c++/8/x86_64-redhat-linux/" \
+    -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \
+    -DARROW_HDFS=ON \
     -DARROW_JEMALLOC=ON \
-    -DARROW_RPATH_ORIGIN=ON \
-    -DARROW_PYTHON=ON \
-    -DARROW_PARQUET=ON \
-    -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \
-    -DARROW_PLASMA=ON \
-    -DARROW_TENSORFLOW=ON \
     -DARROW_ORC=OFF \
-    -DORC_SOURCE=BUNDLED \
+    -DARROW_PARQUET=ON \
+    -DARROW_PLASMA=ON \
+    -DARROW_PYTHON=ON \
+    -DARROW_RPATH_ORIGIN=ON \
+    -DARROW_TENSORFLOW=ON \
+    -DARROW_WITH_BROTLI=ON \
     -DARROW_WITH_BZ2=ON \
-    -DARROW_WITH_ZLIB=ON \
-    -DARROW_WITH_ZSTD=ON \
     -DARROW_WITH_LZ4=ON \
     -DARROW_WITH_SNAPPY=ON \
-    -DARROW_WITH_BROTLI=ON \
-    -DARROW_DATASET=${BUILD_ARROW_DATASET} \
-    -DARROW_FLIGHT=${BUILD_ARROW_FLIGHT} \
-    -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \
-    -DARROW_GANDIVA_JAVA=OFF \
+    -DARROW_WITH_ZLIB=ON \
+    -DARROW_WITH_ZSTD=ON \
     -DBoost_NAMESPACE=arrow_boost \
     -DBOOST_ROOT=/arrow_boost_dist \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_LIBDIR=lib \
+    -DCMAKE_INSTALL_PREFIX=/arrow-dist \
     -DOPENSSL_USE_STATIC_LIBS=ON \
+    -DORC_SOURCE=BUNDLED \
+    -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \
+    -DZLIB_ROOT=/usr/local \
     -GNinja /arrow/cpp
 ninja install
 popd
@@ -162,6 +164,8 @@
 import pyarrow
 import pyarrow.parquet
 import pyarrow.plasma
+import pyarrow.fs
+import pyarrow._hdfs
 
 if sys.version_info.major > 2:
     import pyarrow.dataset
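
For context only (not part of the patch): once a wheel or conda package is built with these flags, the HDFS bindings can be exercised through the legacy filesystem API of this release line. The host/port values and the Hadoop runtime setup (libhdfs located via ARROW_LIBHDFS_DIR or a local Hadoop install, plus JAVA_HOME and the Hadoop CLASSPATH) are assumptions about the user's environment, not something the package ships:

    # Hypothetical end-user check against a reachable HDFS cluster; the package only
    # ships the compiled bindings, so the Hadoop client libraries must exist at runtime.
    import pyarrow.hdfs

    fs = pyarrow.hdfs.connect(host="default", port=0)  # "default" defers to the local Hadoop config
    print(fs.ls("/"))                                   # list the HDFS root to confirm connectivity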