HADOOP-19565. Release Hadoop 3.4.2: lean artifact validation
Add the property "category" so that the "-lean" artifacts can be validated.
diff --git a/README.md b/README.md
index 8cfda36..0db9008 100644
--- a/README.md
+++ b/README.md
@@ -276,18 +276,16 @@
Clean up all the build files, including any remote downloads in the `downloads/`
dir.
-
-```bash
-ant clean
-```
-
And then purge all artifacts of that release from maven.
This is critical when validating downstream project builds.
```bash
-ant mvn-purge
+ant clean mvn-purge
```
+Tip: look at the output to make sure it is cleaning the artifacts from the release you intend to validate.
+
+
### SCP RC down to `target/incoming`
This will take a while! look in target/incoming for progress
@@ -308,7 +306,7 @@
### Build a lean binary tar
-The normal binary tar.gz files huge because they install a version of the AWS v2 SDK "bundle.jar"
+The normal `binary tar.gz` files are huge because they include a version of the AWS v2 SDK "bundle.jar"
file which has been validated with the hadoop-aws module and the S3A connector which was built against it.
This is a really big file because it includes all the "shaded" dependencies as well as client libraries
@@ -325,8 +323,8 @@
process.
* Larger container images if preinstalled.
-The "lean" x86 binary tar.gz file aims to reduce eliminate these negative issues by being
-a variant of the normal x86 binary distribution with the relevant AWS SDK jar removed.
+The "lean" `binary tar.gz` files eliminate these negative issues by being
+a variant of the normal x86 binary distribution with the relevant AWS SDK JAR removed.
The build target `release.lean.tar` can do this once the normal x86 binaries have been downloaded.
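+
+For example, once the normal x86 binary `tar.gz` has been fetched into the release directory:
+
+```bash
+# build the lean variant of the (already downloaded) x86 binary tar
+ant release.lean.tar
+```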
@@ -387,7 +385,7 @@
```
-### copy to a staging location in the hadoop SVN repository.
+### Copy to a staging location in the Hadoop SVN repository
When committed to subversion it will be uploaded and accessible via a
https://svn.apache.org URL.
@@ -431,7 +429,7 @@
```
-### tag the rc and push to github
+### Tag the RC and push to GitHub
This isn't automated as it needs to be done in the source tree.
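+
+A minimal sketch of the manual step (the tag name and remote here are illustrative; follow the project's actual RC tag convention):
+
+```bash
+# from the hadoop source tree, at the RC commit
+git tag -s release-3.4.2-RC2 -m "Hadoop 3.4.2 RC2"
+git push <remote> release-3.4.2-RC2
+```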
@@ -480,6 +478,15 @@
amd.src.dir=https://dist.apache.org/repos/dist/dev/hadoop/hadoop-${hadoop.version}-RC${rc}/
```
+### Choose full versus lean downloads
+
+The property `category` controls what suffix to use when downloading artifacts.
+The default value, "", pulls in the full binaries.
+If set to `-lean` then lean artifacts are downloaded and validated.
+
+```
+category=-lean
+```
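+
+With `category=-lean` the binary artifact that is downloaded and validated is
+`hadoop-${hadoop.version}-lean.tar.gz` rather than `hadoop-${hadoop.version}.tar.gz`.
+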
### Targets of Relevance
| target | action |
@@ -556,7 +563,12 @@
of `downloads/untar/source`, for example `downloads/untar/source/hadoop-3.4.1-src`
-### untar site and validate.
+Do remember to purge the locally generated artifacts from your maven repository:
+```bash
+ant mvn-purge
+```
+
+### Untar site and validate
```bash
@@ -565,9 +577,9 @@
Validation is pretty minimal; it just looks for the existence
of index.html files in the site root and under api/.
-### untar binary release
+### Untar binary release
-Untars the (already downloaded) binary tar to `bin/hadoop fs -ls $BUCKET/
+Untar the (already downloaded) binary tar into `target/bin-untar`, ready for commands such as `bin/hadoop fs -ls $BUCKET/`.
```bash
@@ -609,10 +621,11 @@
| `release.fetch.arm` | fetch ARM artifacts |
| `gpg.arm.verify` | verify ARM artifacts |
| `release.arm.untar` | untar the ARM binary file |
-| `release.arm.commands` | execute commands against the arm binaries |
+| `release.arm.commands` | execute commands against the ARM binaries |
```bash
# fetch, verify, and untar the `-aarch64.tar.gz` binary, then run commands against it
+ant release.fetch.arm gpg.arm.verify
ant release.arm.untar
ant release.arm.commands
```
@@ -683,7 +696,7 @@
checks for some known "forbidden" artifacts that must not be exported
as transitive dependencies.
-Review this to make sure there are no unexpected artifacts coming in,
+Review this to make sure there are no unexpected artifacts coming in.
## Build and test Cloudstore diagnostics
diff --git a/build.xml b/build.xml
index 0441213..e7486b3 100644
--- a/build.xml
+++ b/build.xml
@@ -152,7 +152,10 @@
<require p="release.branch"/>
<require p="asf.staging.url"/>
+ <!-- suffix used to switch between full and lean artifacts; set to "-lean" for lean -->
+ <set name="category" value=""/>
+ <!-- base name for the release artifacts -->
<set name="release" value="hadoop-${hadoop.version}"/>
<set name="rc.name" value="${hadoop.version}-${rc}"/>
<set name="rc.dirname" value="${release}-${rc}"/>
@@ -162,6 +165,8 @@
<setpath name="release.bin.dir" location="${release.untar.dir}/bin"/>
<setpath name="release.site.dir" location="${release.untar.dir}/site"/>
<setpath name="release.source.dir" location="${release.untar.dir}/source"/>
+ <set name="release.binary.filename" value="${release}${category}.tar.gz" />
+
<setpath name="site.dir" location="${release.untar.dir}/site/r${hadoop.version}"/>
<setpath name="staged.artifacts.dir" location="${staging.dir}/${rc.dirname}"/>
@@ -200,7 +205,7 @@
<setpath name="arm.artifact.dir" location="${arm.hadoop.dir}/target/artifacts/" />
<setpath name="arm.dir" location="${downloads.dir}/arm" />
<set name="arm.binary.prefix" value="hadoop-${hadoop.version}-aarch64" />
- <set name="arm.binary.filename" value="${arm.binary.prefix}.tar.gz" />
+ <set name="arm.binary.filename" value="${arm.binary.prefix}${category}.tar.gz" />
<setpath name="arm.binary.src" location="${arm.artifact.dir}/hadoop-${hadoop.version}.tar.gz" />
<setpath name="arm.binary" location="${arm.dir}/${arm.binary.filename}" />
<setpath name="arm.binary.sha512" location="${arm.binary}.sha512" />
@@ -208,6 +213,9 @@
<!-- for spark builds -->
<set name="spark.version" value="4.0.0-SNAPSHOT"/>
+ <!-- version of the AWS SDK to copy from the mvn repo -->
+ <set name="aws-java-sdk-v2.version" value="2.29.52"/>
+ <setpath name="mvn.dependencies.dir" location="target/dependencies" />
<echo>
@@ -309,6 +317,23 @@
</sequential>
</macrodef>
+
+ <!-- download the artifact plus its signature and checksums -->
+ <macrodef name="download">
+ <attribute name="artifact" />
+ <sequential>
+ <get
+ dest="${incoming.dir}"
+ skipexisting="true"
+ verbose="true"
+ usetimestamp="true">
+ <url url="${http.source}/@{artifact}" />
+ <url url="${http.source}/@{artifact}.asc" />
+ <url url="${http.source}/@{artifact}.sha512" />
+ </get>
+ </sequential>
+ </macrodef>
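+ <!-- typical usage: <download artifact="${release}-src.tar.gz"/> (see the fetch targets later in this file) -->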
+
<presetdef name="verify-release-dir">
<require-dir path="${release.dir}" />
</presetdef>
@@ -407,7 +432,27 @@
<!-- not probed for as maven pulls them in from zk, somehow -->
<!-- <forbidden artifact="logback-classic"/>-->
<forbidden artifact="logback-classic"/>
-<!-- <forbidden artifact="logback-core"/>-->
+ <forbidden artifact="logback-core"/>
+ </target>
+
+ <target name="mvn-copy-dependencies" depends="init"
+ description="copy all the maven dependencies">
+
+ <!-- run mvn install and dependency:copy-dependencies with the staging profile, then list the copied dependencies -->
+ <mvn>
+ <arg value="install"/>
+ <arg value="dependency:copy-dependencies"/>
+ <arg value="-DskipTests"/>
+ <arg value="-Pstaging"/>
+ <arg value="-Dhadoop.version=${hadoop.version}"/>
+ </mvn>
+
+ <x executable="ls">
+ <arg value="-l"/>
+ <arg value="${mvn.dependencies.dir}"/>
+ </x>
+
</target>
@@ -482,7 +527,7 @@
<gpgverify name="RELEASENOTES.md"/>
<gpgverify name="${release}-src.tar.gz"/>
<gpgverify name="${release}-site.tar.gz"/>
- <gpgverify name="${release}.tar.gz"/>
+ <gpgverify name="${release.binary.filename}"/>
<gpgverify name="${release}-rat.txt"/>
</target>
@@ -984,11 +1029,10 @@
description="Build and test the parquet-hadoop module">
<echo>
Build and test parquet-hadoop.
- There's no profile for using ASF staging as a source for artifacts.
- Run this after other builds so the files are already present
</echo>
<mvn dir="${parquet.dir}">
<arg value="-Dhadoop.version=${hadoop.version}"/>
+ <arg value="-Pasf-staging"/>
<arg value="--pl"/>
<arg value="parquet-hadoop"/>
<arg value="install"/>
@@ -1083,26 +1127,11 @@
<fail unless="http.source"/>
<mkdir dir="${incoming.dir}"/>
- <!-- download the artfact plus signature and checkshums -->
- <macrodef name="download">
- <attribute name="artifact" />
- <sequential>
- <get
- dest="${incoming.dir}"
- skipexisting="true"
- verbose="true"
- usetimestamp="true">
- <url url="${http.source}/@{artifact}" />
- <url url="${http.source}/@{artifact}.asc" />
- <url url="${http.source}/@{artifact}.sha512" />
- </get>
- </sequential>
- </macrodef>
<!-- do a parallel fetch to avoid waiting quite as long for data. -->
<parallel threadsPerProcessor="4">
<download artifact="CHANGELOG.md"/>
<download artifact="RELEASENOTES.md"/>
- <download artifact="${release}.tar.gz"/>
+ <download artifact="${release.binary.filename}"/>
<download artifact="${release}-site.tar.gz"/>
<download artifact="${release}-src.tar.gz"/>
<download artifact="${release}-rat.txt"/>
@@ -1119,10 +1148,13 @@
description="fetch the arm artifacts from a remote http site">
<fail unless="http.source"/>
<mkdir dir="${incoming.dir}"/>
- <get src="${http.source}/${arm.binary.filename}"
- dest="${incoming.dir}"
- verbose="true"
- usetimestamp="true"/>
+ <mkdir dir="${release.dir}"/>
+ <download artifact="${arm.binary.filename}" />
+ <move todir="${release.dir}" >
+ <fileset dir="${incoming.dir}">
+ <include name="*aarch*"/>
+ </fileset>
+ </move>
</target>
<target name="release.src.untar" depends="release.dir.check"
@@ -1159,15 +1191,15 @@
description="untar the x86 binary release">
<mkdir dir="target/bin-untar" />
- <gunzip src="${release.dir}/${release}.tar.gz" dest="target/bin-untar"/>
+ <gunzip src="${release.dir}/${release.binary.filename}" dest="target/bin-untar"/>
<!-- use the native command to preserve properties -->
<x executable="tar" dir="target/bin-untar" >
<arg value="-xf" />
- <arg value="${release}.tar" />
+ <arg value="${release}${category}.tar" />
</x>
<echo>
- x86 Binary release expanded into target/bin-untar/${release}
+ x86 binary release ${release.binary.filename} expanded into target/bin-untar/${release}
</echo>
</target>
@@ -1240,7 +1272,7 @@
<delete dir="${lean.dir}" />
<mkdir dir="${lean.dir}" />
- <gunzip src="${release.dir}/${release}.tar.gz" dest="${lean.dir}"/>
+ <gunzip src="${release.dir}/${release.binary.filename}" dest="${lean.dir}"/>
<echo>Untarring ${lean.dir}/${release}.tar</echo>
<!-- use the native command to preserve properties -->
@@ -1300,7 +1332,7 @@
<!-- use the native command to preserve properties -->
<x executable="tar" dir="target/arm-untar" >
<arg value="-xf" />
- <arg value="${arm.binary.prefix}.tar" />
+ <arg value="${arm.binary.prefix}${category}.tar" />
</x>
<echo>
Binary release expanded into target/arm-untar/${release}
@@ -1579,14 +1611,6 @@
</target>
- <target name="arm.release" depends="arm.sign.artifacts"
- description="prepare the arm artifacts and copy into the release dir">
- <copy todir="${release.dir}" overwrite="true">
- <fileset dir="${arm.dir}" includes="hadoop-aarch64-*" />
- </copy>
- </target>
-
-
<!-- Third party release assistance -->
@@ -1794,7 +1818,7 @@
</echo>
<move
file="${3p.local.artifacts.dir}"
- tofile="${3p.staged.artifacts.dir}"/>
+ todir="${3p.staged.artifacts.dir}"/>
</target>
<!--
diff --git a/src/releases/release-info-3.4.2.properties b/src/releases/release-info-3.4.2.properties
index a3656b2..2bf7a73 100644
--- a/src/releases/release-info-3.4.2.properties
+++ b/src/releases/release-info-3.4.2.properties
@@ -16,15 +16,16 @@
# property file for 3.4.2
hadoop.version=3.4.2
-rc=RC1
+rc=RC2
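+# validate the lean binary artifacts (AWS SDK bundle removed) for this RC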
+category=-lean
previous.version=3.4.1
release.branch=3.4.2
-git.commit.id=09870840ec35b48cd107972eb24d25e8aece04c9
+git.commit.id=e1c0dee881820a4d834ec4a4d2c70d0d953bb933
jira.id=HADOOP-19565
jira.title=Release Hadoop 3.4.2
-amd.src.dir=https://dist.apache.org/repos/dist/dev/hadoop/hadoop-3.4.2-RC1
+amd.src.dir=https://dist.apache.org/repos/dist/dev/hadoop/hadoop-3.4.2-RC2
arm.src.dir=${amd.src.dir}
http.source=${amd.src.dir}
asf.staging.url=https://repository.apache.org/content/repositories/orgapachehadoop-1437
diff --git a/src/test/java/org/apache/hadoop/validator/TestRuntimeValid.java b/src/test/java/org/apache/hadoop/validator/TestRuntimeValid.java
index ec3336a..b2a3147 100644
--- a/src/test/java/org/apache/hadoop/validator/TestRuntimeValid.java
+++ b/src/test/java/org/apache/hadoop/validator/TestRuntimeValid.java
@@ -22,7 +22,9 @@
import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
-import org.apache.hadoop.hdfs.DistributedFileSystem;/**
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+
+/**
* Let's test that runtime.
*/
public class TestRuntimeValid {
@@ -30,21 +32,21 @@
@Test
public void testRuntime() throws Throwable {
final CompileFS compileFS = new CompileFS();
- compileFS.run();
+ compileFS.run().close();
}
@Test
public void testS3AConstructor() throws Throwable {
- new S3AFileSystem();
+ new S3AFileSystem().close();
}
@Test
public void testHDFSConstructor() throws Throwable {
- new DistributedFileSystem();
+ new DistributedFileSystem().close();
}
@Test
public void testABFSConstructor() throws Throwable {
- new AzureBlobFileSystem();
+ new AzureBlobFileSystem(); // .close(); HADOOP-19650
}
}