Merge pull request #6546: [BEAM-4495] Create precommit to stage website on GCS

diff --git a/.test-infra/jenkins/PrecommitJobBuilder.groovy b/.test-infra/jenkins/PrecommitJobBuilder.groovy
index 49b4f98..0b81471 100644
--- a/.test-infra/jenkins/PrecommitJobBuilder.groovy
+++ b/.test-infra/jenkins/PrecommitJobBuilder.groovy
@@ -103,6 +103,7 @@
           rootBuildScriptDir(commonJobProperties.checkoutDir)
           tasks(gradleTask)
           commonJobProperties.setGradleSwitches(delegate)
+          switches('-PgithubPullRequestId=${ghprbPullId}') // single-quoted: ${ghprbPullId} is passed through literally, not interpolated by Groovy
 	  if (nameBase == 'Java') {
             // BEAM-5035: Parallel builds are very flaky
             switches('--no-parallel')
diff --git a/.test-infra/jenkins/job_PreCommit_Website_Stage_GCS.groovy b/.test-infra/jenkins/job_PreCommit_Website_Stage_GCS.groovy
new file mode 100644
index 0000000..5f3f4e3
--- /dev/null
+++ b/.test-infra/jenkins/job_PreCommit_Website_Stage_GCS.groovy
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import PrecommitJobBuilder
+
+// Pre-commit job that runs :beam-website:stageWebsite for changes under website/.
+new PrecommitJobBuilder(
+    scope: this,
+    nameBase: 'Website_Stage_GCS',
+    gradleTask: ':beam-website:stageWebsite',
+    triggerPathPatterns: ['^website/.*$']
+).build()
+
diff --git a/website/build.gradle b/website/build.gradle
index 26cf0f2..eb52de7 100644
--- a/website/build.gradle
+++ b/website/build.gradle
@@ -38,6 +38,22 @@
   } 
 }
 
+
+// Directory of the Python virtualenv created by setupVirtualenv.
+def envdir = "${buildDir}/gradleenv"
+
+// Creates the virtualenv and installs beautifulsoup4 into it.
+task setupVirtualenv {
+  outputs.dirs(envdir)
+  doLast {
+    exec { commandLine 'virtualenv', "${envdir}" }
+    exec {
+      executable 'sh'
+      args '-c', ". ${envdir}/bin/activate && pip install beautifulsoup4"
+    }
+  }
+}
+
 task buildDockerImage(type: Exec) {
   inputs.files 'Gemfile', 'Gemfile.lock'
   commandLine 'docker', 'build', '-t', dockerImageTag, '.'
@@ -80,10 +96,12 @@
 clean.dependsOn cleanWebsite
 
 task buildWebsite(type: Exec) {
+  def baseurlFlag = project.findProperty('githubPullRequestId') ? "--baseurl=/${project.findProperty('githubPullRequestId')}" : '' // '' when no PR id is supplied
   dependsOn startDockerContainer, setupBuildDir
   finalizedBy stopAndRemoveDockerContainer
   inputs.files 'Gemfile.lock', '_config.yml'
   inputs.dir 'src'
+  inputs.property 'baseurl', baseurlFlag // declared as a task input so a changed baseurl invalidates the up-to-date check
   outputs.dir "$buildDir/.sass-cache"
   outputs.dir buildContentDir
   commandLine 'docker', 'exec',
@@ -91,7 +109,7 @@
     """cd $dockerWorkDir/build/website && \
       bundle exec jekyll build \
       --config $dockerWorkDir/website/_config.yml \
-      --incremental \
+      --incremental ${baseurlFlag} \
       --source $dockerWorkDir/website/src
       """
 }
@@ -183,3 +201,33 @@
 
 commitWebsite.dependsOn testWebsite
 publishWebsite.dependsOn commitWebsite
+
+/*
+ * Stages the website built for a pull request on GCS.
+ * Requires -PgithubPullRequestId (an integer); -PwebsiteBucket is optional. For example:
+ *   ./gradlew :beam-website:stageWebsite -PgithubPullRequestId=${ghprbPullId} -PwebsiteBucket=foo
+ */
+task stageWebsite {
+  dependsOn setupVirtualenv, buildWebsite
+  doLast {
+    assert project.hasProperty('githubPullRequestId')
+    assert githubPullRequestId.isInteger()
+
+    def gcsBucket = project.findProperty('websiteBucket') ?: 'apache-beam-website-pull-requests'
+    def gcsPath = "gs://${gcsBucket}/${githubPullRequestId}"
+
+    // Remove current site if it exists.
+    shell "gsutil -m rm -r -f ${gcsPath} || true"
+
+    // Fixup the links to index.html files. -n keeps a re-run from creating a
+    // nested link inside the directory an existing `content` link points to.
+    shell "cd ${buildDir} && ln -sfn generated-content content || true"
+    shell ". ${envdir}/bin/activate && cd ${buildDir} && " +
+          "python ${project.rootDir}/website/.jenkins/append_index_html_to_internal_links.py"
+
+    // Copy the built website to GCS
+    shell "gsutil -m cp -r ${buildContentDir} ${gcsPath}"
+    println "Website published to http://${gcsBucket}." +
+        "storage.googleapis.com/${githubPullRequestId}/index.html"
+  }
+}