| #!/bin/bash |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # A simple launcher for the Apache Beam SQL Shell. |
| # This script builds a self-contained JAR with all dependencies using Maven, |
| # which correctly handles service loading for IOs, and caches the JAR. |
| set -e # Exit immediately if a command exits with a non-zero status. |
| |
| # --- Configuration --- |
| DEFAULT_BEAM_VERSION="2.70.0" |
| MAIN_CLASS="org.apache.beam.sdk.extensions.sql.jdbc.BeamSqlLine" |
| # Directory to store cached executable JAR files |
| CACHE_DIR="${HOME}/.beam/cache" |
| |
| # Maven Wrapper Configuration |
| MAVEN_WRAPPER_VERSION="3.2.0" |
| MAVEN_VERSION="3.9.6" |
| MAVEN_WRAPPER_SCRIPT_URL="https://raw.githubusercontent.com/apache/maven-wrapper/refs/tags/maven-wrapper-${MAVEN_WRAPPER_VERSION}/maven-wrapper-distribution/src/resources/mvnw" |
| MAVEN_WRAPPER_JAR_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/${MAVEN_WRAPPER_VERSION}/maven-wrapper-${MAVEN_WRAPPER_VERSION}.jar" |
| MAVEN_DISTRIBUTION_URL="https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/${MAVEN_VERSION}/apache-maven-${MAVEN_VERSION}-bin.zip" |
| |
| # Maven Plugin Configuration |
| MAVEN_SHADE_PLUGIN_VERSION="3.5.1" |
| mkdir -p "${CACHE_DIR}" |
| |
| # Create a temporary directory for our Maven project. |
| WORK_DIR=$(mktemp -d) |
| |
| # Ensure cleanup on script exit |
| cleanup() { |
| if [ -n "${WORK_DIR}" ] && [ -d "${WORK_DIR}" ]; then |
| rm -rf "${WORK_DIR}" |
| fi |
| } |
| trap cleanup EXIT |
| |
| # --- Helper Functions --- |
| # This function downloads the maven wrapper script and supporting files. |
| function setup_maven_wrapper() { |
| local beam_dir="${HOME}/.beam" |
| local maven_wrapper_dir="${beam_dir}/maven-wrapper" |
| local mvnw_script="${maven_wrapper_dir}/mvnw" |
| local wrapper_jar="${maven_wrapper_dir}/.mvn/wrapper/maven-wrapper.jar" |
| local wrapper_props="${maven_wrapper_dir}/.mvn/wrapper/maven-wrapper.properties" |
| |
| # Check if Maven wrapper is already cached |
| if [ -f "${mvnw_script}" ] && [ -f "${wrapper_jar}" ] && [ -f "${wrapper_props}" ]; then |
| echo "🔧 Using cached Maven Wrapper from ${maven_wrapper_dir}" |
| # Use the cached wrapper directly |
| MAVEN_CMD="${mvnw_script}" |
| return |
| fi |
| |
| echo "🔧 Downloading Maven Wrapper for the first time..." |
| mkdir -p "${maven_wrapper_dir}/.mvn/wrapper" |
| |
| # Create the properties file to specify a modern Maven version |
| echo "distributionUrl=${MAVEN_DISTRIBUTION_URL}" > "${wrapper_props}" |
| |
| # Download the mvnw script and the wrapper JAR to cache directory |
| curl -sSL -o "${mvnw_script}" "${MAVEN_WRAPPER_SCRIPT_URL}" |
| curl -sSL -o "${wrapper_jar}" "${MAVEN_WRAPPER_JAR_URL}" |
| |
| # Make the wrapper script executable |
| chmod +x "${mvnw_script}" |
| |
| echo "✅ Maven Wrapper cached in ${maven_wrapper_dir} for future use" |
| # Use the cached wrapper directly |
| MAVEN_CMD="${mvnw_script}" |
| } |
| |
| function usage() { |
| echo "Usage: $0 [--version <beam_version>] [--runner <runner_name>] [--io <io_connector>] [--list-versions] [--list-ios] [--list-runners] [--debug] [-h|--help]" |
| echo "" |
| echo "A self-contained launcher for the Apache Beam SQL Shell." |
| echo "" |
| echo "Options:" |
| echo " --version Specify the Apache Beam version (default: ${DEFAULT_BEAM_VERSION})." |
| echo " --runner Specify the Beam runner to use (default: direct)." |
| echo " Supported runners:" |
| echo " direct - DirectRunner (runs locally, good for development)" |
| echo " dataflow - DataflowRunner (runs on Google Cloud Dataflow)" |
| echo " --io Specify an IO connector to include. Can be used multiple times." |
| echo " Available connectors: amazon-web-services2, amqp, azure," |
| echo " azure-cosmos, cassandra, cdap, clickhouse, csv, debezium, elasticsearch," |
| echo " google-ads, google-cloud-platform, hadoop-format, hbase, hcatalog, iceberg," |
| echo " influxdb, jdbc, jms, json, kafka, kinesis, kudu, mongodb, mqtt, neo4j," |
| echo " parquet, pulsar, rabbitmq, redis, singlestore, snowflake, solace, solr," |
| echo " sparkreceiver, splunk, synthetic, thrift, tika, xml" |
| echo " --list-versions List all available Beam versions from Maven Central and exit." |
| echo " --list-ios List all available IO connectors from Maven Central and exit." |
| echo " --list-runners List all available runners and exit." |
| echo " --debug Enable debug mode (sets bash -x flag)." |
| echo " -h, --help Show this help message." |
| exit 1 |
| } |
| |
| # This function fetches all available Beam versions from Maven Central. |
| function list_versions() { |
| echo "🔎 Fetching the 10 most recent Apache Beam versions from Maven Central..." |
| local metadata_url="https://repo1.maven.org/maven2/org/apache/beam/beam-sdks-java-core/maven-metadata.xml" |
| |
| if ! command -v curl &> /dev/null; then |
| echo "❌ Error: 'curl' is required to fetch the version list." >&2 |
| return 1 |
| fi |
| |
| # Fetch, parse, filter, sort, and take the top 10. |
| local versions |
| versions=$(curl -sS "${metadata_url}" | \ |
| grep '<version>' | \ |
| sed 's/.*<version>\(.*\)<\/version>.*/\1/' | \ |
| grep -v 'SNAPSHOT' | \ |
| sort -rV | \ |
| head -n 10) # Limit to the first 10 lines |
| |
| if [ -z "${versions}" ]; then |
| echo "❌ Could not retrieve versions. Please check your internet connection or the Maven Central status." >&2 |
| return 1 |
| fi |
| |
| echo "✅ 10 latest versions:" |
| echo "${versions}" |
| } |
| |
| # This function lists all available IO connectors by querying Maven Central. |
| function list_ios() { |
| echo "🔎 Fetching available Apache Beam IO connectors from Maven Central..." |
| local search_url="https://search.maven.org/solrsearch/select?q=g:org.apache.beam+AND+a:beam-sdks-java-io-*&rows=100&wt=json" |
| |
| if ! command -v curl &> /dev/null; then |
| echo "❌ Error: 'curl' is required to fetch the IO connector list." >&2 |
| return 1 |
| fi |
| |
| # Fetch and parse the JSON response to extract IO connector names |
| local ios |
| ios=$(curl -sS "${search_url}" | \ |
| grep -o '"a":"beam-sdks-java-io-[^"]*"' | \ |
| sed 's/"a":"beam-sdks-java-io-\([^"]*\)"/\1/' | \ |
| grep -v -E '(tests?|expansion-service|parent|upgrade)' | \ |
| sort -u) |
| |
| if [ -z "${ios}" ]; then |
| echo "❌ Could not retrieve IO connectors. Please check your internet connection or try again later." >&2 |
| echo "📋 Here are the known IO connectors (may not be complete):" |
| echo "amazon-web-services2, amqp, azure, azure-cosmos, cassandra," |
| echo "cdap, clickhouse, csv, debezium, elasticsearch, google-ads, google-cloud-platform," |
| echo "hadoop-format, hbase, hcatalog, iceberg, influxdb, jdbc, jms, json, kafka, kinesis," |
| echo "kudu, mongodb, mqtt, neo4j, parquet, pulsar, rabbitmq, redis, singlestore, snowflake," |
| echo "solace, solr, sparkreceiver, splunk, synthetic, thrift, tika, xml" |
| return 1 |
| fi |
| |
| echo "✅ Available IO connectors:" |
| echo "${ios}" | tr '\n' ' ' | fold -s -w 80 | sed 's/^/ /' |
| } |
| |
| # This function lists all available runners by querying Maven Central. |
| function list_runners() { |
| echo "🚀 Fetching available Apache Beam runners for version ${BEAM_VERSION} from Maven Central..." |
| local search_url="https://search.maven.org/solrsearch/select?q=g:org.apache.beam+AND+a:beam-runners-*+AND+v:${BEAM_VERSION}&rows=100&wt=json" |
| |
| if ! command -v curl &> /dev/null; then |
| echo "❌ Error: 'curl' is required to fetch the runner list." >&2 |
| return 1 |
| fi |
| |
| # Fetch and parse the JSON response to extract runner names |
| local runners |
| runners=$(curl -sS "${search_url}" | \ |
| grep -o '"a":"beam-runners-[^"]*"' | \ |
| sed 's/"a":"beam-runners-\([^"]*\)"/\1/' | \ |
| grep -v -E '(tests?|parent|core-construction|core-java|extensions|job-server|legacy-worker|windmill|examples|experimental|orchestrator|java-fn-execution|java-job-service|gcp-gcemd|gcp-gcsproxy|local-java-core|portability-java|prism-java|reference-java)' | \ |
| sort -u) |
| |
| if [ -z "${runners}" ]; then |
| echo "❌ Could not retrieve runners for version ${BEAM_VERSION}. Please check your internet connection or try again later." >&2 |
| echo "📋 Here are the known runners for recent Beam versions (may not be complete):" |
| echo "" |
| echo " direct - DirectRunner (runs locally, good for development)" |
| echo " dataflow - DataflowRunner (runs on Google Cloud Dataflow)" |
| echo " flink - FlinkRunner (runs on Apache Flink)" |
| echo " spark - SparkRunner (runs on Apache Spark)" |
| echo " samza - SamzaRunner (runs on Apache Samza)" |
| echo " jet - JetRunner (runs on Hazelcast Jet)" |
| echo " twister2 - Twister2Runner (runs on Twister2)" |
| echo "" |
| echo "💡 Usage: ./beam-sql.sh --runner <runner_name>" |
| echo " Default: direct" |
| echo " Note: Only 'direct' and 'dataflow' are currently supported by this script." |
| return 1 |
| fi |
| |
| echo "✅ Available runners for Beam ${BEAM_VERSION}:" |
| echo "" |
| |
| # Process each runner and provide descriptions |
| while IFS= read -r runner; do |
| case "$runner" in |
| "direct-java") |
| echo " direct - DirectRunner" |
| echo " Runs locally on your machine. Good for development and testing." |
| ;; |
| "google-cloud-dataflow-java") |
| echo " dataflow - DataflowRunner" |
| echo " Runs on Google Cloud Dataflow for production workloads." |
| ;; |
| flink-*) |
| local version=$(echo "$runner" | sed 's/flink-//') |
| echo " flink-${version} - FlinkRunner (Flink ${version})" |
| echo " Runs on Apache Flink ${version} clusters." |
| ;; |
| flink_*) |
| local version=$(echo "$runner" | sed 's/flink_//') |
| echo " flink-${version} - FlinkRunner (Flink ${version})" |
| echo " Runs on Apache Flink ${version} clusters." |
| ;; |
| "spark") |
| echo " spark - SparkRunner" |
| echo " Runs on Apache Spark clusters." |
| ;; |
| "spark-3") |
| echo " spark-3 - SparkRunner (Spark 3.x)" |
| echo " Runs on Apache Spark 3.x clusters." |
| ;; |
| "samza") |
| echo " samza - SamzaRunner" |
| echo " Runs on Apache Samza." |
| ;; |
| "jet") |
| echo " jet - JetRunner" |
| echo " Runs on Hazelcast Jet." |
| ;; |
| "twister2") |
| echo " twister2 - Twister2Runner" |
| echo " Runs on Twister2." |
| ;; |
| "apex") |
| echo " apex - ApexRunner" |
| echo " Runs on Apache Apex." |
| ;; |
| "gearpump") |
| echo " gearpump - GearpumpRunner" |
| echo " Runs on Apache Gearpump." |
| ;; |
| "prism") |
| echo " prism - PrismRunner" |
| echo " Local runner for testing portable pipelines." |
| ;; |
| "reference") |
| echo " reference - ReferenceRunner" |
| echo " Reference implementation for testing." |
| ;; |
| "portability") |
| echo " portability - PortabilityRunner" |
| echo " For portable pipeline execution." |
| ;; |
| *) |
| # For any other runners, clean up the name and show it |
| local clean_name=$(echo "$runner" | sed -e 's/-java$//' -e 's/^gcp-//' -e 's/^local-//') |
| echo " ${clean_name} - ${runner}" |
| ;; |
| esac |
| done <<< "$runners" |
| |
| echo "" |
| echo "💡 Usage: ./beam-sql.sh --runner <runner_name>" |
| echo " Default: direct" |
| echo " Note: This script currently supports 'direct' and 'dataflow' runners." |
| echo " Other runners may require additional setup and dependencies." |
| } |
| |
| |
| # --- Argument Parsing --- |
| BEAM_VERSION="${DEFAULT_BEAM_VERSION}" |
| IO_CONNECTORS=() |
| BEAM_RUNNER="direct" |
| SQLLINE_ARGS=() |
| DEBUG_MODE=false |
| |
| while [[ "$#" -gt 0 ]]; do |
| case $1 in |
| --version) BEAM_VERSION="$2"; shift ;; |
| --runner) BEAM_RUNNER=$(echo "$2" | tr '[:upper:]' '[:lower:]'); shift ;; |
| --io) IO_CONNECTORS+=("$2"); shift ;; |
| --list-versions) list_versions; exit 0 ;; |
| --list-ios) list_ios; exit 0 ;; |
| --list-runners) list_runners; exit 0 ;; |
| --debug) DEBUG_MODE=true ;; |
| -h|--help) usage ;; |
| *) SQLLINE_ARGS+=("$1") ;; |
| esac |
| shift |
| done |
| |
| # Enable debug mode if requested |
| if [ "${DEBUG_MODE}" = true ]; then |
| set -x |
| fi |
| |
| # --- Prerequisite Check --- |
| # Java is always required. |
| if ! command -v java &> /dev/null; then |
| echo "❌ Error: 'java' command not found. It is required to run the application." >&2 |
| exit 1 |
| fi |
| |
| # Curl is required for Maven wrapper setup. |
| if ! command -v curl &> /dev/null; then |
| echo "❌ Error: 'curl' command not found. It is required to download the Maven wrapper." >&2 |
| exit 1 |
| fi |
| |
| setup_maven_wrapper |
| |
| echo "🚀 Preparing Beam SQL Shell v${BEAM_VERSION}..." |
| echo " Runner: ${BEAM_RUNNER}" |
| if [ ${#IO_CONNECTORS[@]} -gt 0 ]; then |
| echo " Including IOs: ${IO_CONNECTORS[*]}" |
| fi |
| |
| # --- Dependency Resolution & JAR Caching --- |
| |
| # Create a unique key for the configuration to use as a cache filename. |
| sorted_ios_str=$(printf "%s\n" "${IO_CONNECTORS[@]}" | sort | tr '\n' '-' | sed 's/-$//') |
| CACHE_KEY="beam-${BEAM_VERSION}_runner-${BEAM_RUNNER}_ios-${sorted_ios_str}.jar" |
| CACHE_FILE="${CACHE_DIR}/${CACHE_KEY}" |
| |
| # Check if a cached JAR already exists for this configuration. |
| if [ -f "${CACHE_FILE}" ]; then |
| echo "✅ Found cached executable JAR. Skipping build." |
| CP="${CACHE_FILE}" |
| else |
| echo "🔎 No cache found. Building executable JAR (this might take a moment on first run)..." |
| |
| # --- Dynamic POM Generation --- |
| POM_FILE="${WORK_DIR}/pom.xml" |
| cat > "${POM_FILE}" << EOL |
| <project xmlns="http://maven.apache.org/POM/4.0.0" |
| xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
| <modelVersion>4.0.0</modelVersion> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-sql-shell-runner</artifactId> |
| <version>1.0</version> |
| <dependencies> |
| <dependency> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-sdks-java-extensions-sql-jdbc</artifactId> |
| <version>\${beam.version}</version> |
| </dependency> |
| EOL |
| # Add IO and Runner dependencies |
| for io in "${IO_CONNECTORS[@]}"; do |
| echo " <dependency><groupId>org.apache.beam</groupId><artifactId>beam-sdks-java-io-${io}</artifactId><version>\${beam.version}</version></dependency>" >> "${POM_FILE}" |
| done |
| RUNNER_ARTIFACT="" |
| case "${BEAM_RUNNER}" in |
| dataflow) RUNNER_ARTIFACT="beam-runners-google-cloud-dataflow-java" ;; |
| direct) ;; |
| *) echo "❌ Error: Unsupported runner '${BEAM_RUNNER}'." >&2; exit 1 ;; |
| esac |
| if [ -n "${RUNNER_ARTIFACT}" ]; then |
| echo " <dependency><groupId>org.apache.beam</groupId><artifactId>${RUNNER_ARTIFACT}</artifactId><version>\${beam.version}</version></dependency>" >> "${POM_FILE}" |
| fi |
| |
| # Complete the POM with the build section for the maven-shade-plugin |
| cat >> "${POM_FILE}" << EOL |
| </dependencies> |
| <properties> |
| <beam.version>${BEAM_VERSION}</beam.version> |
| </properties> |
| <build> |
| <plugins> |
| <plugin> |
| <groupId>org.apache.maven.plugins</groupId> |
| <artifactId>maven-shade-plugin</artifactId> |
| <version>${MAVEN_SHADE_PLUGIN_VERSION}</version> |
| <executions> |
| <execution> |
| <phase>package</phase> |
| <goals> |
| <goal>shade</goal> |
| </goals> |
| <configuration> |
| <transformers> |
| <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> |
| </transformers> |
| <filters> |
| <filter> |
| <artifact>*:*</artifact> |
| <excludes> |
| <exclude>META-INF/*.SF</exclude> |
| <exclude>META-INF/*.DSA</exclude> |
| <exclude>META-INF/*.RSA</exclude> |
| </excludes> |
| </filter> |
| </filters> |
| </configuration> |
| </execution> |
| </executions> |
| </plugin> |
| </plugins> |
| </build> |
| </project> |
| EOL |
| |
| # Use `mvn package` to build the uber JAR. |
| ${MAVEN_CMD} -f "${POM_FILE}" -q --batch-mode package |
| |
| UBER_JAR_PATH="${WORK_DIR}/target/beam-sql-shell-runner-1.0.jar" |
| |
| # Check if build was successful before caching |
| if [ ! -f "${UBER_JAR_PATH}" ]; then |
| echo "❌ Maven build failed. The uber JAR was not created." >&2 |
| exit 1 |
| fi |
| |
| # Copy the newly built JAR to our cache directory. |
| cp "${UBER_JAR_PATH}" "${CACHE_FILE}" |
| CP="${CACHE_FILE}" |
| echo "💾 JAR built and cached for future use." |
| fi |
| |
| # --- Launch Shell --- |
| echo "✅ Dependencies ready. Launching Beam SQL Shell..." |
| echo "----------------------------------------------------" |
| |
| java -cp "${CP}" "${MAIN_CLASS}" "${SQLLINE_ARGS[@]}" |
| |
| echo "----------------------------------------------------" |
| echo "👋 Exited Beam SQL Shell." |