#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Trace every command (-x) and abort on the first failing one (-e).
set -ex

# Resolve the repository root as the parent of the directory that holds this
# script and run everything from there. "$0" is quoted and the lookup is
# chained with "&&" so that a checkout path containing spaces works and a
# failed cd aborts the script instead of silently yielding a wrong root.
SPARK_HOME="$(cd "$(dirname "$0")"/.. && pwd)"
cd "$SPARK_HOME"

# Settings derived from the command line; they start out empty.
OUTPUT_PATH=""
MODULE=""
SOURCE_MODULE=""
TARGET_MODULE=""

# Print the usage text and abort the script.
# Outputs: two diagnostic lines on stderr, so they never mix with data
#          written to stdout.
# Exits:   255 - the status bash produced for the former out-of-range
#          `exit -1`, now spelled explicitly.
function usage() {
  echo "Illegal number of parameters." >&2
  echo "Usage:./dev/gen-protos.sh [connect|streaming] [output_path]" >&2
  exit 255
}

# Exactly one or two positional arguments are accepted (the module name and
# an optional output directory); any other count prints the usage and exits.
case $# in
  1 | 2) ;;
  *) usage ;;
esac

# Map the requested module (first argument) to:
#   MODULE         - the module name itself
#   SOURCE_MODULE  - the package name used in the sed rewrites below
#   TARGET_MODULE  - the Python package that replaces it
#   OUTPUT_PATH    - the default destination for the generated files
# Any other module name prints the usage and exits.
case "$1" in
  connect)
    MODULE="connect"
    SOURCE_MODULE="spark.connect"
    TARGET_MODULE="pyspark.sql.connect.proto"
    OUTPUT_PATH="${SPARK_HOME}/python/pyspark/sql/connect/proto/"
    ;;
  streaming)
    MODULE="streaming"
    SOURCE_MODULE="org.apache.spark.sql.execution.streaming"
    TARGET_MODULE="pyspark.sql.streaming.proto"
    OUTPUT_PATH="${SPARK_HOME}/python/pyspark/sql/streaming/proto/"
    ;;
  *)
    usage
    ;;
esac

#######################################
# Recreate a directory from scratch and report its absolute path.
# Arguments: $1 - directory to wipe and recreate (must be non-empty)
# Outputs:   the absolute path of the fresh directory on stdout
# Returns:   non-zero if the directory cannot be removed, created or entered
#######################################
reset_output_dir() {
  local dir=${1:?output path must not be empty}
  # Quoted and preceded by "--" so a path with spaces, glob characters or a
  # leading dash is treated as exactly one operand by rm/mkdir/cd.
  rm -Rf -- "$dir" &&
    mkdir -p -- "$dir" &&
    (CDPATH= cd -- "$dir" && pwd)
}

# An explicit output directory (second argument) replaces the module default.
# It is emptied first and then stored as an absolute path: the script changes
# directory again before copying, so a relative path would otherwise point
# somewhere else by the time it is used.
if [[ $# -eq 2 ]]; then
  OUTPUT_PATH=$(reset_output_dir "$2")
fi

# Enter the source directory of the selected module; the relative `gen`
# paths and the buf invocation that follow are resolved against it.
case "$MODULE" in
  connect) pushd sql/connect/common/src/main ;;
  streaming) pushd sql/core/src/main ;;
esac

# Licence header that is prepended to every generated file. The here-doc
# delimiter is quoted, so the text is taken literally (no expansion).
# The closing EOF stands alone on its line and the ")" follows on the next
# one: a terminator written as "EOF)" is not a valid here-doc end line and
# only works through lenient parsing (ShellCheck SC1119).
LICENSE=$(cat <<'EOF'
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
EOF
)
# Persist the header so it can be concatenated in front of each file later.
# NOTE(review): the fixed name under /tmp is shared by concurrent runs; it is
# kept because the post-processing step reads exactly this path.
printf '%s\n' "$LICENSE" > /tmp/tmp_licence

# Start from a clean slate: drop whatever an earlier run left in gen/.
rm -rf gen

# Regenerate the sources with buf, with debug output and maximum verbosity.
# The steps that follow expect the Python output under gen/proto/python.
buf generate --debug -vvv

#######################################
# Rewrite one generated Python file in place so that it refers to the actual
# package location instead of the one generated by proto, then prepend the
# licence header.
# Globals:   SOURCE_MODULE, TARGET_MODULE (read)
# Arguments: $1 - path of the generated file
#            $2 - file whose content is prepended as licence header
# Outputs:   the resulting line count of the file on stdout
# Returns:   non-zero if the final step fails; under `set -e` any failing
#            step aborts the script
#######################################
postprocess_generated_file() {
  local file=$1
  local header_file=$2
  local line_count

  # NOTE(review): the dots inside SOURCE_MODULE are not escaped, so sed reads
  # them as "any character"; the patterns are left untouched to keep the
  # generated output identical.
  case "$file" in
    *_pb2.py | *_pb2_grpc.py)
      # Fix the imports, then the module name in the serialized descriptor.
      sed \
        -e "s/from ${SOURCE_MODULE} import/from ${TARGET_MODULE} import/g" \
        -e "s/DESCRIPTOR, '${SOURCE_MODULE}/DESCRIPTOR, '${TARGET_MODULE}/g" \
        "$file" > "$file.tmp"
      mv -- "$file.tmp" "$file"
      ;;
    *.pyi)
      # Fix module references and drop the @typing_extensions.final lines.
      sed \
        -e "s/import ${SOURCE_MODULE}./import ${TARGET_MODULE}./g" \
        -e "s/${SOURCE_MODULE}./${TARGET_MODULE}./g" \
        -e '/ *@typing_extensions\.final/d' \
        "$file" > "$file.tmp"
      mv -- "$file.tmp" "$file"
      ;;
  esac

  # Prepend the Apache licence header to the file.
  cat "$header_file" "$file" > "$file.tmp"
  mv -- "$file.tmp" "$file"

  line_count=$(wc -l < "$file")
  line_count=$((line_count)) # strips the padding some wc variants emit
  echo "$line_count"

  # A *_grpc.py file of exactly 20 lines is deleted.
  # NOTE(review): 20 is presumably the 16-line header plus an otherwise
  # empty 4-line gRPC stub - confirm before changing the header length.
  if [[ "$file" == *_grpc.py && "$line_count" -eq 20 ]]; then
    rm -- "$file"
  fi
}

# Collect the generated files first (NUL-delimited, so names containing
# whitespace or glob characters survive) and only then modify them; the
# temporary "*.py.tmp" files created during processing also match "*.py*"
# and must not be picked up by a find that is still running.
generated_files=()
while IFS= read -r -d '' generated; do
  generated_files+=("$generated")
done < <(find gen/proto/python -name "*.py*" -print0)

for generated in "${generated_files[@]}"; do
  postprocess_generated_file "$generated" /tmp/tmp_licence
done

# Format the generated sources with black, using the repository's
# configuration. The config path is quoted so that a SPARK_HOME containing
# spaces is passed as a single argument.
black --config "$SPARK_HOME/dev/pyproject.toml" gen/proto/python

# Last step: copy the result files to the destination module.
# The paths are read NUL-delimited and every operand is quoted, so neither a
# file name nor OUTPUT_PATH is ever word-split or glob-expanded. A failing cp
# still aborts the script under `set -e`.
while IFS= read -r -d '' generated; do
  cp -- "$generated" "$OUTPUT_PATH"
done < <(find gen/proto/python -name "*.py*" -print0)

# The intermediate gen directory is no longer needed once the files have
# been copied; remove it.
rm -rf gen