#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Validates that a BigQuery table exists and contains at least one row.
#
# Usage: validate_table <table_name> [retry_delay_seconds]
#
# Globals:
#   GCP_PROJECT_ID (read) - project that owns the dataset; must be non-empty.
#   BQ_DATASET     (read) - dataset that contains the table; must be non-empty.
# Arguments:
#   $1 - table name (without project/dataset prefix); must be non-empty.
#   $2 - optional non-negative integer: seconds to sleep between attempts
#        (default 60). Any other value is ignored and the default is used.
# Outputs:
#   DEBUG/SUCCESS/ERROR progress lines on stdout.
# Returns:
#   0 as soon as one query reports a row count greater than zero;
#   1 if the table name or a required environment variable is missing, or if
#     none of the 10 attempts produced a positive row count.
#
# The function always returns (it never calls exit), so it can be used from a
# shell that sourced this file without terminating that shell.
validate_table() {
  # ${N:-} keeps the function usable when the caller runs with `set -u`.
  local table_name=${1:-}
  echo "DEBUG: ===== Starting validate_table for table: $table_name ====="

  # Fail fast instead of spending every retry querying "project.dataset.".
  if [[ -z "$table_name" ]]; then
    echo "ERROR: validate_table requires a table name as its first argument."
    return 1
  fi

  # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are inherited).
  if [[ -z "${GCP_PROJECT_ID:-}" || -z "${BQ_DATASET:-}" ]]; then
    echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the environment."
    return 1 # return, not exit: do not kill a shell that sourced this file
  fi

  # Standard SQL addresses tables as project.dataset.table, whereas
  # `bq show` is given the project:dataset.table form.
  local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}"
  local full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}"
  local count=""
  local exit_code=1
  local validated=0 # becomes 1 once an attempt sees a positive row count
  local retries=10
  local delay=60 # Default seconds between retries
  local attempt
  local query_output

  # Allow overriding delay via second argument (optional).
  if [[ "${2:-}" =~ ^[0-9]+$ ]]; then
    delay=$2
    echo "DEBUG: Using custom retry delay: ${delay}s for table ${table_name}"
  else
    echo "DEBUG: Using default retry delay: ${delay}s for table ${table_name}"
  fi
  echo "DEBUG: Full table ID: ${full_table_id}, Max retries: ${retries}"

  for ((attempt = 1; attempt <= retries; attempt++)); do
    echo "DEBUG: Starting attempt $attempt/$retries..."
    echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID} --use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM \`${full_table_id}\`\""

    # Capture the status through `||` so a failing query does not abort a
    # caller that runs with `set -e`; stderr is folded into the captured text.
    exit_code=0
    query_output=$(bq query --project_id="${GCP_PROJECT_ID}" \
      --use_legacy_sql=false \
      --format=sparse \
      --max_rows=1 \
      "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1) || exit_code=$?
    echo "DEBUG: bq query exit code: $exit_code"
    echo "DEBUG: bq query raw output: [$query_output]"

    if [[ "$exit_code" -eq 0 ]]; then
      echo "DEBUG: bq query exited successfully (code 0)."
      # The count is taken from the last line of the output, with all
      # whitespace removed.
      count=$(printf '%s\n' "$query_output" | tail -n 1 | tr -d '[:space:]')
      echo "DEBUG: Processed count after removing whitespace (from last line): [$count]"
      if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
        echo "DEBUG: Count [$count] is a positive integer. Validation successful for this attempt."
        validated=1
        break # Success! Found non-zero rows
      fi
      echo "DEBUG: Count [$count] is zero or not a positive integer."
      if [[ "$count" == "0" ]]; then
        echo "DEBUG: Explicit count of 0 received."
      fi
    else
      echo "DEBUG: bq query failed (exit code: $exit_code)."
      echo "DEBUG: Checking table existence with bq show..."
      # Diagnostic only: the result is logged but does not change the retry logic.
      if ! bq show --project_id="${GCP_PROJECT_ID}" "${full_table_id_show}" > /dev/null 2>&1; then
        echo "DEBUG: Table ${full_table_id_show} appears not to exist (bq show failed)."
      else
        echo "DEBUG: Table ${full_table_id_show} appears to exist (bq show succeeded), but query failed."
      fi
    fi

    # Sleep only between attempts, never after the last one.
    if ((attempt < retries)); then
      echo "DEBUG: Validation condition not met on attempt $attempt. Retrying in $delay seconds..."
      sleep "$delay"
    else
      echo "DEBUG: Final attempt ($attempt) failed."
    fi
  done

  echo "DEBUG: ===== Final validation check for table: $table_name ====="
  if ((validated)); then
    echo "SUCCESS: Table ${table_name} has ${count} rows. Final validation OK."
    echo "DEBUG: validate_table returning 0 (success)."
    return 0 # Indicate success
  fi

  echo "ERROR: Failed to get a non-zero row count for table ${table_name} after $retries retries (Last exit code: $exit_code, Last processed count: '$count')."
  echo "DEBUG: validate_table returning 1 (failure)."
  return 1 # Indicate failure
}
# Entry point used only when this file is run as a program.
# When the file is sourced ("source ./script.sh") nothing below runs; the
# caller simply gets the function and can invoke it by hand, for example
# "validate_table my_table 30".
if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
  # Direct execution needs at least the table name; otherwise print usage.
  if (( $# < 1 )); then
    printf '%s\n' \
      "Usage: $0 <table_name> [retry_delay_seconds]" \
      "Requires GCP_PROJECT_ID and BQ_DATASET env vars."
    exit 1
  fi
  # As the last command, its status becomes the script's exit status.
  validate_table "$@"
fi