# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
| |
# Absolute directory that contains this script. Quoted so that a path with
# spaces works, and fatal on failure instead of silently using the cwd.
DIR=$(cd "$(dirname "$0")" && pwd) || {
    echo "cannot resolve the directory of $0" >&2
    exit 1
}

# Shared helpers live next to this script. The functions below call names
# that are not defined in this file (health_log, wait_master_fe_ready, ...);
# presumably they come from common.sh.
source "$DIR/common.sh"

# Marker file: created once this FE has been registered, checked on startup.
REGISTER_FILE="${DORIS_HOME}/status/fe-${MY_ID}-register"
# Edit-log port of the master FE; filled in by fetch_master_fe_editlog_port.
MASTER_EDITLOG_PORT=""
| |
#######################################
# Add this FE to the cluster as a FOLLOWER and create the register marker.
# Waits for the master FE, then retries the ALTER SYSTEM statement once per
# second until it succeeds or the master reports the frontend already exists.
# Globals:
#   MASTER_FE_IP, MASTER_FE_PORT, MY_IP, MY_EDITLOG_PORT (read)
#   REGISTER_FILE (read; the file is created)
#######################################
add_local_fe() {
    local output
    local res

    wait_master_fe_ready

    while true; do
        output=$(mysql -P "$MASTER_FE_PORT" -h "$MASTER_FE_IP" -u root \
            --execute "ALTER SYSTEM ADD FOLLOWER '$MY_IP:$MY_EDITLOG_PORT';" 2>&1)
        res=$?
        health_log "${output}\n"
        [[ $res -eq 0 ]] && break
        # The statement fails if this node was already added (e.g. by an
        # earlier attempt); that still counts as registered.
        grep -q "frontend already exists" <<<"$output" && break
        sleep 1
    done

    touch "$REGISTER_FILE"
}
| |
#######################################
# Endless watcher: once per second, ask the local FE for SHOW FRONTENDS and
# check whether this node is currently the master. When it is, write
# "MY_IP:MY_QUERY_PORT" to MASTER_FE_QUERY_ADDR_FILE and update
# MASTER_FE_IP / MASTER_FE_PORT. Never returns; callers run it with `&`.
# Globals:
#   MY_IP, MY_QUERY_PORT, MASTER_FE_QUERY_ADDR_FILE (read)
#   MASTER_FE_IP, MASTER_FE_PORT (read and written)
#######################################
fe_daemon() {
    local output code header field i
    local host_index is_master_index query_port_index
    local -a fields

    set +e
    while true; do
        sleep 1
        output=$(mysql -P "$MY_QUERY_PORT" -h "$MY_IP" -u root --execute "SHOW FRONTENDS;")
        code=$?
        if [[ $code -ne 0 ]]; then
            health_log "daemon get frontends exec show frontends bad: $output"
            continue
        fi
        if ! header=$(grep IsMaster <<<"$output"); then
            health_log "not found header"
            continue
        fi

        # Locate the 1-based column numbers of the fields we need.
        host_index=-1
        is_master_index=-1
        query_port_index=-1
        fields=()
        read -r -a fields <<<"$header"
        i=1
        for field in "${fields[@]}"; do
            case "$field" in
                Host) host_index=$i ;;
                IsMaster) is_master_index=$i ;;
                QueryPort) query_port_index=$i ;;
            esac
            i=$((i + 1))
        done
        if [[ $host_index -eq -1 ]]; then
            health_log "header not found Host"
            continue
        fi
        if [[ $is_master_index -eq -1 ]]; then
            health_log "header not found IsMaster"
            continue
        fi
        if [[ $query_port_index -eq -1 ]]; then
            health_log "header not found QueryPort"
            continue
        fi

        # Compare each column exactly. Substring/regex matching would let
        # 172.20.80.2 match a master row for 172.20.80.21. The `""`
        # concatenation forces string (not numeric) comparison in awk.
        if awk -v host_col="$host_index" \
            -v master_col="$is_master_index" \
            -v port_col="$query_port_index" \
            -v my_ip="$MY_IP" \
            -v my_port="$MY_QUERY_PORT" '
                ($host_col "") == my_ip && ($port_col "") == my_port && $master_col == "true" { found = 1 }
                END { exit !found }
            ' <<<"$output"; then
            printf '%s\n' "${MY_IP}:${MY_QUERY_PORT}" >"$MASTER_FE_QUERY_ADDR_FILE"
            if [[ "$MASTER_FE_IP" != "$MY_IP" || "${MASTER_FE_PORT}" != "${MY_QUERY_PORT}" ]]; then
                health_log "change to master, last master is ${MASTER_FE_IP}:${MASTER_FE_PORT}"
                MASTER_FE_IP=$MY_IP
                MASTER_FE_PORT=$MY_QUERY_PORT
            fi
        fi
    done
}
| |
#######################################
# Start the FE through bin/start_fe.sh in daemon mode, appending its output
# to log/fe.out.
# Globals:
#   TDE_AK, TDE_SK (read; exported as DORIS_TDE_AK / DORIS_TDE_SK)
#   DORIS_HOME, CLUSTER_SNAPSHOT_FILE, ROLLBACK, REGISTER_FILE (read)
# Arguments:
#   Extra options forwarded verbatim to start_fe.sh.
#######################################
run_fe() {
    local -a extra_args=()

    export DORIS_TDE_AK=${TDE_AK}
    export DORIS_TDE_SK=${TDE_SK}
    health_log "run start_fe.sh"

    # Pass --cluster_snapshot only on first startup (REGISTER_FILE does not
    # exist yet) or when ROLLBACK is set.
    if [[ -n "$CLUSTER_SNAPSHOT_FILE" ]]; then
        if [[ -n "${ROLLBACK}" || ! -f "$REGISTER_FILE" ]]; then
            extra_args=(--cluster_snapshot "$CLUSTER_SNAPSHOT_FILE")
            health_log "Using cluster snapshot: $CLUSTER_SNAPSHOT_FILE"
        fi
    fi

    # Arrays and "$@" keep every argument intact, even with embedded spaces.
    bash "$DORIS_HOME/bin/start_fe.sh" --daemon "${extra_args[@]}" "$@" \
        | tee -a "$DORIS_HOME/log/fe.out"
}
| |
#######################################
# Register this FE with the meta service and create the register marker.
# The node with MY_ID=1 issues add_cluster as FE_MASTER; every other node
# waits for the master FE and issues add_node as FE_OBSERVER.
# IS_FE_FOLLOWER=1 overrides the node type to FE_FOLLOWER. The result is
# verified with get_cluster; the script exits 1 if either call does not
# report code "OK".
# Globals:
#   MY_ID, IS_FE_FOLLOWER, CLOUD_UNIQUE_ID, MY_IP, MY_EDITLOG_PORT,
#   INSTANCE_ID, META_SERVICE_ENDPOINT (read)
#   REGISTER_FILE (read; the file is created)
#######################################
register_sql_server_cluster() {
    local action=add_cluster
    local node_type=FE_MASTER
    local nodes output code

    if [[ "$MY_ID" != "1" ]]; then
        wait_master_fe_ready
        action=add_node
        node_type=FE_OBSERVER
    fi

    if [[ "$IS_FE_FOLLOWER" == "1" ]]; then
        node_type=FE_FOLLOWER
    fi

    nodes='{
        "cloud_unique_id": "'"${CLOUD_UNIQUE_ID}"'",
        "ip": "'"${MY_IP}"'",
        "edit_log_port": "'"${MY_EDITLOG_PORT}"'",
        "node_type": "'"${node_type}"'"
    }'

    # The add request runs between lock_cluster and unlock_cluster.
    lock_cluster

    output=$(curl -s "${META_SERVICE_ENDPOINT}/MetaService/http/${action}?token=greedisgood9999" \
        -d '{"instance_id": "'"${INSTANCE_ID}"'",
            "cluster": {
                "type": "SQL",
                "cluster_name": "RESERVED_CLUSTER_NAME_FOR_SQL_SERVER",
                "cluster_id": "RESERVED_CLUSTER_ID_FOR_SQL_SERVER",
                "nodes": ['"${nodes}"']
            }}')

    unlock_cluster

    health_log "add cluster. output: $output"
    code=$(jq -r '.code' <<<"$output")

    if [[ "$code" != "OK" ]]; then
        health_log "add cluster failed, exit."
        exit 1
    fi

    output=$(curl -s "${META_SERVICE_ENDPOINT}/MetaService/http/get_cluster?token=greedisgood9999" \
        -d '{"instance_id": "'"${INSTANCE_ID}"'",
            "cloud_unique_id": "'"${CLOUD_UNIQUE_ID}"'",
            "cluster_name": "RESERVED_CLUSTER_NAME_FOR_SQL_SERVER",
            "cluster_id": "RESERVED_CLUSTER_ID_FOR_SQL_SERVER"}')

    health_log "get cluster is: $output"
    code=$(jq -r '.code' <<<"$output")

    if [[ "$code" != "OK" ]]; then
        health_log "get cluster failed, exit."
        exit 1
    fi

    touch "$REGISTER_FILE"
}
| |
#######################################
# Start the FE in cloud mode. In order:
#   1. If conf/restore_snapshot.sh exists, run it, rename it to *.bak and
#      start the FE with the metadata-recovery options.
#   2. If the FE is already registered, a cluster snapshot is given, or
#      recovery ran: start the FE directly (registering afterwards when a
#      snapshot is used without ROLLBACK).
#   3. If SQL_MODE_NODE_MGR=1: mark as registered and start.
#   4. Otherwise create (or wait for) the instance, register, then start.
# Exits the script if the recovery script is unusable or fails.
# Globals:
#   DORIS_HOME, REGISTER_FILE, CLUSTER_SNAPSHOT_FILE, ROLLBACK,
#   SQL_MODE_NODE_MGR, MY_ID, HAS_CREATE_INSTANCE_FILE (read)
#   AUTO_CREATE_INSTANCE (read; defaulted to "0")
#######################################
start_cloud_fe() {
    local recovery_script="${DORIS_HOME}/conf/restore_snapshot.sh"
    local -a recovery_args=()
    local journal_id
    local rc

    if [[ -f "$recovery_script" ]]; then
        journal_id=$(grep '^JOURNAL_ID=' "$recovery_script" | head -1 | cut -d= -f2)
        if [[ -z "$journal_id" ]]; then
            health_log "ERROR: Could not extract JOURNAL_ID from recovery script"
            exit 1
        fi
        health_log "Found recovery script with JOURNAL_ID=$journal_id, executing..."
        bash "$recovery_script"
        rc=$?
        if [[ $rc -ne 0 ]]; then
            health_log "ERROR: Recovery script failed with exit code $rc"
            exit "$rc"
        fi
        # Rename so the recovery is not replayed on the next start.
        mv "$recovery_script" "${recovery_script}.bak"
        rc=$?
        if [[ $rc -ne 0 ]]; then
            health_log "ERROR: Failed to rename recovery script to ${recovery_script}.bak"
            exit "$rc"
        fi
        health_log "Recovery script executed and renamed to ${recovery_script}.bak"
        recovery_args=(--metadata_failure_recovery --recovery_journal_id "$journal_id")
    fi

    if [[ -f "$REGISTER_FILE" || -n "${CLUSTER_SNAPSHOT_FILE}" || ${#recovery_args[@]} -gt 0 ]]; then
        fe_daemon &
        run_fe "${recovery_args[@]}"

        # Cluster snapshot is provided, need to register cluster after FE is started.
        if [[ -n "${CLUSTER_SNAPSHOT_FILE}" ]]; then
            wait_doris_instance_ready
            if [[ -z "${ROLLBACK}" ]]; then
                # When ROLLBACK is not set, need to register cluster.
                register_sql_server_cluster
            else
                # Rollback scenario, just create the register file to skip register step.
                touch "$REGISTER_FILE"
            fi
            rm -- "${CLUSTER_SNAPSHOT_FILE}"
        fi

        return
    fi

    # Check if SQL_MODE_NODE_MGR is set to 1
    if [[ "${SQL_MODE_NODE_MGR}" == "1" ]]; then
        health_log "SQL_MODE_NODE_MGR is set to 1. Skipping add FE."

        touch "$REGISTER_FILE"

        fe_daemon &
        run_fe

        return
    fi

    # Support to create instance in FE startup.
    AUTO_CREATE_INSTANCE=${AUTO_CREATE_INSTANCE:-"0"}
    if [[ "$MY_ID" == "1" && "$AUTO_CREATE_INSTANCE" == "1" ]]; then
        health_log "auto create instance is enabled, trying to create instance"
        # Quoted on purpose: an unquoted empty variable turns the test into
        # `[ -f ]`, which is true and would wrongly skip the creation.
        if [[ -f "$HAS_CREATE_INSTANCE_FILE" ]]; then
            health_log "instance has been created before, skip create instance"
        else
            create_doris_instance
        fi
    else
        wait_create_instance
    fi

    register_sql_server_cluster

    fe_daemon &
    run_fe
}
| |
#######################################
# Stop the FE through bin/stop_fe.sh and terminate this script with status 0.
# Installed as the SIGTERM handler by main.
# Globals:
#   DORIS_HOME (read)
#   STOP_GRACE (read; "1" adds --grace to the stop command)
#######################################
stop_frontend() {
    local -a stop_args=()

    if [[ "$STOP_GRACE" == "1" ]]; then
        stop_args+=(--grace)
    fi

    bash "$DORIS_HOME/bin/stop_fe.sh" "${stop_args[@]}"

    exit 0
}
| |
#######################################
# Look for the DorisFE java process (up to 5 tries, one second apart) and
# hand whatever PID was found to wait_pid.
# wait_pid is called with no argument if no process showed up.
#######################################
wait_process() {
    local pid=""
    local attempt

    for ((attempt = 0; attempt < 5; attempt++)); do
        sleep 1
        # Column 4 of `ps -elf` is the PID; -F matches the class name
        # literally instead of treating its dots as regex wildcards.
        pid=$(ps -elf | grep java | grep -F org.apache.doris.DorisFE | grep -v grep | awk '{print $4}')
        if [[ -n "$pid" ]]; then
            break
        fi
    done

    # Intentionally unquoted: an empty result must pass no argument at all.
    # shellcheck disable=SC2086
    wait_pid $pid
}
| |
#######################################
# Ask the master FE for its edit-log port, retrying once per second until a
# non-empty answer arrives, and store it in MASTER_EDITLOG_PORT.
# Globals:
#   MASTER_FE_IP (read)
#   MASTER_FE_PORT (read; falls back to MY_QUERY_PORT when unset or empty)
#   MASTER_EDITLOG_PORT (written)
#######################################
fetch_master_fe_editlog_port() {
    local output
    local code
    # Connect to the master on the master's own query port, the same pair
    # add_local_fe uses; keep the old behaviour as a fallback.
    local master_port=${MASTER_FE_PORT:-$MY_QUERY_PORT}

    set +e
    while true; do
        output=$(mysql -P "$master_port" -h "$MASTER_FE_IP" -u root -N -s \
            --execute "select EditLogPort from frontends() where IsMaster = 'true';")
        code=$?
        if [[ $code -ne 0 ]]; then
            health_log "get editlog port exec show frontends bad: $output"
        elif [[ -n "$output" ]]; then
            MASTER_EDITLOG_PORT=${output}
            break
        fi
        # Pause after a failure and after an empty answer, so that an
        # empty result does not turn into a busy loop against the master.
        sleep 1
    done
}
| |
#######################################
# Start the FE in local (non-cloud) mode.
# The node with MY_ID=1 is always treated as registered. A registered node
# starts directly; any other node first adds itself to the master, looks up
# the master's edit-log port and starts with --helper pointing at it.
# Globals:
#   MY_ID, MASTER_FE_IP, MASTER_EDITLOG_PORT (read)
#   REGISTER_FILE (read; the file may be created)
#######################################
start_local_fe() {
    if [[ "$MY_ID" == "1" && ! -f "$REGISTER_FILE" ]]; then
        touch "$REGISTER_FILE"
    fi

    if [[ -f "$REGISTER_FILE" ]]; then
        fe_daemon &
        run_fe
    else
        add_local_fe
        fe_daemon &
        fetch_master_fe_editlog_port
        run_fe --helper "$MASTER_FE_IP:$MASTER_EDITLOG_PORT"
    fi
}
| |
#######################################
# Entry point: install the SIGTERM handler, start the FE in cloud or local
# mode depending on IS_CLOUD, then wait on the FE process.
# Globals:
#   IS_CLOUD (read; exactly "1" selects cloud mode)
#######################################
main() {
    trap stop_frontend SIGTERM

    case "$IS_CLOUD" in
        1) start_cloud_fe ;;
        *) start_local_fe ;;
    esac

    wait_process
}
| |
# Run the script; forward any command-line arguments to main.
main "$@"