blob: 4fefa54a6d9807e0806f4c30ee4c777c51dbc6ed [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.kubernetes.cluster.actionworkers;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Level;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.kubernetes.cluster.KubernetesCluster;
import com.cloud.kubernetes.cluster.KubernetesClusterManagerImpl;
import com.cloud.kubernetes.cluster.KubernetesClusterService;
import com.cloud.kubernetes.cluster.KubernetesClusterVO;
import com.cloud.kubernetes.cluster.utils.KubernetesClusterUtil;
import com.cloud.kubernetes.version.KubernetesSupportedVersion;
import com.cloud.kubernetes.version.KubernetesVersionManagerImpl;
import com.cloud.uservm.UserVm;
import com.cloud.utils.Pair;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.ssh.SshHelper;
public class KubernetesClusterUpgradeWorker extends KubernetesClusterActionWorker {
private List<UserVm> clusterVMs = new ArrayList<>();
private KubernetesSupportedVersion upgradeVersion;
private final String upgradeScriptFilename = "upgrade-kubernetes.sh";
private File upgradeScriptFile;
private long upgradeTimeoutTime;
public KubernetesClusterUpgradeWorker(final KubernetesCluster kubernetesCluster,
final KubernetesSupportedVersion upgradeVersion,
final KubernetesClusterManagerImpl clusterManager,
final String[] keys) {
super(kubernetesCluster, clusterManager);
this.upgradeVersion = upgradeVersion;
this.keys = keys;
}
protected void retrieveScriptFiles() {
super.retrieveScriptFiles();
upgradeScriptFile = retrieveScriptFile(upgradeScriptFilename);
}
private Pair<Boolean, String> runInstallScriptOnVM(final UserVm vm, final int index) throws Exception {
int nodeSshPort = sshPort == 22 ? sshPort : sshPort + index;
String nodeAddress = (index > 0 && sshPort == 22) ? vm.getPrivateIpAddress() : publicIpAddress;
SshHelper.scpTo(nodeAddress, nodeSshPort, getControlNodeLoginUser(), sshKeyFile, null,
"~/", upgradeScriptFile.getAbsolutePath(), "0755");
String cmdStr = String.format("sudo ./%s %s %s %s %s",
upgradeScriptFile.getName(),
upgradeVersion.getSemanticVersion(),
index == 0 ? "true" : "false",
KubernetesVersionManagerImpl.compareSemanticVersions(upgradeVersion.getSemanticVersion(), "1.15.0") < 0 ? "true" : "false",
Hypervisor.HypervisorType.VMware.equals(vm.getHypervisorType()));
return SshHelper.sshExecute(nodeAddress, nodeSshPort, getControlNodeLoginUser(), sshKeyFile, null,
cmdStr,
10000, 10000, 10 * 60 * 1000);
}
private void upgradeKubernetesClusterNodes() {
for (int i = 0; i < clusterVMs.size(); ++i) {
UserVm vm = clusterVMs.get(i);
String hostName = vm.getHostName();
if (StringUtils.isNotEmpty(hostName)) {
hostName = hostName.toLowerCase();
}
Pair<Boolean, String> result;
if (logger.isInfoEnabled()) {
logger.info(String.format("Upgrading node on VM %s in Kubernetes cluster %s with Kubernetes version(%s) ID: %s",
vm.getDisplayName(), kubernetesCluster.getName(), upgradeVersion.getSemanticVersion(), upgradeVersion.getUuid()));
}
String errorMessage = String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName());
for (int retry = KubernetesClusterService.KubernetesClusterUpgradeRetries.value(); retry >= 0; retry--) {
try {
result = SshHelper.sshExecute(publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile, null,
String.format("sudo /opt/bin/kubectl drain %s --ignore-daemonsets --delete-emptydir-data", hostName),
10000, 10000, 60000);
if (result.first()) {
break;
}
if (retry > 0) {
logger.error(String.format("%s, retries left: %s", errorMessage, retry));
} else {
logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
} catch (Exception e) {
if (retry > 0) {
logger.error(String.format("%s due to %s, retries left: %s", errorMessage, e, retry));
} else {
logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
}
}
}
if (System.currentTimeMillis() > upgradeTimeoutTime) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, upgrade action timed out", kubernetesCluster.getName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
errorMessage = String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName());
for (int retry = KubernetesClusterService.KubernetesClusterUpgradeRetries.value(); retry >= 0; retry--) {
try {
deployProvider();
result = runInstallScriptOnVM(vm, i);
if (result.first()) {
break;
}
if (retry > 0) {
logger.error(String.format("%s, retries left: %s", errorMessage, retry));
} else {
logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
} catch (Exception e) {
if (retry > 0) {
logger.error(String.format("%s due to %s, retries left: %s", errorMessage, e, retry));
} else {
logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
}
}
}
if (System.currentTimeMillis() > upgradeTimeoutTime) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, upgrade action timed out", kubernetesCluster.getName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
if (!KubernetesClusterUtil.uncordonKubernetesClusterNode(kubernetesCluster, publicIpAddress, sshPort, getControlNodeLoginUser(), getManagementServerSshPublicKeyFile(), vm, upgradeTimeoutTime, 15000)) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to uncordon Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
if (i == 0) { // Wait for control node to get in Ready state
if (!KubernetesClusterUtil.isKubernetesClusterNodeReady(kubernetesCluster, publicIpAddress, sshPort, getControlNodeLoginUser(), getManagementServerSshPublicKeyFile(), hostName, upgradeTimeoutTime, 15000)) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to get control Kubernetes node on VM : %s in ready state", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
}
if (!KubernetesClusterUtil.clusterNodeVersionMatches(upgradeVersion.getSemanticVersion(), publicIpAddress, sshPort, getControlNodeLoginUser(), getManagementServerSshPublicKeyFile(), hostName, upgradeTimeoutTime, 15000)) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to get Kubernetes node on VM : %s upgraded to version %s", kubernetesCluster.getName(), vm.getDisplayName(), upgradeVersion.getSemanticVersion()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
if (logger.isInfoEnabled()) {
logger.info(String.format("Successfully upgraded node on VM %s in Kubernetes cluster %s with Kubernetes version(%s) ID: %s",
vm.getDisplayName(), kubernetesCluster.getName(), upgradeVersion.getSemanticVersion(), upgradeVersion.getUuid()));
}
}
}
public boolean upgradeCluster() throws CloudRuntimeException {
init();
if (logger.isInfoEnabled()) {
logger.info(String.format("Upgrading Kubernetes cluster : %s", kubernetesCluster.getName()));
}
upgradeTimeoutTime = System.currentTimeMillis() + KubernetesClusterService.KubernetesClusterUpgradeTimeout.value() * 1000;
Pair<String, Integer> publicIpSshPort = getKubernetesClusterServerIpSshPort(null);
publicIpAddress = publicIpSshPort.first();
sshPort = publicIpSshPort.second();
if (StringUtils.isEmpty(publicIpAddress)) {
logAndThrow(Level.ERROR, String.format("Upgrade failed for Kubernetes cluster : %s, unable to retrieve associated public IP", kubernetesCluster.getName()));
}
clusterVMs = getKubernetesClusterVMs();
if (CollectionUtils.isEmpty(clusterVMs)) {
logAndThrow(Level.ERROR, String.format("Upgrade failed for Kubernetes cluster : %s, unable to retrieve VMs for cluster", kubernetesCluster.getName()));
}
retrieveScriptFiles();
stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.UpgradeRequested);
attachIsoKubernetesVMs(clusterVMs, upgradeVersion);
upgradeKubernetesClusterNodes();
detachIsoKubernetesVMs(clusterVMs);
KubernetesClusterVO kubernetesClusterVO = kubernetesClusterDao.findById(kubernetesCluster.getId());
kubernetesClusterVO.setKubernetesVersionId(upgradeVersion.getId());
boolean updated = kubernetesClusterDao.update(kubernetesCluster.getId(), kubernetesClusterVO);
if (!updated) {
stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed);
} else {
stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.OperationSucceeded);
}
return updated;
}
}