| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| package com.cloud.kubernetes.cluster.actionworkers; |
| |
| import java.io.IOException; |
| import java.net.InetAddress; |
| import java.net.MalformedURLException; |
| import java.net.URL; |
| import java.net.UnknownHostException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.stream.Collectors; |
| |
| import org.apache.cloudstack.api.BaseCmd; |
| import org.apache.cloudstack.api.InternalIdentity; |
| import org.apache.cloudstack.framework.ca.Certificate; |
| import org.apache.cloudstack.utils.security.CertUtils; |
| import org.apache.commons.codec.binary.Base64; |
| import org.apache.commons.collections.CollectionUtils; |
| import org.apache.commons.lang3.StringUtils; |
| import org.apache.log4j.Level; |
| |
| import com.cloud.dc.DataCenter; |
| import com.cloud.dc.Vlan; |
| import com.cloud.dc.VlanVO; |
| import com.cloud.deploy.DeployDestination; |
| import com.cloud.exception.ConcurrentOperationException; |
| import com.cloud.exception.InsufficientAddressCapacityException; |
| import com.cloud.exception.InsufficientCapacityException; |
| import com.cloud.exception.ManagementServerException; |
| import com.cloud.exception.ResourceAllocationException; |
| import com.cloud.exception.ResourceUnavailableException; |
| import com.cloud.hypervisor.Hypervisor; |
| import com.cloud.kubernetes.cluster.KubernetesCluster; |
| import com.cloud.kubernetes.cluster.KubernetesClusterDetailsVO; |
| import com.cloud.kubernetes.cluster.KubernetesClusterManagerImpl; |
| import com.cloud.kubernetes.cluster.KubernetesClusterService; |
| import com.cloud.kubernetes.cluster.KubernetesClusterVO; |
| import com.cloud.kubernetes.cluster.KubernetesClusterVmMapVO; |
| import com.cloud.kubernetes.cluster.utils.KubernetesClusterUtil; |
| import com.cloud.kubernetes.version.KubernetesSupportedVersion; |
| import com.cloud.kubernetes.version.KubernetesVersionManagerImpl; |
| import com.cloud.network.IpAddress; |
| import com.cloud.network.Network; |
| import com.cloud.network.addr.PublicIp; |
| import com.cloud.offering.ServiceOffering; |
| import com.cloud.storage.LaunchPermissionVO; |
| import com.cloud.user.Account; |
| import com.cloud.user.SSHKeyPairVO; |
| import com.cloud.uservm.UserVm; |
| import com.cloud.utils.Pair; |
| import com.cloud.utils.exception.CloudRuntimeException; |
| import com.cloud.utils.net.Ip; |
| import com.cloud.vm.ReservationContext; |
| import com.cloud.vm.ReservationContextImpl; |
| import com.cloud.vm.UserVmManager; |
| import com.cloud.vm.VirtualMachine; |
| import com.cloud.vm.VmDetailConstants; |
| |
| public class KubernetesClusterStartWorker extends KubernetesClusterResourceModifierActionWorker { |
| |
| private KubernetesSupportedVersion kubernetesClusterVersion; |
| |
| public KubernetesClusterStartWorker(final KubernetesCluster kubernetesCluster, final KubernetesClusterManagerImpl clusterManager) { |
| super(kubernetesCluster, clusterManager); |
| } |
| |
| public KubernetesSupportedVersion getKubernetesClusterVersion() { |
| if (kubernetesClusterVersion == null) { |
| kubernetesClusterVersion = kubernetesSupportedVersionDao.findById(kubernetesCluster.getKubernetesVersionId()); |
| } |
| return kubernetesClusterVersion; |
| } |
| |
| private Pair<String, Map<Long, Network.IpAddresses>> getKubernetesControlNodeIpAddresses(final DataCenter zone, final Network network, final Account account) throws InsufficientAddressCapacityException { |
| String controlNodeIp = null; |
| Map<Long, Network.IpAddresses> requestedIps = null; |
| if (Network.GuestType.Shared.equals(network.getGuestType())) { |
| List<Long> vlanIds = new ArrayList<>(); |
| List<VlanVO> vlans = vlanDao.listVlansByNetworkId(network.getId()); |
| for (VlanVO vlan : vlans) { |
| vlanIds.add(vlan.getId()); |
| } |
| PublicIp ip = ipAddressManager.getAvailablePublicIpAddressFromVlans(zone.getId(), null, account, Vlan.VlanType.DirectAttached, vlanIds,network.getId(), null, false); |
| if (ip != null) { |
| controlNodeIp = ip.getAddress().toString(); |
| } |
| requestedIps = new HashMap<>(); |
| Ip ipAddress = ip.getAddress(); |
| boolean isIp6 = ipAddress.isIp6(); |
| requestedIps.put(network.getId(), new Network.IpAddresses(ipAddress.isIp4() ? ip.getAddress().addr() : null, null)); |
| } else { |
| controlNodeIp = ipAddressManager.acquireGuestIpAddress(networkDao.findById(kubernetesCluster.getNetworkId()), null); |
| } |
| return new Pair<>(controlNodeIp, requestedIps); |
| } |
| |
| private boolean isKubernetesVersionSupportsHA() { |
| boolean haSupported = false; |
| KubernetesSupportedVersion version = getKubernetesClusterVersion(); |
| if (version != null) { |
| try { |
| if (KubernetesVersionManagerImpl.compareSemanticVersions(version.getSemanticVersion(), KubernetesClusterService.MIN_KUBERNETES_VERSION_HA_SUPPORT) >= 0) { |
| haSupported = true; |
| } |
| } catch (IllegalArgumentException e) { |
| LOGGER.error(String.format("Unable to compare Kubernetes version for cluster version : %s with %s", version.getName(), KubernetesClusterService.MIN_KUBERNETES_VERSION_HA_SUPPORT), e); |
| } |
| } |
| return haSupported; |
| } |
| |
| private String getKubernetesControlNodeConfig(final String controlNodeIp, final String serverIp, |
| final String hostName, final boolean haSupported, |
| final boolean ejectIso) throws IOException { |
| String k8sControlNodeConfig = readResourceFile("/conf/k8s-control-node.yml"); |
| final String apiServerCert = "{{ k8s_control_node.apiserver.crt }}"; |
| final String apiServerKey = "{{ k8s_control_node.apiserver.key }}"; |
| final String caCert = "{{ k8s_control_node.ca.crt }}"; |
| final String sshPubKey = "{{ k8s.ssh.pub.key }}"; |
| final String clusterToken = "{{ k8s_control_node.cluster.token }}"; |
| final String clusterInitArgsKey = "{{ k8s_control_node.cluster.initargs }}"; |
| final String ejectIsoKey = "{{ k8s.eject.iso }}"; |
| final List<String> addresses = new ArrayList<>(); |
| addresses.add(controlNodeIp); |
| if (!serverIp.equals(controlNodeIp)) { |
| addresses.add(serverIp); |
| } |
| final Certificate certificate = caManager.issueCertificate(null, Arrays.asList(hostName, "kubernetes", |
| "kubernetes.default", "kubernetes.default.svc", "kubernetes.default.svc.cluster", "kubernetes.default.svc.cluster.local"), |
| addresses, 3650, null); |
| final String tlsClientCert = CertUtils.x509CertificateToPem(certificate.getClientCertificate()); |
| final String tlsPrivateKey = CertUtils.privateKeyToPem(certificate.getPrivateKey()); |
| final String tlsCaCert = CertUtils.x509CertificatesToPem(certificate.getCaCertificates()); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(apiServerCert, tlsClientCert.replace("\n", "\n ")); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(apiServerKey, tlsPrivateKey.replace("\n", "\n ")); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(caCert, tlsCaCert.replace("\n", "\n ")); |
| String pubKey = "- \"" + configurationDao.getValue("ssh.publickey") + "\""; |
| String sshKeyPair = kubernetesCluster.getKeyPair(); |
| if (StringUtils.isNotEmpty(sshKeyPair)) { |
| SSHKeyPairVO sshkp = sshKeyPairDao.findByName(owner.getAccountId(), owner.getDomainId(), sshKeyPair); |
| if (sshkp != null) { |
| pubKey += "\n - \"" + sshkp.getPublicKey() + "\""; |
| } |
| } |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(sshPubKey, pubKey); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterToken, KubernetesClusterUtil.generateClusterToken(kubernetesCluster)); |
| String initArgs = ""; |
| if (haSupported) { |
| initArgs = String.format("--control-plane-endpoint %s:%d --upload-certs --certificate-key %s ", |
| serverIp, |
| CLUSTER_API_PORT, |
| KubernetesClusterUtil.generateClusterHACertificateKey(kubernetesCluster)); |
| } |
| initArgs += String.format("--apiserver-cert-extra-sans=%s", serverIp); |
| initArgs += String.format(" --kubernetes-version=%s", getKubernetesClusterVersion().getSemanticVersion()); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterInitArgsKey, initArgs); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(ejectIsoKey, String.valueOf(ejectIso)); |
| k8sControlNodeConfig = updateKubeConfigWithRegistryDetails(k8sControlNodeConfig); |
| |
| return k8sControlNodeConfig; |
| } |
| |
| private UserVm createKubernetesControlNode(final Network network, String serverIp) throws ManagementServerException, |
| ResourceUnavailableException, InsufficientCapacityException { |
| UserVm controlVm = null; |
| DataCenter zone = dataCenterDao.findById(kubernetesCluster.getZoneId()); |
| ServiceOffering serviceOffering = serviceOfferingDao.findById(kubernetesCluster.getServiceOfferingId()); |
| List<Long> networkIds = new ArrayList<Long>(); |
| networkIds.add(kubernetesCluster.getNetworkId()); |
| Pair<String, Map<Long, Network.IpAddresses>> ipAddresses = getKubernetesControlNodeIpAddresses(zone, network, owner); |
| String controlNodeIp = ipAddresses.first(); |
| Map<Long, Network.IpAddresses> requestedIps = ipAddresses.second(); |
| if (Network.GuestType.Shared.equals(network.getGuestType()) && StringUtils.isEmpty(serverIp)) { |
| serverIp = controlNodeIp; |
| } |
| Network.IpAddresses addrs = new Network.IpAddresses(controlNodeIp, null); |
| long rootDiskSize = kubernetesCluster.getNodeRootDiskSize(); |
| Map<String, String> customParameterMap = new HashMap<String, String>(); |
| if (rootDiskSize > 0) { |
| customParameterMap.put("rootdisksize", String.valueOf(rootDiskSize)); |
| } |
| if (Hypervisor.HypervisorType.VMware.equals(clusterTemplate.getHypervisorType())) { |
| customParameterMap.put(VmDetailConstants.ROOT_DISK_CONTROLLER, "scsi"); |
| } |
| String suffix = Long.toHexString(System.currentTimeMillis()); |
| String hostName = String.format("%s-control-%s", kubernetesClusterNodeNamePrefix, suffix); |
| boolean haSupported = isKubernetesVersionSupportsHA(); |
| String k8sControlNodeConfig = null; |
| try { |
| k8sControlNodeConfig = getKubernetesControlNodeConfig(controlNodeIp, serverIp, hostName, haSupported, Hypervisor.HypervisorType.VMware.equals(clusterTemplate.getHypervisorType())); |
| } catch (IOException e) { |
| logAndThrow(Level.ERROR, "Failed to read Kubernetes control node configuration file", e); |
| } |
| String base64UserData = Base64.encodeBase64String(k8sControlNodeConfig.getBytes(com.cloud.utils.StringUtils.getPreferredCharset())); |
| List<String> keypairs = new ArrayList<String>(); |
| if (StringUtils.isNotBlank(kubernetesCluster.getKeyPair())) { |
| keypairs.add(kubernetesCluster.getKeyPair()); |
| } |
| if (zone.isSecurityGroupEnabled()) { |
| List<Long> securityGroupIds = new ArrayList<>(); |
| securityGroupIds.add(kubernetesCluster.getSecurityGroupId()); |
| controlVm = userVmService.createAdvancedSecurityGroupVirtualMachine(zone, serviceOffering, clusterTemplate, networkIds, securityGroupIds, owner, |
| hostName, hostName, null, null, null, Hypervisor.HypervisorType.None, BaseCmd.HTTPMethod.POST,base64UserData, null, null, keypairs, |
| requestedIps, addrs, null, null, null, customParameterMap, null, null, null, |
| null, true, null, UserVmManager.CKS_NODE); |
| } else { |
| controlVm = userVmService.createAdvancedVirtualMachine(zone, serviceOffering, clusterTemplate, networkIds, owner, |
| hostName, hostName, null, null, null, |
| Hypervisor.HypervisorType.None, BaseCmd.HTTPMethod.POST, base64UserData, null, null, keypairs, |
| requestedIps, addrs, null, null, null, customParameterMap, null, null, null, null, true, UserVmManager.CKS_NODE, null); |
| } |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Created control VM ID: %s, %s in the Kubernetes cluster : %s", controlVm.getUuid(), hostName, kubernetesCluster.getName())); |
| } |
| return controlVm; |
| } |
| |
| private String getKubernetesAdditionalControlNodeConfig(final String joinIp, final boolean ejectIso) throws IOException { |
| String k8sControlNodeConfig = readResourceFile("/conf/k8s-control-node-add.yml"); |
| final String joinIpKey = "{{ k8s_control_node.join_ip }}"; |
| final String clusterTokenKey = "{{ k8s_control_node.cluster.token }}"; |
| final String sshPubKey = "{{ k8s.ssh.pub.key }}"; |
| final String clusterHACertificateKey = "{{ k8s_control_node.cluster.ha.certificate.key }}"; |
| final String ejectIsoKey = "{{ k8s.eject.iso }}"; |
| String pubKey = "- \"" + configurationDao.getValue("ssh.publickey") + "\""; |
| String sshKeyPair = kubernetesCluster.getKeyPair(); |
| if (StringUtils.isNotEmpty(sshKeyPair)) { |
| SSHKeyPairVO sshkp = sshKeyPairDao.findByName(owner.getAccountId(), owner.getDomainId(), sshKeyPair); |
| if (sshkp != null) { |
| pubKey += "\n - \"" + sshkp.getPublicKey() + "\""; |
| } |
| } |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(sshPubKey, pubKey); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(joinIpKey, joinIp); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterTokenKey, KubernetesClusterUtil.generateClusterToken(kubernetesCluster)); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterHACertificateKey, KubernetesClusterUtil.generateClusterHACertificateKey(kubernetesCluster)); |
| k8sControlNodeConfig = k8sControlNodeConfig.replace(ejectIsoKey, String.valueOf(ejectIso)); |
| k8sControlNodeConfig = updateKubeConfigWithRegistryDetails(k8sControlNodeConfig); |
| |
| return k8sControlNodeConfig; |
| } |
| |
| private UserVm createKubernetesAdditionalControlNode(final String joinIp, final int additionalControlNodeInstance) throws ManagementServerException, |
| ResourceUnavailableException, InsufficientCapacityException { |
| UserVm additionalControlVm = null; |
| DataCenter zone = dataCenterDao.findById(kubernetesCluster.getZoneId()); |
| ServiceOffering serviceOffering = serviceOfferingDao.findById(kubernetesCluster.getServiceOfferingId()); |
| List<Long> networkIds = new ArrayList<Long>(); |
| networkIds.add(kubernetesCluster.getNetworkId()); |
| Network.IpAddresses addrs = new Network.IpAddresses(null, null); |
| long rootDiskSize = kubernetesCluster.getNodeRootDiskSize(); |
| Map<String, String> customParameterMap = new HashMap<String, String>(); |
| if (rootDiskSize > 0) { |
| customParameterMap.put("rootdisksize", String.valueOf(rootDiskSize)); |
| } |
| if (Hypervisor.HypervisorType.VMware.equals(clusterTemplate.getHypervisorType())) { |
| customParameterMap.put(VmDetailConstants.ROOT_DISK_CONTROLLER, "scsi"); |
| } |
| String suffix = Long.toHexString(System.currentTimeMillis()); |
| String hostName = String.format("%s-control-%s", kubernetesClusterNodeNamePrefix, suffix); |
| String k8sControlNodeConfig = null; |
| try { |
| k8sControlNodeConfig = getKubernetesAdditionalControlNodeConfig(joinIp, Hypervisor.HypervisorType.VMware.equals(clusterTemplate.getHypervisorType())); |
| } catch (IOException e) { |
| logAndThrow(Level.ERROR, "Failed to read Kubernetes control configuration file", e); |
| } |
| |
| String base64UserData = Base64.encodeBase64String(k8sControlNodeConfig.getBytes(com.cloud.utils.StringUtils.getPreferredCharset())); |
| List<String> keypairs = new ArrayList<String>(); |
| if (StringUtils.isNotBlank(kubernetesCluster.getKeyPair())) { |
| keypairs.add(kubernetesCluster.getKeyPair()); |
| } |
| if (zone.isSecurityGroupEnabled()) { |
| List<Long> securityGroupIds = new ArrayList<>(); |
| securityGroupIds.add(kubernetesCluster.getSecurityGroupId()); |
| additionalControlVm = userVmService.createAdvancedSecurityGroupVirtualMachine(zone, serviceOffering, clusterTemplate, networkIds, securityGroupIds, owner, |
| hostName, hostName, null, null, null, Hypervisor.HypervisorType.None, BaseCmd.HTTPMethod.POST,base64UserData, null, null, keypairs, |
| null, addrs, null, null, null, customParameterMap, null, null, null, |
| null, true, null, UserVmManager.CKS_NODE); |
| } else { |
| additionalControlVm = userVmService.createAdvancedVirtualMachine(zone, serviceOffering, clusterTemplate, networkIds, owner, |
| hostName, hostName, null, null, null, |
| Hypervisor.HypervisorType.None, BaseCmd.HTTPMethod.POST, base64UserData, null, null, keypairs, |
| null, addrs, null, null, null, customParameterMap, null, null, null, null, true, UserVmManager.CKS_NODE, null); |
| } |
| |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Created control VM ID : %s, %s in the Kubernetes cluster : %s", additionalControlVm.getUuid(), hostName, kubernetesCluster.getName())); |
| } |
| return additionalControlVm; |
| } |
| |
| private UserVm provisionKubernetesClusterControlVm(final Network network, final String publicIpAddress) throws |
| ManagementServerException, InsufficientCapacityException, ResourceUnavailableException { |
| UserVm k8sControlVM = null; |
| k8sControlVM = createKubernetesControlNode(network, publicIpAddress); |
| addKubernetesClusterVm(kubernetesCluster.getId(), k8sControlVM.getId(), true); |
| if (kubernetesCluster.getNodeRootDiskSize() > 0) { |
| resizeNodeVolume(k8sControlVM); |
| } |
| startKubernetesVM(k8sControlVM); |
| k8sControlVM = userVmDao.findById(k8sControlVM.getId()); |
| if (k8sControlVM == null) { |
| throw new ManagementServerException(String.format("Failed to provision control VM for Kubernetes cluster : %s" , kubernetesCluster.getName())); |
| } |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Provisioned the control VM : %s in to the Kubernetes cluster : %s", k8sControlVM.getDisplayName(), kubernetesCluster.getName())); |
| } |
| return k8sControlVM; |
| } |
| |
| private List<UserVm> provisionKubernetesClusterAdditionalControlVms(final String publicIpAddress) throws |
| InsufficientCapacityException, ManagementServerException, ResourceUnavailableException { |
| List<UserVm> additionalControlVms = new ArrayList<>(); |
| if (kubernetesCluster.getControlNodeCount() > 1) { |
| for (int i = 1; i < kubernetesCluster.getControlNodeCount(); i++) { |
| UserVm vm = null; |
| vm = createKubernetesAdditionalControlNode(publicIpAddress, i); |
| addKubernetesClusterVm(kubernetesCluster.getId(), vm.getId(), true); |
| if (kubernetesCluster.getNodeRootDiskSize() > 0) { |
| resizeNodeVolume(vm); |
| } |
| startKubernetesVM(vm); |
| vm = userVmDao.findById(vm.getId()); |
| if (vm == null) { |
| throw new ManagementServerException(String.format("Failed to provision additional control VM for Kubernetes cluster : %s" , kubernetesCluster.getName())); |
| } |
| additionalControlVms.add(vm); |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Provisioned additional control VM : %s in to the Kubernetes cluster : %s", vm.getDisplayName(), kubernetesCluster.getName())); |
| } |
| } |
| } |
| return additionalControlVms; |
| } |
| |
| private Network startKubernetesClusterNetwork(final DeployDestination destination) throws ManagementServerException { |
| final ReservationContext context = new ReservationContextImpl(null, null, null, owner); |
| Network network = networkDao.findById(kubernetesCluster.getNetworkId()); |
| if (network == null) { |
| String msg = String.format("Network for Kubernetes cluster : %s not found", kubernetesCluster.getName()); |
| LOGGER.warn(msg); |
| stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed); |
| throw new ManagementServerException(msg); |
| } |
| try { |
| networkMgr.startNetwork(network.getId(), destination, context); |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Network : %s is started for the Kubernetes cluster : %s", network.getName(), kubernetesCluster.getName())); |
| } |
| } catch (ConcurrentOperationException | ResourceUnavailableException |InsufficientCapacityException e) { |
| String msg = String.format("Failed to start Kubernetes cluster : %s as unable to start associated network : %s" , kubernetesCluster.getName(), network.getName()); |
| LOGGER.error(msg, e); |
| stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed); |
| throw new ManagementServerException(msg, e); |
| } |
| return network; |
| } |
| |
| protected void setupKubernetesClusterNetworkRules(Network network, List<UserVm> clusterVMs) throws ManagementServerException { |
| if (!Network.GuestType.Isolated.equals(network.getGuestType())) { |
| if (LOGGER.isDebugEnabled()) { |
| LOGGER.debug(String.format("Network : %s for Kubernetes cluster : %s is not an isolated network, therefore, no need for network rules", network.getName(), kubernetesCluster.getName())); |
| } |
| return; |
| } |
| List<Long> clusterVMIds = clusterVMs.stream().map(UserVm::getId).collect(Collectors.toList()); |
| if (network.getVpcId() != null) { |
| IpAddress publicIp = getVpcTierKubernetesPublicIp(network); |
| if (publicIp == null) { |
| throw new ManagementServerException(String.format("No public IP addresses found for VPC tier : %s, Kubernetes cluster : %s", network.getName(), kubernetesCluster.getName())); |
| } |
| setupKubernetesClusterVpcTierRules(publicIp, network, clusterVMIds); |
| return; |
| } |
| IpAddress publicIp = getNetworkSourceNatIp(network); |
| if (publicIp == null) { |
| throw new ManagementServerException(String.format("No source NAT IP addresses found for network : %s, Kubernetes cluster : %s", |
| network.getName(), kubernetesCluster.getName())); |
| } |
| setupKubernetesClusterIsolatedNetworkRules(publicIp, network, clusterVMIds, true); |
| } |
| |
| private void startKubernetesClusterVMs() { |
| List <UserVm> clusterVms = getKubernetesClusterVMs(); |
| for (final UserVm vm : clusterVms) { |
| if (vm == null) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start all VMs in Kubernetes cluster : %s", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed); |
| } |
| try { |
| resizeNodeVolume(vm); |
| startKubernetesVM(vm); |
| } catch (ManagementServerException ex) { |
| LOGGER.warn(String.format("Failed to start VM : %s in Kubernetes cluster : %s due to ", vm.getDisplayName(), kubernetesCluster.getName()) + ex); |
| // don't bail out here. proceed further to stop the reset of the VM's |
| } |
| } |
| for (final UserVm userVm : clusterVms) { |
| UserVm vm = userVmDao.findById(userVm.getId()); |
| if (vm == null || !vm.getState().equals(VirtualMachine.State.Running)) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start all VMs in Kubernetes cluster : %s", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed); |
| } |
| } |
| } |
| |
| private boolean isKubernetesClusterKubeConfigAvailable(final long timeoutTime) { |
| if (StringUtils.isEmpty(publicIpAddress)) { |
| KubernetesClusterDetailsVO kubeConfigDetail = kubernetesClusterDetailsDao.findDetail(kubernetesCluster.getId(), "kubeConfigData"); |
| if (kubeConfigDetail != null && StringUtils.isNotEmpty(kubeConfigDetail.getValue())) { |
| return true; |
| } |
| } |
| String kubeConfig = KubernetesClusterUtil.getKubernetesClusterConfig(kubernetesCluster, publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile, timeoutTime); |
| if (StringUtils.isNotEmpty(kubeConfig)) { |
| final String controlVMPrivateIpAddress = getControlVmPrivateIp(); |
| if (StringUtils.isNotEmpty(controlVMPrivateIpAddress)) { |
| kubeConfig = kubeConfig.replace(String.format("server: https://%s:%d", controlVMPrivateIpAddress, CLUSTER_API_PORT), |
| String.format("server: https://%s:%d", publicIpAddress, CLUSTER_API_PORT)); |
| } |
| kubernetesClusterDetailsDao.addDetail(kubernetesCluster.getId(), "kubeConfigData", Base64.encodeBase64String(kubeConfig.getBytes(com.cloud.utils.StringUtils.getPreferredCharset())), false); |
| return true; |
| } |
| return false; |
| } |
| |
| private boolean isKubernetesClusterDashboardServiceRunning(final boolean onCreate, final Long timeoutTime) { |
| if (!onCreate) { |
| KubernetesClusterDetailsVO dashboardServiceRunningDetail = kubernetesClusterDetailsDao.findDetail(kubernetesCluster.getId(), "dashboardServiceRunning"); |
| if (dashboardServiceRunningDetail != null && Boolean.parseBoolean(dashboardServiceRunningDetail.getValue())) { |
| return true; |
| } |
| } |
| if (KubernetesClusterUtil.isKubernetesClusterDashboardServiceRunning(kubernetesCluster, publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile, timeoutTime, 15000)) { |
| kubernetesClusterDetailsDao.addDetail(kubernetesCluster.getId(), "dashboardServiceRunning", String.valueOf(true), false); |
| return true; |
| } |
| return false; |
| } |
| |
| private void updateKubernetesClusterEntryEndpoint() { |
| KubernetesClusterVO kubernetesClusterVO = kubernetesClusterDao.findById(kubernetesCluster.getId()); |
| kubernetesClusterVO.setEndpoint(String.format("https://%s:%d/", publicIpAddress, CLUSTER_API_PORT)); |
| kubernetesClusterDao.update(kubernetesCluster.getId(), kubernetesClusterVO); |
| } |
| |
| public boolean startKubernetesClusterOnCreate() { |
| init(); |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Starting Kubernetes cluster : %s", kubernetesCluster.getName())); |
| } |
| final long startTimeoutTime = System.currentTimeMillis() + KubernetesClusterService.KubernetesClusterStartTimeout.value() * 1000; |
| stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.StartRequested); |
| DeployDestination dest = null; |
| try { |
| dest = plan(); |
| } catch (InsufficientCapacityException e) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Provisioning the cluster failed due to insufficient capacity in the Kubernetes cluster: %s", kubernetesCluster.getUuid()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed, e); |
| } |
| Network network = null; |
| try { |
| network = startKubernetesClusterNetwork(dest); |
| } catch (ManagementServerException e) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start Kubernetes cluster : %s as its network cannot be started", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed, e); |
| } |
| Pair<String, Integer> publicIpSshPort = new Pair<>(null, null); |
| try { |
| publicIpSshPort = getKubernetesClusterServerIpSshPort(null, true); |
| } catch (InsufficientAddressCapacityException | ResourceAllocationException | ResourceUnavailableException e) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start Kubernetes cluster : %s as failed to acquire public IP" , kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed); |
| } |
| publicIpAddress = publicIpSshPort.first(); |
| if (StringUtils.isEmpty(publicIpAddress) && |
| (Network.GuestType.Isolated.equals(network.getGuestType()) || kubernetesCluster.getControlNodeCount() > 1)) { // Shared network, single-control node cluster won't have an IP yet |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start Kubernetes cluster : %s as no public IP found for the cluster" , kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed); |
| } |
| // Allow account creating the kubernetes cluster to access systemVM template |
| LaunchPermissionVO launchPermission = new LaunchPermissionVO(clusterTemplate.getId(), owner.getId()); |
| launchPermissionDao.persist(launchPermission); |
| |
| List<UserVm> clusterVMs = new ArrayList<>(); |
| UserVm k8sControlVM = null; |
| try { |
| k8sControlVM = provisionKubernetesClusterControlVm(network, publicIpAddress); |
| } catch (CloudRuntimeException | ManagementServerException | ResourceUnavailableException | InsufficientCapacityException e) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Provisioning the control VM failed in the Kubernetes cluster : %s", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed, e); |
| } |
| clusterVMs.add(k8sControlVM); |
| if (StringUtils.isEmpty(publicIpAddress)) { |
| publicIpSshPort = getKubernetesClusterServerIpSshPort(k8sControlVM); |
| publicIpAddress = publicIpSshPort.first(); |
| if (StringUtils.isEmpty(publicIpAddress)) { |
| logTransitStateAndThrow(Level.WARN, String.format("Failed to start Kubernetes cluster : %s as no public IP found for the cluster", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed); |
| } |
| } |
| try { |
| List<UserVm> additionalControlVMs = provisionKubernetesClusterAdditionalControlVms(publicIpAddress); |
| clusterVMs.addAll(additionalControlVMs); |
| } catch (CloudRuntimeException | ManagementServerException | ResourceUnavailableException | InsufficientCapacityException e) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Provisioning additional control VM failed in the Kubernetes cluster : %s", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed, e); |
| } |
| try { |
| List<UserVm> nodeVMs = provisionKubernetesClusterNodeVms(kubernetesCluster.getNodeCount(), publicIpAddress); |
| clusterVMs.addAll(nodeVMs); |
| } catch (CloudRuntimeException | ManagementServerException | ResourceUnavailableException | InsufficientCapacityException e) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Provisioning node VM failed in the Kubernetes cluster : %s", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed, e); |
| } |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Kubernetes cluster : %s VMs successfully provisioned", kubernetesCluster.getName())); |
| } |
| try { |
| setupKubernetesClusterNetworkRules(network, clusterVMs); |
| } catch (ManagementServerException e) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to setup Kubernetes cluster : %s, unable to setup network rules", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed, e); |
| } |
| attachIsoKubernetesVMs(clusterVMs); |
| if (!KubernetesClusterUtil.isKubernetesClusterControlVmRunning(kubernetesCluster, publicIpAddress, publicIpSshPort.second(), startTimeoutTime)) { |
| String msg = String.format("Failed to setup Kubernetes cluster : %s in usable state as unable to access control node VMs of the cluster", kubernetesCluster.getName()); |
| if (kubernetesCluster.getControlNodeCount() > 1 && Network.GuestType.Shared.equals(network.getGuestType())) { |
| msg = String.format("%s. Make sure external load-balancer has port forwarding rules for SSH access on ports %d-%d and API access on port %d", |
| msg, |
| CLUSTER_NODES_DEFAULT_START_SSH_PORT, |
| CLUSTER_NODES_DEFAULT_START_SSH_PORT + kubernetesCluster.getTotalNodeCount() - 1, |
| CLUSTER_API_PORT); |
| } |
| logTransitStateDetachIsoAndThrow(Level.ERROR, msg, kubernetesCluster, clusterVMs, KubernetesCluster.Event.CreateFailed, null); |
| } |
| boolean k8sApiServerSetup = KubernetesClusterUtil.isKubernetesClusterServerRunning(kubernetesCluster, publicIpAddress, CLUSTER_API_PORT, startTimeoutTime, 15000); |
| if (!k8sApiServerSetup) { |
| logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to setup Kubernetes cluster : %s in usable state as unable to provision API endpoint for the cluster", kubernetesCluster.getName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.CreateFailed, null); |
| } |
| sshPort = publicIpSshPort.second(); |
| updateKubernetesClusterEntryEndpoint(); |
| boolean readyNodesCountValid = KubernetesClusterUtil.validateKubernetesClusterReadyNodesCount(kubernetesCluster, publicIpAddress, sshPort, |
| getControlNodeLoginUser(), sshKeyFile, startTimeoutTime, 15000); |
| detachIsoKubernetesVMs(clusterVMs); |
| if (!readyNodesCountValid) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to setup Kubernetes cluster : %s as it does not have desired number of nodes in ready state", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.CreateFailed); |
| } |
| if (!isKubernetesClusterKubeConfigAvailable(startTimeoutTime)) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to setup Kubernetes cluster : %s in usable state as unable to retrieve kube-config for the cluster", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed); |
| } |
| if (!isKubernetesClusterDashboardServiceRunning(true, startTimeoutTime)) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to setup Kubernetes cluster : %s in usable state as unable to get Dashboard service running for the cluster", kubernetesCluster.getName()), kubernetesCluster.getId(),KubernetesCluster.Event.OperationFailed); |
| } |
| taintControlNodes(); |
| deployProvider(); |
| updateLoginUserDetails(clusterVMs.stream().map(InternalIdentity::getId).collect(Collectors.toList())); |
| stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.OperationSucceeded); |
| return true; |
| } |
| |
| public boolean startStoppedKubernetesCluster() throws CloudRuntimeException { |
| init(); |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Starting Kubernetes cluster : %s", kubernetesCluster.getName())); |
| } |
| final long startTimeoutTime = System.currentTimeMillis() + KubernetesClusterService.KubernetesClusterStartTimeout.value() * 1000; |
| stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.StartRequested); |
| startKubernetesClusterVMs(); |
| try { |
| InetAddress address = InetAddress.getByName(new URL(kubernetesCluster.getEndpoint()).getHost()); |
| } catch (MalformedURLException | UnknownHostException ex) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Kubernetes cluster : %s has invalid API endpoint. Can not verify if cluster is in ready state", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed); |
| } |
| Pair<String, Integer> sshIpPort = getKubernetesClusterServerIpSshPort(null); |
| publicIpAddress = sshIpPort.first(); |
| sshPort = sshIpPort.second(); |
| if (StringUtils.isEmpty(publicIpAddress)) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start Kubernetes cluster : %s as no public IP found for the cluster" , kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed); |
| } |
| if (!KubernetesClusterUtil.isKubernetesClusterServerRunning(kubernetesCluster, publicIpAddress, CLUSTER_API_PORT, startTimeoutTime, 15000)) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start Kubernetes cluster : %s in usable state", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed); |
| } |
| if (!isKubernetesClusterKubeConfigAvailable(startTimeoutTime)) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start Kubernetes cluster : %s in usable state as unable to retrieve kube-config for the cluster", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed); |
| } |
| if (!isKubernetesClusterDashboardServiceRunning(false, startTimeoutTime)) { |
| logTransitStateAndThrow(Level.ERROR, String.format("Failed to start Kubernetes cluster : %s in usable state as unable to get Dashboard service running for the cluster", kubernetesCluster.getName()), kubernetesCluster.getId(), KubernetesCluster.Event.OperationFailed); |
| } |
| stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.OperationSucceeded); |
| if (LOGGER.isInfoEnabled()) { |
| LOGGER.info(String.format("Kubernetes cluster : %s successfully started", kubernetesCluster.getName())); |
| } |
| return true; |
| } |
| |
| public boolean reconcileAlertCluster() { |
| init(); |
| final long startTimeoutTime = System.currentTimeMillis() + 3 * 60 * 1000; |
| List<KubernetesClusterVmMapVO> vmMapVOList = getKubernetesClusterVMMaps(); |
| if (CollectionUtils.isEmpty(vmMapVOList) || vmMapVOList.size() != kubernetesCluster.getTotalNodeCount()) { |
| return false; |
| } |
| Pair<String, Integer> sshIpPort = getKubernetesClusterServerIpSshPort(null); |
| publicIpAddress = sshIpPort.first(); |
| sshPort = sshIpPort.second(); |
| if (StringUtils.isEmpty(publicIpAddress)) { |
| return false; |
| } |
| long actualNodeCount = 0; |
| try { |
| actualNodeCount = KubernetesClusterUtil.getKubernetesClusterReadyNodesCount(kubernetesCluster, publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile); |
| } catch (Exception e) { |
| return false; |
| } |
| if (kubernetesCluster.getTotalNodeCount() != actualNodeCount) { |
| return false; |
| } |
| if (StringUtils.isEmpty(sshIpPort.first())) { |
| return false; |
| } |
| if (!KubernetesClusterUtil.isKubernetesClusterServerRunning(kubernetesCluster, sshIpPort.first(), |
| KubernetesClusterActionWorker.CLUSTER_API_PORT, startTimeoutTime, 0)) { |
| return false; |
| } |
| updateKubernetesClusterEntryEndpoint(); |
| if (!isKubernetesClusterKubeConfigAvailable(startTimeoutTime)) { |
| return false; |
| } |
| if (!isKubernetesClusterDashboardServiceRunning(false, startTimeoutTime)) { |
| return false; |
| } |
| // mark the cluster to be running |
| stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.RecoveryRequested); |
| stateTransitTo(kubernetesCluster.getId(), KubernetesCluster.Event.OperationSucceeded); |
| return true; |
| } |
| } |