blob: 1dd999dad9708d05bd1043fe4f14dc9e0eb2a900 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.ha;
import static org.apache.cloudstack.framework.config.ConfigKey.Scope.Cluster;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.utils.component.Manager;
import com.cloud.vm.VMInstanceVO;
import org.apache.cloudstack.framework.config.ConfigKey;
import java.util.List;
/**
 * Contract of the high-availability (HA) manager, which checks to make sure the VMs are running fine.
 *
 * <p>The interface declares the HA-related configuration keys, the kinds of HA work items
 * ({@link WorkType}) with their {@link Step}s, and the operations used to investigate a host and to
 * schedule or cancel restart, migration, stop and destroy work for VMs.
 */
public interface HighAvailabilityManager extends Manager {

    // NOTE(review): the ConfigKey arguments below presumably follow
    // (category, type, name, defaultValue, description, isDynamic, scope) - confirm against ConfigKey.
    // Fields of an interface are implicitly public static final, so no modifiers are spelled out.

    /** Configuration key {@code force.ha}: force High-Availability to happen even if the VM says no. */
    ConfigKey<Boolean> ForceHA = new ConfigKey<>("Advanced", Boolean.class, "force.ha", "false",
            "Force High-Availability to happen even if the VM says no.", true, Cluster);

    /** Configuration key {@code ha.workers}: the number of High-Availability worker threads. */
    ConfigKey<Integer> HAWorkers = new ConfigKey<>("Advanced", Integer.class, "ha.workers", "5",
            "The number of High-Availability worker threads.", true, Cluster);

    /** Configuration key {@code investigate.retry.interval}: seconds between VM pings when the agent is disconnected. */
    ConfigKey<Integer> InvestigateRetryInterval = new ConfigKey<>("Advanced", Integer.class, "investigate.retry.interval",
            "60", "The time (in seconds) between VM pings when the agent is disconnected.", true, Cluster);

    /** Configuration key {@code migrate.retry.interval}: seconds between migration retries. */
    ConfigKey<Integer> MigrateRetryInterval = new ConfigKey<>("Advanced", Integer.class, "migrate.retry.interval",
            "120", "The time (in seconds) between migration retries.", true, Cluster);

    /** Configuration key {@code restart.retry.interval}: seconds between retries to restart a VM. */
    ConfigKey<Integer> RestartRetryInterval = new ConfigKey<>("Advanced", Integer.class, "restart.retry.interval",
            "600", "The time (in seconds) between retries to restart a VM.", true, Cluster);

    /** Configuration key {@code stop.retry.interval}: seconds between retries to stop or destroy a VM. */
    ConfigKey<Integer> StopRetryInterval = new ConfigKey<>("Advanced", Integer.class, "stop.retry.interval",
            "600", "The time in seconds between retries to stop or destroy a VM.", true, Cluster);

    /**
     * Configuration key {@code time.between.cleanup}: seconds to wait between runs of the cleanup
     * thread that purges successful work items from table {@code op_ha_work}.
     */
    // NOTE(review): the description text contains the typo "the the database"; it is left untouched
    // here because it is a runtime string - fix separately if changing the stored description is acceptable.
    ConfigKey<Long> TimeBetweenCleanup = new ConfigKey<>("Advanced", Long.class,
            "time.between.cleanup", "86400", "The time in seconds to wait before the"
            + " cleanup thread runs for the different HA-Worker-Threads. The cleanup thread finds all the work items "
            + "that were successful and is now ready to be purged from the the database (table: op_ha_work).",
            true, Cluster);

    /** Configuration key {@code max.retries}: number of attempts for the different work types. */
    // NOTE(review): the description lists "Destroy - a VM" twice; left untouched because it is a runtime string.
    ConfigKey<Integer> MaxRetries = new ConfigKey<>("Advanced", Integer.class, "max.retries",
            "5", "The number of times to try a restart for the different Work-Types: "
            + "Migrating - VMs off of a host, Destroy - a VM, Stop - a VM for storage pool migration purposes,"
            + " CheckStop - checks if a VM has been stopped, ForceStop - force a VM to stop even if the "
            + "states don't allow it, Destroy - a VM and HA - restart a VM.", true, Cluster);

    /**
     * Configuration key {@code time.to.sleep}: seconds to sleep before checking table
     * {@code op_ha_work} again when no work items are found.
     */
    ConfigKey<Long> TimeToSleep = new ConfigKey<>("Advanced", Long.class, "time.to.sleep",
            "60", "The time in seconds to sleep before checking the database (table: op_ha_work) "
            + "for new working types (Migration, Stop, CheckStop, ForceStop, Destroy and HA), if no work items are found.",
            true, Cluster);

    /** Configuration key {@code time.between.failures}; see the description text for its meaning. */
    ConfigKey<Long> TimeBetweenFailures = new ConfigKey<>("Advanced", Long.class,
            "time.between.failures", "3600", "The time in seconds before try to cleanup all the VMs"
            + " which are registered for the HA event that were successful and are now ready to be purged.",
            true, Cluster);

    /** The kinds of work items the HA manager deals with. */
    enum WorkType {
        /** Migrating VMs off of a host. */
        Migration,
        /** Stops a VM for storage pool migration purposes. This should be obsolete now. */
        Stop,
        /** Checks if a VM has been stopped. */
        CheckStop,
        /**
         * Force a VM to stop even if the states don't allow it. Use this only if you know the VM is
         * stopped on the physical hypervisor.
         */
        ForceStop,
        /** Destroy a VM. */
        Destroy,
        /** Restart a VM. */
        HA
    }

    /**
     * Steps of HA work.
     * NOTE(review): presumably the stages a work item moves through - confirm against the implementation.
     */
    enum Step {
        Scheduled, Investigating, Fencing, Stopping, Restarting, Migrating, Cancelled, Done, Error
    }

    /**
     * Investigate why a host has disconnected and migrate the VMs on it
     * if necessary.
     *
     * @param hostId id of the host that has disconnected.
     * @return a host {@link Status}; presumably the status determined by the investigation -
     *         confirm against the implementation.
     */
    Status investigate(long hostId);

    /**
     * Restart a VM that has gone away. Whether the VM is actually restarted depends on:
     * <ol>
     *   <li>Is the VM really dead? This method will try to find out.</li>
     *   <li>Is the VM HA enabled? If not, the VM is simply stopped.</li>
     * </ol>
     *
     * <p>A VM that enters HA mode is not allowed to be operated on until it
     * has been determined that the VM is dead.
     *
     * @param vm the VM that has gone away.
     * @param investigate whether the VM must be investigated before anything is done with it.
     */
    void scheduleRestart(VMInstanceVO vm, boolean investigate);

    /**
     * Cancel the destroy of a VM.
     * NOTE(review): presumably cancels pending {@link WorkType#Destroy} work - confirm against the implementation.
     *
     * @param vm the VM whose destroy is cancelled.
     * @param hostId id of the host; a boxed {@code Long} here (unlike
     *               {@link #scheduleDestroy(VMInstanceVO, long)}), so {@code null} may be meaningful -
     *               confirm with the implementation.
     */
    void cancelDestroy(VMInstanceVO vm, Long hostId);

    /**
     * Schedule the destroy of a VM.
     * NOTE(review): presumably creates {@link WorkType#Destroy} work - confirm against the implementation.
     *
     * @param vm the VM to destroy.
     * @param hostId id of the host.
     */
    void scheduleDestroy(VMInstanceVO vm, long hostId);

    /**
     * Schedule restarts for all VMs running on the host.
     *
     * @param host the host whose VMs are to be restarted.
     * @param investigate presumably has the same meaning as in
     *                    {@link #scheduleRestart(VMInstanceVO, boolean)} - TODO confirm.
     */
    void scheduleRestartForVmsOnHost(HostVO host, boolean investigate);

    /**
     * Schedule the VM for migration.
     *
     * @param vm the VM to migrate.
     * @return true if schedule worked.
     */
    boolean scheduleMigration(VMInstanceVO vm);

    /**
     * Find VMs with taken migration work.
     * NOTE(review): the exact meaning of "taken" is not visible here; presumably migration work
     * already picked up for processing - confirm against the implementation.
     *
     * @return the matching VMs.
     */
    List<VMInstanceVO> findTakenMigrationWork();

    /**
     * Schedules a work item to stop a VM. This method schedules a work
     * item to do one of three things.
     * <ol>
     *   <li>Perform a regular stop of a VM: {@link WorkType#Stop}</li>
     *   <li>Perform a force stop of a VM: {@link WorkType#ForceStop}</li>
     *   <li>Check if a VM has been stopped: {@link WorkType#CheckStop}</li>
     * </ol>
     *
     * @param vm virtual machine to stop.
     * @param hostId id of the host the virtual machine is on.
     * @param type which type of stop is requested.
     */
    void scheduleStop(VMInstanceVO vm, long hostId, WorkType type);

    /**
     * Cancel the scheduled migrations for a host.
     * NOTE(review): presumably affects migration work of the VMs on that host - confirm against the implementation.
     *
     * @param host the host whose scheduled migrations are cancelled.
     */
    void cancelScheduledMigrations(HostVO host);

    /**
     * Tell whether a VM has pending HA work.
     *
     * @param vmId id of the VM.
     * @return true if there is pending HA work for the VM.
     */
    boolean hasPendingHaWork(long vmId);

    /**
     * Tell whether a VM has pending migration work.
     *
     * @param vmId id of the VM.
     * @return true if there is pending migration work for the VM.
     */
    boolean hasPendingMigrationsWork(long vmId);

    /**
     * @return the HA tag. NOTE(review): its exact use is not visible here - confirm against the implementation.
     */
    String getHaTag();

    /**
     * @return the {@link DeploymentPlanner} used for HA.
     */
    DeploymentPlanner getHAPlanner();
}