blob: 7a99f09ff01e1572c977b5a7824e8eeebd38d2d7 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ambari.server.serveraction.upgrades;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentMap;
import org.apache.ambari.server.AmbariException;
import org.apache.ambari.server.Role;
import org.apache.ambari.server.RoleCommand;
import org.apache.ambari.server.actionmanager.HostRoleCommand;
import org.apache.ambari.server.actionmanager.HostRoleStatus;
import org.apache.ambari.server.actionmanager.ServiceComponentHostEventWrapper;
import org.apache.ambari.server.agent.CommandReport;
import org.apache.ambari.server.api.services.AmbariMetaInfo;
import org.apache.ambari.server.metadata.ActionMetadata;
import org.apache.ambari.server.orm.dao.HostRoleCommandDAO;
import org.apache.ambari.server.orm.dao.UpgradeDAO;
import org.apache.ambari.server.orm.entities.HostRoleCommandEntity;
import org.apache.ambari.server.orm.entities.UpgradeGroupEntity;
import org.apache.ambari.server.orm.entities.UpgradeItemEntity;
import org.apache.ambari.server.serveraction.AbstractServerAction;
import org.apache.ambari.server.state.Cluster;
import org.apache.ambari.server.state.Clusters;
import org.apache.ambari.server.state.ServiceComponentHostEvent;
import org.apache.ambari.server.state.StackId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import com.google.inject.Inject;
/**
* The {@link AutoSkipFailedSummaryAction} is used to check if any
* {@link HostRoleCommand}s were skipped automatically after they failed during
* an upgrade. This will be automatically marked as
* {@link HostRoleStatus#COMPLETED} if there are no skipped failures. Otherwise
* it will be placed into {@link HostRoleStatus#HOLDING}.
*/
public class AutoSkipFailedSummaryAction extends AbstractServerAction {

  /**
   * Logger.
   */
  private static final Logger LOG = LoggerFactory.getLogger(AutoSkipFailedSummaryAction.class);

  /**
   * The standard output template message; {0} is the number of skipped tasks.
   */
  private static final String FAILURE_STD_OUT_TEMPLATE = "There were {0} skipped failure(s) that must be addressed before you can proceed. Please resolve each failure before continuing with the upgrade.";

  /**
   * Structured-output key (and category name) for skipped service checks.
   */
  private static final String SKIPPED_SERVICE_CHECK = "service_check";

  /**
   * Structured-output key (and category name) for skipped host component
   * commands.
   */
  private static final String SKIPPED_HOST_COMPONENT = "host_component";

  /**
   * Structured-output key holding the set of categories that had skipped
   * failures.
   */
  private static final String SKIPPED = "skipped";

  /**
   * Structured-output key holding the per-category failure details.
   */
  private static final String FAILURES = "failures";

  /**
   * Used to lookup the {@link UpgradeGroupEntity}.
   */
  @Inject
  private UpgradeDAO m_upgradeDAO;

  /**
   * Used to lookup the tasks that need to be checked for
   * {@link HostRoleStatus#SKIPPED_FAILED}.
   */
  @Inject
  private HostRoleCommandDAO m_hostRoleCommandDAO;

  /**
   * Used for writing structured out.
   */
  @Inject
  private Gson m_gson;

  /**
   * Used to look up service check name -> service name bindings
   */
  @Inject
  private ActionMetadata actionMetadata;

  /**
   * Used to look up the service that owns a component in the cluster's stack.
   */
  @Inject
  private AmbariMetaInfo ambariMetaInfo;

  /**
   * Used to look up the cluster the command is running against.
   */
  @Inject
  private Clusters clusters;

  /**
   * Finds every task in the current upgrade group that ended in
   * {@link HostRoleStatus#SKIPPED_FAILED} and summarizes them.
   * <p>
   * When there are no such tasks the report is
   * {@link HostRoleStatus#COMPLETED} with an empty structured output. Otherwise
   * the report is {@link HostRoleStatus#HOLDING} and carries:
   * <ul>
   * <li>structured output of the form
   * <code>{"failures": {"service_check": [service, ...], "host_component":
   * {host: [{"service": ..., "component": ...}, ...]}}, "skipped": [category,
   * ...]}</code>,</li>
   * <li>standard output stating how many tasks were skipped,</li>
   * <li>standard error with one line per skipped task.</li>
   * </ul>
   *
   * @param requestSharedDataContext
   *          shared request data; not used by this action
   * @return the command report describing the skipped failures, if any
   * @throws AmbariException
   *           if the cluster cannot be looked up
   * @throws InterruptedException
   *           declared by the overridden method
   */
  @Override
  public CommandReport execute(ConcurrentMap<String, Object> requestSharedDataContext)
      throws AmbariException, InterruptedException {

    HostRoleCommand hostRoleCommand = getHostRoleCommand();
    long requestId = hostRoleCommand.getRequestId();
    long stageId = hostRoleCommand.getStageId();

    String clusterName = hostRoleCommand.getExecutionCommandWrapper().getExecutionCommand().getClusterName();
    Cluster cluster = clusters.getCluster(clusterName);
    StackId stackId = cluster.getDesiredStackVersion();

    // use the host role command to get to the parent upgrade group
    UpgradeItemEntity upgradeItem = m_upgradeDAO.findUpgradeItemByRequestAndStage(requestId, stageId);
    UpgradeGroupEntity upgradeGroup = upgradeItem.getGroupEntity();

    // find all of the stages in this group
    long upgradeGroupId = upgradeGroup.getId();
    UpgradeGroupEntity upgradeGroupEntity = m_upgradeDAO.findUpgradeGroup(upgradeGroupId);
    List<UpgradeItemEntity> groupUpgradeItems = upgradeGroupEntity.getItems();
    TreeSet<Long> stageIds = new TreeSet<>();
    for (UpgradeItemEntity groupUpgradeItem : groupUpgradeItems) {
      stageIds.add(groupUpgradeItem.getStageId());
    }

    // for every stage, find all tasks that have been SKIPPED_FAILED - we use a
    // bit of trickery here since within any given request, the stage ID are
    // always sequential. This allows us to make a simple query instead of some
    // overly complex IN or NESTED SELECT query
    long minStageId = stageIds.first();
    long maxStageId = stageIds.last();

    List<HostRoleCommandEntity> skippedTasks = m_hostRoleCommandDAO.findByStatusBetweenStages(
        requestId, HostRoleStatus.SKIPPED_FAILED, minStageId, maxStageId);

    if (skippedTasks.isEmpty()) {
      return createCommandReport(0, HostRoleStatus.COMPLETED, "{}",
          "There were no skipped failures", null);
    }

    StringBuilder buffer = new StringBuilder("The following steps failed but were automatically skipped:\n");

    // the structured output is built per invocation so that nothing can leak
    // from one execution into another
    Map<String, Object> structuredFailures = new HashMap<>();
    Set<String> skippedCategories = new HashSet<>();
    Map<String, Object> skippedFailures = new HashMap<>();
    Set<String> skippedServiceChecks = new HashSet<>();

    // hostname -> list of {"service", "component"} detail maps
    Map<String, List<Map<String, String>>> hostComponents = new HashMap<>();

    // Internal representation for failed host components
    // To avoid duplicates
    // Format: <hostname, Set<Role>>
    Map<String, Set<Role>> publishedHostComponents = new HashMap<>();

    for (HostRoleCommandEntity skippedTask : skippedTasks) {
      try {
        String skippedCategory;
        if (skippedTask.getRoleCommand().equals(RoleCommand.SERVICE_CHECK)) {
          skippedCategory = SKIPPED_SERVICE_CHECK;

          String serviceCheckActionName = skippedTask.getRole().toString();
          String service = actionMetadata.getServiceNameByServiceCheckAction(serviceCheckActionName);
          skippedServiceChecks.add(service);

          skippedFailures.put(SKIPPED_SERVICE_CHECK, skippedServiceChecks);
        } else {
          skippedCategory = SKIPPED_HOST_COMPONENT;

          String hostName = skippedTask.getHostName();
          if (null != hostName) {
            List<Map<String, String>> failures = hostComponents.get(hostName);
            if (null == failures) {
              failures = new ArrayList<>();
              hostComponents.put(hostName, failures);
            }

            Set<Role> publishedOnHost = publishedHostComponents.get(hostName);
            if (null == publishedOnHost) {
              publishedOnHost = new HashSet<>();
              publishedHostComponents.put(hostName, publishedOnHost);
            }

            Role role = skippedTask.getRole();
            if (!publishedOnHost.contains(role)) {
              Map<String, String> details = new HashMap<>();
              String service = ambariMetaInfo.getComponentToService(
                  stackId.getStackName(), stackId.getStackVersion(), role.toString());

              details.put("service", service);
              details.put("component", role.toString());
              failures.add(details);

              // remember the role only once it has actually been reported, so
              // that further skipped tasks for the same component on this host
              // do not produce duplicate entries
              publishedOnHost.add(role);
            }
          }

          skippedFailures.put(SKIPPED_HOST_COMPONENT, hostComponents);
        }

        structuredFailures.put(FAILURES, skippedFailures);
        skippedCategories.add(skippedCategory);

        // build the line separately so that a failure half-way through does not
        // leave a partial entry in the error output
        StringBuilder line = new StringBuilder();
        ServiceComponentHostEventWrapper eventWrapper = new ServiceComponentHostEventWrapper(
            skippedTask.getEvent());

        ServiceComponentHostEvent event = eventWrapper.getEvent();
        line.append(event.getServiceComponentName());
        if (null != event.getHostName()) {
          line.append(" on ");
          line.append(event.getHostName());
        }

        line.append(": ");
        line.append(skippedTask.getCommandDetail());
        line.append("\n");

        buffer.append(line);
      } catch (Exception exception) {
        // best effort: still list the task, and keep the cause in the log
        LOG.warn("Unable to extract failure information for {}", skippedTask, exception);
        buffer.append(skippedTask);
        buffer.append("\n");
      }
    }

    structuredFailures.put(SKIPPED, skippedCategories);

    String structuredOutput = m_gson.toJson(structuredFailures);
    String standardOutput = MessageFormat.format(FAILURE_STD_OUT_TEMPLATE, skippedTasks.size());
    String standardError = buffer.toString();

    return createCommandReport(0, HostRoleStatus.HOLDING, structuredOutput, standardOutput,
        standardError);
  }
}