blob: c4b99d3143453f5891fe6005911bd9c6ee72c64a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.servicecomb.loadbalance.filter;
import java.util.HashMap;
import java.util.Map;
import org.apache.servicecomb.core.Invocation;
import org.apache.servicecomb.foundation.common.event.AlarmEvent.Type;
import org.apache.servicecomb.foundation.common.event.EventManager;
import org.apache.servicecomb.loadbalance.Configuration;
import org.apache.servicecomb.loadbalance.ServiceCombLoadBalancerStats;
import org.apache.servicecomb.loadbalance.ServiceCombServer;
import org.apache.servicecomb.loadbalance.ServiceCombServerStats;
import org.apache.servicecomb.loadbalance.event.IsolationServerEvent;
import org.apache.servicecomb.serviceregistry.api.registry.MicroserviceInstance;
import org.apache.servicecomb.serviceregistry.discovery.DiscoveryContext;
import org.apache.servicecomb.serviceregistry.discovery.DiscoveryFilter;
import org.apache.servicecomb.serviceregistry.discovery.DiscoveryTreeNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.EventBus;
import com.netflix.config.DynamicBooleanProperty;
import com.netflix.config.DynamicPropertyFactory;
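/**
* Discovery filter that isolates instances based on their error metrics
* (failure rate and continuous failure count) and lets them recover afterwards.
*/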
public class IsolationDiscoveryFilter implements DiscoveryFilter {
  // Not referenced inside this class; presumably read by other load-balance components (TODO confirm).
  public static final String TRYING_INSTANCES_EXISTING = "scb-hasTryingInstances";

  private static final Logger LOGGER = LoggerFactory.getLogger(IsolationDiscoveryFilter.class);

  // Configuration key of the switch that returns the unfiltered instances when every instance was filtered out.
  private static final String EMPTY_INSTANCE_PROTECTION = "servicecomb.loadbalance.filter.isolation.emptyInstanceProtectionEnabled";

  // Name under which the filtered node is stored in the parent's children. The spelling is a
  // runtime key and is therefore reproduced exactly.
  private static final String FILTERED_CHILD_NAME = "filterred";

  private final DynamicBooleanProperty emptyProtection = DynamicPropertyFactory.getInstance()
      .getBooleanProperty(EMPTY_INSTANCE_PROTECTION, false);

  // Last value observed for emptyProtection; only used to report the previous value when the
  // property changes. Must be declared after emptyProtection because of initializer order.
  private volatile boolean lastEmptyProtection = emptyProtection.get();

  /**
   * Snapshot of the isolation thresholds used for one isolation decision.
   */
  public class Settings {
    // Failure-rate threshold; a value of 0 disables the failure-rate check.
    public int errorThresholdPercentage;

    // Compared, in milliseconds, against the time since an isolated instance was last visited
    // before it may apply for a trying chance.
    public long singleTestTime;

    // Minimum number of total requests before any threshold is evaluated.
    public long enableRequestThreshold;

    // Continuous failure count that triggers isolation; only evaluated when greater than 0.
    public int continuousFailureThreshold;

    public int minIsolationTime; // to avoid isolation recover too fast due to no concurrent control in concurrent scenario
  }

  // Bus used to publish IsolationServerEvent notifications.
  public EventBus eventBus = EventManager.getEventBus();

  @Override
  public int getOrder() {
    return 500;
  }

  /**
   * Creates the filter and registers a callback that logs changes of the
   * empty-instance-protection switch.
   */
  public IsolationDiscoveryFilter() {
    emptyProtection.addCallback(() -> {
      boolean newValue = emptyProtection.get();
      // Log the previously observed value; the property wrapper itself only exposes the new one.
      LOGGER.info("{} changed from {} to {}", EMPTY_INSTANCE_PROTECTION, lastEmptyProtection, newValue);
      lastEmptyProtection = newValue;
    });
  }

  /**
   * @return the value of "servicecomb.loadbalance.filter.isolation.enabled", defaulting to true
   */
  @Override
  public boolean enabled() {
    return DynamicPropertyFactory.getInstance()
        .getBooleanProperty("servicecomb.loadbalance.filter.isolation.enabled", true).get();
  }

  @Override
  public boolean isGroupingFilter() {
    return false;
  }

  /**
   * Filters the parent's instances down to those that are currently allowed to be visited.
   *
   * @param context discovery context; its input parameters are read as the current {@link Invocation}
   * @param parent node whose data is the map of candidate instances
   * @return {@code parent} itself when the isolation filter is not open for the target microservice,
   *         otherwise the child node holding the allowed instances (or all instances when
   *         empty-instance protection applies)
   */
  @Override
  public DiscoveryTreeNode discovery(DiscoveryContext context, DiscoveryTreeNode parent) {
    Map<String, MicroserviceInstance> instances = parent.data();
    Invocation invocation = context.getInputParameters();
    if (!Configuration.INSTANCE.isIsolationFilterOpen(invocation.getMicroserviceName())) {
      return parent;
    }

    Map<String, MicroserviceInstance> filteredServers = new HashMap<>();
    for (Map.Entry<String, MicroserviceInstance> entry : instances.entrySet()) {
      MicroserviceInstance instance = entry.getValue();
      if (allowVisit(invocation, instance)) {
        filteredServers.put(entry.getKey(), instance);
      }
    }

    // NOTE(review): the child node is cached on the parent and its data is overwritten on every
    // call; confirm this is safe when several invocations run discovery concurrently.
    DiscoveryTreeNode child = parent.children()
        .computeIfAbsent(FILTERED_CHILD_NAME, etn -> new DiscoveryTreeNode());

    boolean allInstancesStep = ZoneAwareDiscoveryFilter.GROUP_Instances_All
        .equals(context.getContextParameter(ZoneAwareDiscoveryFilter.KEY_ZONE_AWARE_STEP));
    if (allInstancesStep && filteredServers.isEmpty() && emptyProtection.get()) {
      LOGGER.warn("All servers have been isolated, allow one of them based on load balance rule.");
      child.data(instances);
    } else {
      child.data(filteredServers);
    }

    // NOTE(review): presumably redundant after computeIfAbsent above; kept to preserve behavior.
    parent.child(FILTERED_CHILD_NAME, child);
    return child;
  }

  /**
   * Reads the isolation thresholds configured for the invocation's target microservice.
   */
  private Settings createSettings(Invocation invocation) {
    String microserviceName = invocation.getMicroserviceName();
    Settings settings = new Settings();
    settings.errorThresholdPercentage = Configuration.INSTANCE.getErrorThresholdPercentage(microserviceName);
    settings.singleTestTime = Configuration.INSTANCE.getSingleTestTime(microserviceName);
    settings.enableRequestThreshold = Configuration.INSTANCE.getEnableRequestThreshold(microserviceName);
    settings.continuousFailureThreshold = Configuration.INSTANCE.getContinuousFailureThreshold(microserviceName);
    settings.minIsolationTime = Configuration.INSTANCE.getMinIsolationTime(microserviceName);
    return settings;
  }

  /**
   * Decides whether the instance may be visited, updating its isolation flag and posting
   * OPEN/CLOSE isolation events when the state changes.
   *
   * @return {@code true} when the instance may be used for this invocation
   */
  private boolean allowVisit(Invocation invocation, MicroserviceInstance instance) {
    ServiceCombServer server = ServiceCombLoadBalancerStats.INSTANCE.getServiceCombServer(instance);
    if (server == null) {
      // first time accessed.
      return true;
    }

    ServiceCombServerStats serverStats = ServiceCombLoadBalancerStats.INSTANCE.getServiceCombServerStats(server);
    Settings settings = createSettings(invocation);
    if (!checkThresholdAllowed(settings, serverStats)) {
      // Already isolated and not visited for longer than singleTestTime: let one request try it.
      if (serverStats.isIsolated()
          && (System.currentTimeMillis() - serverStats.getLastVisitTime()) > settings.singleTestTime) {
        return ServiceCombServerStats.applyForTryingChance(invocation);
      }
      if (!serverStats.isIsolated()) {
        // checkThresholdAllowed is not concurrent control, may print several logs/events in current access.
        serverStats.markIsolated(true);
        eventBus.post(
            new IsolationServerEvent(invocation, instance, serverStats,
                settings, Type.OPEN, server.getEndpoint()));
        LOGGER.warn("Isolate service {}'s instance {}.", invocation.getMicroserviceName(),
            instance.getInstanceId());
      }
      return false;
    }

    if (serverStats.isIsolated()) {
      // Keep the instance isolated for at least minIsolationTime, so that a high volume of
      // concurrent requests with a partial error rate (e.g. 50%) does not end isolation at once.
      if ((System.currentTimeMillis() - serverStats.getIsolatedTime()) <= settings.minIsolationTime) {
        return false;
      }
      serverStats.markIsolated(false);
      eventBus.post(new IsolationServerEvent(invocation, instance, serverStats,
          settings, Type.CLOSE, server.getEndpoint()));
      LOGGER.warn("Recover service {}'s instance {} from isolation.", invocation.getMicroserviceName(),
          instance.getInstanceId());
    }
    return true;
  }

  /**
   * Checks the server statistics against the configured thresholds.
   *
   * @return {@code true} when the statistics are within the thresholds, {@code false} when the
   *         instance should be (or stay) isolated
   */
  private boolean checkThresholdAllowed(Settings settings, ServiceCombServerStats serverStats) {
    // Too few requests so far: no threshold is evaluated.
    if (serverStats.getTotalRequests() < settings.enableRequestThreshold) {
      return true;
    }

    // continuousFailureThreshold has higher priority to decide the result
    if (settings.continuousFailureThreshold > 0
        && serverStats.getContinuousFailureCount() >= settings.continuousFailureThreshold) {
      return false;
    }

    // A threshold of 0 disables the failure-rate check.
    if (settings.errorThresholdPercentage == 0) {
      return true;
    }
    return serverStats.getFailedRate() < settings.errorThresholdPercentage;
  }
}