package health

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import (
	"fmt"
	"strings"
	"time"

	"github.com/apache/trafficcontrol/lib/go-log"
	"github.com/apache/trafficcontrol/lib/go-tc"
	"github.com/apache/trafficcontrol/lib/go-util"
	"github.com/apache/trafficcontrol/traffic_monitor/cache"
	"github.com/apache/trafficcontrol/traffic_monitor/config"
	"github.com/apache/trafficcontrol/traffic_monitor/peer"
	"github.com/apache/trafficcontrol/traffic_monitor/threadsafe"
	"github.com/apache/trafficcontrol/traffic_monitor/todata"
)

// dummyCombinedState is a zero-valued placeholder handed to computed-stat
// functions when evaluating threshold stats, which don't use real combined
// states.
var dummyCombinedState tc.IsAvailable

// Human-readable descriptions of a cache server's availability state.
const (
	// AvailableStr describes the state of a cache server that is available
	// to serve traffic.
	AvailableStr = "available"

	// UnavailableStr describes the state of a cache server that is not
	// available to serve traffic.
	UnavailableStr = "unavailable"
)

// GetVitals computes the vitals used to decide a cache server's health from a
// freshly polled result and stores them on newResult.
//
// For every interface of the cache server that the monitoring config marks as
// monitored, the bytes in/out, the maximum output rate and - when a usable
// previous result exists - the current output rate are written to
// newResult.InterfaceVitals[name] and accumulated into newResult.Vitals. The
// load average is taken from the polled one-minute average.
//
// prevResult may be nil, in which case no output rates (KbpsOut) are
// calculated. Rates are likewise skipped when the time between prevResult and
// newResult is not positive, because dividing by it would yield an infinite,
// undefined or negative rate.
//
// Nothing is returned: problems are logged and cause an early return, leaving
// newResult only partially populated.
func GetVitals(newResult *cache.Result, prevResult *cache.Result, mc *tc.TrafficMonitorConfigMap) {
	if newResult == nil {
		log.Errorf("cache_health.GetVitals() called with a nil Result!")
		return
	}

	if newResult.Error != nil {
		log.Errorf("cache_health.GetVitals() called with an errored Result!")
		return
	}

	if mc == nil {
		log.Errorf("TrafficMonitorConfigMap must not be nil")
		return
	}

	if newResult.InterfaceVitals == nil {
		newResult.InterfaceVitals = map[string]cache.Vitals{}
	}

	// proc.loadavg -- we're using the 1 minute average (!?)
	newResult.Vitals.LoadAvg = newResult.Statistics.Loadavg.One

	ts, exists := mc.TrafficServer[newResult.ID]
	if !exists {
		log.Errorf("cache server not found in config map for cache: %s", newResult.ID)
		return
	}
	if ts.Interfaces == nil {
		log.Warnf("no interfaces reported in config map for cache: %s", newResult.ID)
		return
	}

	var monitoredInterfaces []tc.ServerInterfaceInfo
	for _, srvrIfaceInfo := range ts.Interfaces {
		if srvrIfaceInfo.Monitor {
			monitoredInterfaces = append(monitoredInterfaces, srvrIfaceInfo)
		}
	}

	if len(monitoredInterfaces) == 0 {
		log.Warnf("no interfaces selected to be monitored for %v", newResult.ID)
		return
	}

	// Seconds elapsed between the previous and the new poll. It is the
	// divisor of every rate below, so rates are only computed when it is
	// strictly positive: a zero divisor produces +Inf/NaN, whose conversion
	// to an integer is implementation-dependent.
	elapsedTimeInSecs := 0.0
	if prevResult != nil {
		elapsedTimeInSecs = float64(newResult.Time.UnixNano()-prevResult.Time.UnixNano()) / 1000000000
		if elapsedTimeInSecs <= 0 {
			log.Warnf("non-positive time (%v seconds) between polls of cache %v, not calculating bandwidth", elapsedTimeInSecs, newResult.ID)
		}
	}
	canComputeRate := elapsedTimeInSecs > 0

	for _, monitoredInterface := range monitoredInterfaces {
		ifaceName := monitoredInterface.Name
		iface, exists := newResult.Interfaces()[ifaceName]
		if !exists {
			// monitored interface doesn't exist in Result interfaces, skip
			log.Warnf("monitored interface %v does not exist in cache %v", ifaceName, newResult.ID)
			continue
		}

		ifaceVitals := cache.Vitals{
			BytesIn:  iface.BytesIn,
			BytesOut: iface.BytesOut,
			// NOTE(review): presumably Speed is in Mbps, making this Kbps - confirm.
			MaxKbpsOut: iface.Speed * 1000,
		}

		if canComputeRate {
			// Indexing a nil InterfaceVitals map yields zero Vitals, so a
			// missing map and a missing interface both read as 0 bytes and
			// are skipped: without a previous counter there is no delta.
			if prevBytesOut := prevResult.InterfaceVitals[ifaceName].BytesOut; prevBytesOut != 0 {
				// bytes -> bits (*8) -> kilobits (/1000), per second.
				ifaceVitals.KbpsOut = int64(float64((ifaceVitals.BytesOut-prevBytesOut)*8/1000) / elapsedTimeInSecs)
			}
		}
		newResult.InterfaceVitals[ifaceName] = ifaceVitals

		// Overflow possible
		newResult.Vitals.BytesOut += iface.BytesOut
		newResult.Vitals.BytesIn += iface.BytesIn
		// TODO JvD: Should we really be running this code every second for every cache polled????? I don't think so.
		newResult.Vitals.MaxKbpsOut += iface.Speed * 1000
	}

	// Aggregate output rate across all monitored interfaces.
	if canComputeRate && prevResult.Vitals.BytesOut != 0 {
		newResult.Vitals.KbpsOut = int64(float64((newResult.Vitals.BytesOut-prevResult.Vitals.BytesOut)*8/1000) / elapsedTimeInSecs)
	}
}

// EvalCacheWithStatusInfo decides whether the given cache should be marked
// available, based first on the server's configured Status and then on its
// polling information.
//
// A configured status of invalid, ADMIN_DOWN or OFFLINE always yields
// unavailable and ONLINE always yields available, regardless of the polled
// data. For any other status the cache is unavailable if the poll errored or
// the cache reported itself as not available; otherwise result.Available is
// returned as-is.
//
// It returns the availability, a description of the decision, and a third
// string which this function always leaves empty. mc is not used.
func EvalCacheWithStatusInfo(result cache.ResultInfo, mc *tc.TrafficMonitorConfigMap, status tc.CacheStatus, serverStatus string) (bool, string, string) {
	availability := UnavailableStr
	if result.Available {
		availability = AvailableStr
	}

	// Statuses that decide availability on their own, whatever was polled.
	switch status {
	case tc.CacheStatusInvalid:
		log.Errorf("Cache %v got invalid status from Traffic Ops '%v' - treating as OFFLINE\n", result.ID, serverStatus)
		return false, eventDesc(status, availability+"; invalid status"), ""
	case tc.CacheStatusAdminDown:
		return false, eventDesc(status, availability), ""
	case tc.CacheStatusOffline:
		log.Errorf("Cache %v set to offline, but still polled\n", result.ID)
		return false, eventDesc(status, availability), ""
	case tc.CacheStatusOnline:
		return true, eventDesc(status, availability), ""
	}

	// Any other status: fall back to what the poll found.
	if result.Error != nil {
		return false, eventDesc(status, fmt.Sprintf("%v", result.Error)), ""
	}
	if result.Statistics.NotAvailable {
		return false, eventDesc(status, fmt.Sprintf("system.notAvailable == %v", result.Statistics.NotAvailable)), ""
	}
	return result.Available, eventDesc(status, availability), ""
}

// EvalInterface reports whether a single network interface of a cache server
// should be marked available, together with a string describing why when it
// is not (the string is empty when the interface is available).
//
// infVitals holds the polled vitals keyed by interface name; inf is the
// interface's configuration. The rules are:
//   - an interface that is not monitored is always available;
//   - a monitored interface with no entry in infVitals is unavailable;
//   - a nil inf.MaxBandwidth means no bandwidth limit is enforced;
//   - otherwise the interface is unavailable when its KbpsOut exceeds
//     *inf.MaxBandwidth.
//
// TODO change to return a `cache.AvailableStatus`
func EvalInterface(infVitals map[string]cache.Vitals, inf tc.ServerInterfaceInfo) (bool, string) {
	if !inf.Monitor {
		return true, ""
	}

	vitals, ok := infVitals[inf.Name]
	if !ok {
		return false, "not found in polled data"
	}

	if inf.MaxBandwidth == nil {
		return true, ""
	}

	// KbpsOut is signed. Converting a negative rate straight to uint64 would
	// wrap around to an enormous value and falsely trip the limit, so only
	// positive rates are compared against it.
	if vitals.KbpsOut > 0 && uint64(vitals.KbpsOut) > *inf.MaxBandwidth {
		return false, "maximum bandwidth exceeded"
	}

	return true, ""
}

// EvalAggregate determines the availability of a cache server as a whole,
// combining its configured status, its polling result and the health
// thresholds configured on its profile.
//
// resultStats may be nil; thresholds on stats that are not computed stats are
// then skipped. It returns the availability, a description of the decision,
// and the name of the stat that violated a threshold (empty when no threshold
// was violated).
func EvalAggregate(result cache.ResultInfo, resultStats *threadsafe.ResultStatValHistory, mc *tc.TrafficMonitorConfigMap) (bool, string, string) {
	server, found := mc.TrafficServer[string(result.ID)]
	if !found {
		log.Errorf("Cache %v missing from from Traffic Ops Monitor Config - treating as OFFLINE\n", result.ID)
		return false, "ERROR - server missing in Traffic Ops monitor config", ""
	}

	status := tc.CacheStatusFromString(server.ServerStatus)
	// ONLINE short-circuits everything. EvalCacheWithStatusInfo would also
	// report an ONLINE cache as available, but a true result from it is
	// followed by the threshold checks below, and thresholds must never be
	// applied to a cache that is forced ONLINE.
	if status == tc.CacheStatusOnline {
		return true, eventDesc(status, AvailableStr), ""
	}

	profile, found := mc.Profile[server.Profile]
	if !found {
		log.Errorf("Profile '%v' for cache server '%v' missing from monitoring configuration - treating as OFFLINE", server.Profile, result.ID)
		return false, "ERROR - server profile missing in Traffic Ops monitor config", ""
	}

	available, description, unavailableStat := EvalCacheWithStatusInfo(result, mc, status, server.ServerStatus)
	if !available {
		return available, description, unavailableStat
	}

	computedStats := cache.ComputedStats()

	for stat, threshold := range profile.Parameters.Thresholds {
		var value interface{}
		if compute, isComputed := computedStats[stat]; isComputed {
			value = compute(result, server, profile, dummyCombinedState)
		} else {
			// Not a computed stat: use the first entry of the polled
			// history, if there is one.
			if resultStats == nil {
				continue
			}
			history := resultStats.Load(stat)
			if len(history) == 0 {
				continue
			}
			value = history[0].Val
		}

		number, isNumber := util.ToNumeric(value)
		if !isNumber {
			log.Errorf("health.EvalCache threshold stat %s was not a number: %v", stat, value)
			continue
		}

		if inThreshold(threshold, number) {
			continue
		}
		return false, eventDesc(status, exceedsThresholdMsg(stat, threshold, number)), stat
	}

	return available, description, unavailableStat
}

// getProcessAvailableTuple returns the function that reduces a per-protocol
// availability tuple to a single availability, chosen by the polling protocol
// in use:
//   - IPv4Only: the IPv4 availability;
//   - IPv6Only: the IPv6 availability;
//   - Both: the availability of the only address family the server has, or,
//     if it has both, whether either one is available.
//
// An unknown protocol is logged and yields a function that always reports
// unavailable.
func getProcessAvailableTuple(protocol config.PollingProtocol) func(cache.AvailableTuple, tc.TrafficServer) bool {
	switch protocol {
	case config.IPv4Only:
		return func(tuple cache.AvailableTuple, _ tc.TrafficServer) bool { return tuple.IPv4 }
	case config.IPv6Only:
		return func(tuple cache.AvailableTuple, _ tc.TrafficServer) bool { return tuple.IPv6 }
	case config.Both:
		return func(tuple cache.AvailableTuple, serverInfo tc.TrafficServer) bool {
			switch {
			case serverInfo.IPv4() == "":
				return tuple.IPv6
			case serverInfo.IPv6() == "":
				return tuple.IPv4
			default:
				return tuple.IPv4 || tuple.IPv6
			}
		}
	}

	// Only reached when no known protocol matched above.
	log.Errorf("received an unknown Polling Protocol: %s", protocol)
	return func(cache.AvailableTuple, tc.TrafficServer) bool { return false }
}

// CalcAvailability calculates the availability of each cache in results and
// publishes it.
//
// For every result it evaluates the monitored interfaces (EvalInterface) and
// the server as a whole (EvalAggregate), folds both into the availability of
// the address family the result was polled over, and carries the other
// family's availability over from the previous status. The outcome is written
// to localStates, an event is added to events when the overall availability
// differs from the previously recorded one, and the updated statuses are
// stored back into localCacheStatusThreadsafe once all results are processed.
//
// statResultHistory may be nil, in which case stats won't be used to calculate
// availability.
func CalcAvailability(
	results []cache.Result,
	pollerName string,
	statResultHistory *threadsafe.ResultStatHistory,
	mc tc.TrafficMonitorConfigMap,
	toData todata.TOData,
	localCacheStatusThreadsafe threadsafe.CacheAvailableStatus,
	localStates peer.CRStatesThreadsafe,
	events ThreadsafeEvents,
	protocol config.PollingProtocol,
) {
	localCacheStatuses := localCacheStatusThreadsafe.Get().Copy()
	processAvailableTuple := getProcessAvailableTuple(protocol)

	for _, result := range results {
		// Stat history for this cache; stays nil when stats aren't used.
		var resultStats *threadsafe.ResultStatValHistory
		if statResultHistory != nil {
			cacheStatHistory := statResultHistory.LoadOrStore(result.ID)
			resultStats = &cacheStatHistory.Stats
		}

		// A missing server is only logged here: the zero-value serverInfo has
		// no interfaces, and EvalAggregate below reports the cache as
		// unavailable for the same reason.
		serverInfo, ok := mc.TrafficServer[result.ID]
		if !ok {
			log.Errorf("Cache %v missing from from Traffic Ops Monitor Config - treating as OFFLINE\n", result.ID)
		}

		availStatus := cache.AvailableStatus{
			LastCheckedIPv4:    result.UsingIPv4,
			ProcessedAvailable: true,
			Poller:             pollerName,
			Status:             serverInfo.ServerStatus,
		}

		// Start the polled address family as available and inherit the other
		// family from the last known status (only if the server has an
		// address of that family at all).
		lastStatus, hadLastStatus := localCacheStatuses[result.ID]
		if hadLastStatus {
			if result.UsingIPv4 {
				availStatus.Available.IPv4 = true
				availStatus.Available.IPv6 = serverInfo.IPv6() != "" && lastStatus.Available.IPv6
			} else {
				availStatus.Available.IPv6 = true
				availStatus.Available.IPv4 = serverInfo.IPv4() != "" && lastStatus.Available.IPv4
			}
		}

		// Converted once and shared by the interface and aggregate checks.
		resultInfo := cache.ToInfo(result)

		var reasons []string
		for _, inf := range serverInfo.Interfaces {
			if !inf.Monitor {
				continue
			}

			available, why := EvalInterface(resultInfo.InterfaceVitals, inf)
			if result.UsingIPv4 {
				availStatus.Available.IPv4 = availStatus.Available.IPv4 && available
			} else {
				availStatus.Available.IPv6 = availStatus.Available.IPv6 && available
			}

			if why != "" {
				reasons = append(reasons, inf.Name+": "+why)
			}
		}

		aggIsAvailable, aggWhyAvailable, aggUnavailableStat := EvalAggregate(resultInfo, resultStats, &mc)

		if result.UsingIPv4 {
			availStatus.Available.IPv4 = availStatus.Available.IPv4 && aggIsAvailable
		} else {
			availStatus.Available.IPv6 = availStatus.Available.IPv6 && aggIsAvailable
		}

		availStatus.ProcessedAvailable = processAvailableTuple(availStatus.Available, serverInfo)

		// The aggregate reason, if any, leads the per-interface reasons.
		if aggWhyAvailable != "" {
			reasons = append([]string{aggWhyAvailable}, reasons...)
		}
		availStatus.Why = strings.Join(reasons, "; ")
		if aggUnavailableStat != "" {
			availStatus.UnavailableStat = aggUnavailableStat
		}

		localStates.SetCache(tc.CacheName(result.ID), tc.IsAvailable{
			IsAvailable:    availStatus.ProcessedAvailable,
			Ipv4Available:  availStatus.Available.IPv4,
			Ipv6Available:  availStatus.Available.IPv6,
			DirectlyPolled: true, // we know this cache was directly polled because otherwise we wouldn't have a cache.Result for it
			Status:         availStatus.Why,
			LastPoll:       result.Time,
		})

		if current, found := localStates.GetCache(tc.CacheName(result.ID)); !found || current.IsAvailable != lastStatus.ProcessedAvailable {
			protocolName := "IPv4"
			if !availStatus.LastCheckedIPv4 {
				protocolName = "IPv6"
			}
			// "was" is the previously recorded availability; the state read
			// back from localStates already holds the new value set above.
			log.Infof("Changing state for %s was: %t now: %t because %s poller: %v on protocol %v error: %v",
				result.ID, lastStatus.ProcessedAvailable, availStatus.ProcessedAvailable, availStatus.Why, pollerName, protocolName, result.Error)

			event := Event{
				Time:          Time(time.Now()),
				Description:   "Protocol (" + protocolName + ") " + availStatus.Why + " (" + pollerName + ") ",
				Name:          result.ID,
				Hostname:      result.ID,
				Type:          toData.ServerTypes[tc.CacheName(result.ID)].String(),
				Available:     availStatus.ProcessedAvailable,
				IPv4Available: availStatus.Available.IPv4,
				IPv6Available: availStatus.Available.IPv6,
			}
			events.Add(event)
		}

		localCacheStatuses[result.ID] = availStatus
	}
	calculateDeliveryServiceState(localStates)
	localCacheStatusThreadsafe.Set(localCacheStatuses)
}

// exceedsThresholdMsg returns a human-readable message for why val violates
// threshold for the stat named stat. It does NOT check whether the value
// actually violates the threshold; call inThreshold to check first.
//
// The relation shown in the message is the exact negation of the threshold's
// comparator, i.e. the condition that holds when the threshold is violated:
// a ">" threshold is violated when val <= limit, ">=" when val < limit, "<"
// when val >= limit and "<=" when val > limit. An unrecognised comparator
// yields an error message instead.
func exceedsThresholdMsg(stat string, threshold tc.HealthThreshold, val float64) string {
	switch threshold.Comparator {
	case "=":
		return fmt.Sprintf("%s not equal (%.2f != %.2f)", stat, val, threshold.Val)
	case ">":
		return fmt.Sprintf("%s too low (%.2f <= %.2f)", stat, val, threshold.Val)
	case "<":
		return fmt.Sprintf("%s too high (%.2f >= %.2f)", stat, val, threshold.Val)
	case ">=":
		return fmt.Sprintf("%s too low (%.2f < %.2f)", stat, val, threshold.Val)
	case "<=":
		return fmt.Sprintf("%s too high (%.2f > %.2f)", stat, val, threshold.Val)
	default:
		return fmt.Sprintf("ERROR: Invalid Threshold: %+v", threshold)
	}
}

// inThreshold reports whether val satisfies threshold, i.e. whether
// "val <comparator> threshold.Val" holds. An unrecognised comparator is logged
// and treated as satisfied.
func inThreshold(threshold tc.HealthThreshold, val float64) bool {
	limit := threshold.Val

	switch threshold.Comparator {
	case "<":
		return val < limit
	case "<=":
		return val <= limit
	case ">":
		return val > limit
	case ">=":
		return val >= limit
	case "=":
		return val == limit
	}

	log.Errorf("Invalid Threshold: %+v", threshold)
	// for safety, if a threshold somehow gets corrupted, don't start marking caches down.
	return true
}

// eventDesc builds an event description of the form "<status> - <message>".
func eventDesc(status tc.CacheStatus, message string) string {
	const layout = "%s - %s"
	return fmt.Sprintf(layout, status, message)
}

// calculateDeliveryServiceState resets the DisabledLocations of every delivery
// service held in states to an empty, non-nil list and stores each updated
// delivery service state back into states.
func calculateDeliveryServiceState(states peer.CRStatesThreadsafe) {
	for name, dsState := range states.GetDeliveryServices() {
		// NOTE: DisabledLocations is always empty, and it's important that it
		// isn't nil, so it serialises to the JSON `[]` instead of `null`.
		// It's no longer populated due to it being an unnecessary
		// optimization for Traffic Router, but the field is kept for
		// compatibility.
		dsState.DisabledLocations = make([]tc.CacheGroupName, 0)
		states.SetDeliveryService(name, dsState)
	}
}
