blob: dacee32bb3a6b4724597b64eede241e5e89dff82 [file] [log] [blame]
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"fmt"
"sort"
"strings"
apimachineryconfig "k8s.io/apimachinery/pkg/apis/config"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// KubeProxyIPTablesConfiguration contains iptables-related configuration
// details for the Kubernetes proxy server.
type KubeProxyIPTablesConfiguration struct {
// masqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
// the pure iptables proxy mode. Values must be within the range [0, 31].
MasqueradeBit *int32
// masqueradeAll tells kube-proxy to SNAT everything if using the pure iptables proxy mode.
MasqueradeAll bool
// syncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
SyncPeriod metav1.Duration
// minSyncPeriod is the minimum period that iptables rules are refreshed (e.g. '5s', '1m',
// '2h22m').
MinSyncPeriod metav1.Duration
}
// KubeProxyIPVSConfiguration contains ipvs-related configuration
// details for the Kubernetes proxy server.
type KubeProxyIPVSConfiguration struct {
// syncPeriod is the period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
SyncPeriod metav1.Duration
// minSyncPeriod is the minimum period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m').
MinSyncPeriod metav1.Duration
// ipvs scheduler
Scheduler string
// excludeCIDRs is a list of CIDR's which the ipvs proxier should not touch
// when cleaning up ipvs services.
ExcludeCIDRs []string
}
// KubeProxyConntrackConfiguration contains conntrack settings for
// the Kubernetes proxy server.
type KubeProxyConntrackConfiguration struct {
// max is the maximum number of NAT connections to track (0 to
// leave as-is). This takes precedence over maxPerCore and min.
Max *int32
// maxPerCore is the maximum number of NAT connections to track
// per CPU core (0 to leave the limit as-is and ignore min).
MaxPerCore *int32
// min is the minimum value of connect-tracking records to allocate,
// regardless of maxPerCore (set maxPerCore=0 to leave the limit as-is).
Min *int32
// tcpEstablishedTimeout is how long an idle TCP connection will be kept open
// (e.g. '2s'). Must be greater than 0 to set.
TCPEstablishedTimeout *metav1.Duration
// tcpCloseWaitTimeout is how long an idle conntrack entry
// in CLOSE_WAIT state will remain in the conntrack
// table. (e.g. '60s'). Must be greater than 0 to set.
TCPCloseWaitTimeout *metav1.Duration
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// KubeProxyConfiguration contains everything necessary to configure the
// Kubernetes proxy server.
type KubeProxyConfiguration struct {
metav1.TypeMeta
// featureGates is a map of feature names to bools that enable or disable alpha/experimental features.
FeatureGates map[string]bool
// bindAddress is the IP address for the proxy server to serve on (set to 0.0.0.0
// for all interfaces)
BindAddress string
// healthzBindAddress is the IP address and port for the health check server to serve on,
// defaulting to 0.0.0.0:10256
HealthzBindAddress string
// metricsBindAddress is the IP address and port for the metrics server to serve on,
// defaulting to 127.0.0.1:10249 (set to 0.0.0.0 for all interfaces)
MetricsBindAddress string
// enableProfiling enables profiling via web interface on /debug/pprof handler.
// Profiling handlers will be handled by metrics server.
EnableProfiling bool
// clusterCIDR is the CIDR range of the pods in the cluster. It is used to
// bridge traffic coming from outside of the cluster. If not provided,
// no off-cluster bridging will be performed.
ClusterCIDR string
// hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname.
HostnameOverride string
// clientConnection specifies the kubeconfig file and client connection settings for the proxy
// server to use when communicating with the apiserver.
ClientConnection apimachineryconfig.ClientConnectionConfiguration
// iptables contains iptables-related configuration options.
IPTables KubeProxyIPTablesConfiguration
// ipvs contains ipvs-related configuration options.
IPVS KubeProxyIPVSConfiguration
// oomScoreAdj is the oom-score-adj value for kube-proxy process. Values must be within
// the range [-1000, 1000]
OOMScoreAdj *int32
// mode specifies which proxy mode to use.
Mode ProxyMode
// portRange is the range of host ports (beginPort-endPort, inclusive) that may be consumed
// in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.
PortRange string
// resourceContainer is the absolute name of the resource-only container to create and run
// the Kube-proxy in (Default: /kube-proxy).
ResourceContainer string
// udpIdleTimeout is how long an idle UDP connection will be kept open (e.g. '250ms', '2s').
// Must be greater than 0. Only applicable for proxyMode=userspace.
UDPIdleTimeout metav1.Duration
// conntrack contains conntrack-related configuration options.
Conntrack KubeProxyConntrackConfiguration
// configSyncPeriod is how often configuration from the apiserver is refreshed. Must be greater
// than 0.
ConfigSyncPeriod metav1.Duration
// nodePortAddresses is the --nodeport-addresses value for kube-proxy process. Values must be valid
// IP blocks. These values are as a parameter to select the interfaces where nodeport works.
// In case someone would like to expose a service on localhost for local visit and some other interfaces for
// particular purpose, a list of IP blocks would do that.
// If set it to "127.0.0.0/8", kube-proxy will only select the loopback interface for NodePort.
// If set it to a non-zero IP block, kube-proxy will filter that down to just the IPs that applied to the node.
// An empty string slice is meant to select all network interfaces.
NodePortAddresses []string
}
// Currently, three modes of proxy are available in Linux platform: 'userspace' (older, going to be EOL), 'iptables'
// (newer, faster), 'ipvs'(newest, better in performance and scalability).
//
// Two modes of proxy are available in Windows platform: 'userspace'(older, stable) and 'kernelspace' (newer, faster).
//
// In Linux platform, if proxy mode is blank, use the best-available proxy (currently iptables, but may change in the
// future). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are
// insufficient, this always falls back to the userspace proxy. IPVS mode will be enabled when proxy mode is set to 'ipvs',
// and the fall back path is firstly iptables and then userspace.
// In Windows platform, if proxy mode is blank, use the best-available proxy (currently userspace, but may change in the
// future). If winkernel proxy is selected, regardless of how, but the Windows kernel can't support this mode of proxy,
// this always falls back to the userspace proxy.
type ProxyMode string
const (
ProxyModeUserspace ProxyMode = "userspace"
ProxyModeIPTables ProxyMode = "iptables"
ProxyModeIPVS ProxyMode = "ipvs"
ProxyModeKernelspace ProxyMode = "kernelspace"
)
// IPVSSchedulerMethod is the algorithm for allocating TCP connections and
// UDP datagrams to real servers. Scheduling algorithms are imple-
//wanted as kernel modules. Ten are shipped with the Linux Virtual Server.
type IPVSSchedulerMethod string
const (
// RoundRobin distributes jobs equally amongst the available real servers.
RoundRobin IPVSSchedulerMethod = "rr"
// WeightedRoundRobin assigns jobs to real servers proportionally to their real servers' weight.
// Servers with higher weights receive new jobs first and get more jobs than servers with lower weights.
// Servers with equal weights get an equal distribution of new jobs.
WeightedRoundRobin IPVSSchedulerMethod = "wrr"
// LeastConnection assigns more jobs to real servers with fewer active jobs.
LeastConnection IPVSSchedulerMethod = "lc"
// WeightedLeastConnection assigns more jobs to servers with fewer jobs and
// relative to the real servers' weight(Ci/Wi).
WeightedLeastConnection IPVSSchedulerMethod = "wlc"
// LocalityBasedLeastConnection assigns jobs destined for the same IP address to the same server if
// the server is not overloaded and available; otherwise assigns jobs to servers with fewer jobs,
// and keep it for future assignment.
LocalityBasedLeastConnection IPVSSchedulerMethod = "lblc"
// LocalityBasedLeastConnectionWithReplication with Replication assigns jobs destined for the same IP address to the
// least-connection node in the server set for the IP address. If all the node in the server set are overloaded,
// it picks up a node with fewer jobs in the cluster and adds it to the sever set for the target.
// If the server set has not been modified for the specified time, the most loaded node is removed from the server set,
// in order to avoid high degree of replication.
LocalityBasedLeastConnectionWithReplication IPVSSchedulerMethod = "lblcr"
// SourceHashing assigns jobs to servers through looking up a statically assigned hash table
// by their source IP addresses.
SourceHashing IPVSSchedulerMethod = "sh"
// DestinationHashing assigns jobs to servers through looking up a statically assigned hash table
// by their destination IP addresses.
DestinationHashing IPVSSchedulerMethod = "dh"
// ShortestExpectedDelay assigns an incoming job to the server with the shortest expected delay.
// The expected delay that the job will experience is (Ci + 1) / Ui if sent to the ith server, in which
// Ci is the number of jobs on the ith server and Ui is the fixed service rate (weight) of the ith server.
ShortestExpectedDelay IPVSSchedulerMethod = "sed"
// NeverQueue assigns an incoming job to an idle server if there is, instead of waiting for a fast one;
// if all the servers are busy, it adopts the ShortestExpectedDelay policy to assign the job.
NeverQueue IPVSSchedulerMethod = "nq"
)
func (m *ProxyMode) Set(s string) error {
*m = ProxyMode(s)
return nil
}
func (m *ProxyMode) String() string {
if m != nil {
return string(*m)
}
return ""
}
func (m *ProxyMode) Type() string {
return "ProxyMode"
}
type ConfigurationMap map[string]string
func (m *ConfigurationMap) String() string {
pairs := []string{}
for k, v := range *m {
pairs = append(pairs, fmt.Sprintf("%s=%s", k, v))
}
sort.Strings(pairs)
return strings.Join(pairs, ",")
}
func (m *ConfigurationMap) Set(value string) error {
for _, s := range strings.Split(value, ",") {
if len(s) == 0 {
continue
}
arr := strings.SplitN(s, "=", 2)
if len(arr) == 2 {
(*m)[strings.TrimSpace(arr[0])] = strings.TrimSpace(arr[1])
} else {
(*m)[strings.TrimSpace(arr[0])] = ""
}
}
return nil
}
func (*ConfigurationMap) Type() string {
return "mapStringString"
}