blob: d1eddd8fee74f033b8a9cfddcad1b6d12b055bab [file] [log] [blame]
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package capture
import (
"bufio"
"fmt"
"net"
"os"
"strings"
"time"
)
import (
"github.com/vishvananda/netlink"
"istio.io/pkg/log"
)
import (
"github.com/apache/dubbo-go-pixiu/tools/istio-iptables/pkg/builder"
"github.com/apache/dubbo-go-pixiu/tools/istio-iptables/pkg/config"
"github.com/apache/dubbo-go-pixiu/tools/istio-iptables/pkg/constants"
dep "github.com/apache/dubbo-go-pixiu/tools/istio-iptables/pkg/dependencies"
iptableslog "github.com/apache/dubbo-go-pixiu/tools/istio-iptables/pkg/log"
)
// Ops identifies the kind of rule operation to perform.
type Ops int

const (
	// AppendOps performs append operations of rules
	AppendOps Ops = iota
	// DeleteOps performs delete operations of rules
	DeleteOps
)

// outboundMark is the packet mark used in TPROXY mode on traffic going from the
// envoy outbound side to the app by pod IP, so that it is not intercepted again
// by the envoy inbound listener.
const outboundMark = "1338"

// opsToString translates an Ops value into the matching iptables command flag.
var opsToString = map[Ops]string{
	DeleteOps: "-D",
	AppendOps: "-A",
}
// IptablesConfigurator groups the rule builder, the external command runner and the
// configuration used while setting up traffic capture rules.
type IptablesConfigurator struct {
// iptables collects the rules that are later built and executed.
iptables *builder.IptablesBuilder
// TODO(abhide): Fix dep.Dependencies with better interface
// ext is used to run external commands.
ext dep.Dependencies
// cfg is the configuration the rules are derived from.
cfg *config.Config
}
// NewIptablesConfigurator returns a configurator bound to the given configuration
// and dependencies, with a fresh rule builder created from cfg.
func NewIptablesConfigurator(cfg *config.Config, ext dep.Dependencies) *IptablesConfigurator {
	configurator := new(IptablesConfigurator)
	configurator.iptables = builder.NewIptablesBuilder(cfg)
	configurator.ext = ext
	configurator.cfg = cfg
	return configurator
}
// NetworkRange describes the CIDRs of a single IP family extracted from a CIDR list.
type NetworkRange struct {
// IsWildcard is set when the list was the literal "*".
IsWildcard bool
// IPNets holds the parsed networks belonging to this family.
IPNets []*net.IPNet
// HasLoopBackIP is set when at least one listed address is a loopback address.
HasLoopBackIP bool
}
// split breaks a list-valued configuration string into its elements by delegating
// to config.Split.
func split(s string) []string {
return config.Split(s)
}
// separateV4V6 sorts the CIDRs in cidrList into an IPv4 range and an IPv6 range
// (returned in that order).
//
// The literal "*" yields two wildcard ranges. Entries that fail to parse are
// reported on stderr and skipped; an error is returned only if that report
// itself cannot be written.
func (cfg *IptablesConfigurator) separateV4V6(cidrList string) (NetworkRange, NetworkRange, error) {
	var v4, v6 NetworkRange
	if cidrList == "*" {
		v4.IsWildcard, v6.IsWildcard = true, true
		return v4, v6, nil
	}
	for _, entry := range split(cidrList) {
		ip, network, parseErr := net.ParseCIDR(entry)
		if parseErr != nil {
			_, writeErr := fmt.Fprintf(os.Stderr, "Ignoring error for bug compatibility with istio-iptables: %s\n", parseErr.Error())
			if writeErr != nil {
				return v4, v6, writeErr
			}
			continue
		}
		// Pick the family bucket once, then record the network and loopback flag.
		bucket := &v6
		if ip.To4() != nil {
			bucket = &v4
		}
		bucket.IPNets = append(bucket.IPNets, network)
		if ip.IsLoopback() {
			bucket.HasLoopBackIP = true
		}
	}
	return v4, v6, nil
}
// logConfig logs the environment variables relevant to iptables setup, one
// NAME=value pair per line, and then prints the effective configuration.
func (cfg *IptablesConfigurator) logConfig() {
	// Dump out our environment for debugging purposes.
	envNames := []string{
		"ENVOY_PORT",
		"INBOUND_CAPTURE_PORT",
		"ISTIO_INBOUND_INTERCEPTION_MODE",
		"ISTIO_INBOUND_TPROXY_ROUTE_TABLE",
		"ISTIO_INBOUND_PORTS",
		"ISTIO_OUTBOUND_PORTS",
		"ISTIO_LOCAL_EXCLUDE_PORTS",
		"ISTIO_EXCLUDE_INTERFACES",
		"ISTIO_SERVICE_CIDR",
		"ISTIO_SERVICE_EXCLUDE_CIDR",
		"ISTIO_META_DNS_CAPTURE",
		"INVALID_DROP",
	}
	var dump strings.Builder
	for _, name := range envNames {
		fmt.Fprintf(&dump, "%s=%s\n", name, os.Getenv(name))
	}
	log.Infof("Istio iptables environment:\n%s", dump.String())
	cfg.cfg.Print()
}
// handleInboundPortsInclude adds the rules that steer inbound TCP traffic towards
// Envoy. Nothing is added when InboundPortsInclude is empty. With "*" every port
// except those in InboundPortsExclude is captured; otherwise only the listed ports are.
// In TPROXY mode the rules go to the mangle table, otherwise to the nat table.
func (cfg *IptablesConfigurator) handleInboundPortsInclude() {
	if cfg.cfg.InboundPortsInclude == "" {
		// No inbound port will be intercepted.
		return
	}
	useTProxy := cfg.cfg.InboundInterceptionMode == constants.TPROXY

	table := constants.NAT
	if useTProxy {
		table = constants.MANGLE
		// ISTIODIVERT: mark every packet entering the chain with the inbound TPROXY
		// mark and accept it, so that it gets routed to the loopback interface.
		cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIODIVERT, constants.MANGLE,
			"-j", constants.MARK, "--set-mark", cfg.cfg.InboundTProxyMark)
		cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIODIVERT, constants.MANGLE,
			"-j", constants.ACCEPT)
		// ISTIOTPROXY: hand non-localhost TCP traffic to the common Envoy inbound port.
		cfg.iptables.AppendVersionedRule("127.0.0.1/32", "::1/128", iptableslog.UndefinedCommand,
			constants.ISTIOTPROXY, constants.MANGLE, "!", "-d", constants.IPVersionSpecific,
			"-p", constants.TCP, "-j", constants.TPROXY,
			"--tproxy-mark", cfg.cfg.InboundTProxyMark+"/0xffffffff", "--on-port", cfg.cfg.InboundCapturePort)
	}

	// All TCP traffic in PREROUTING is evaluated by the ISTIOINBOUND chain.
	cfg.iptables.AppendRule(iptableslog.JumpInbound, constants.PREROUTING, table, "-p", constants.TCP,
		"-j", constants.ISTIOINBOUND)

	if cfg.cfg.InboundPortsInclude != "*" {
		// Explicit list of ports to be redirected to Envoy.
		for _, port := range split(cfg.cfg.InboundPortsInclude) {
			if !useTProxy {
				cfg.iptables.AppendRule(iptableslog.IncludeInboundPort,
					constants.ISTIOINBOUND, constants.NAT, "-p", constants.TCP, "--dport", port, "-j", constants.ISTIOINREDIRECT)
				continue
			}
			cfg.iptables.AppendRule(iptableslog.IncludeInboundPort, constants.ISTIOINBOUND, constants.MANGLE, "-p", constants.TCP,
				"--dport", port, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", constants.ISTIODIVERT)
			cfg.iptables.AppendRule(iptableslog.IncludeInboundPort,
				constants.ISTIOINBOUND, constants.MANGLE, "-p", constants.TCP, "--dport", port, "-j", constants.ISTIOTPROXY)
		}
		return
	}

	// Wildcard: apply user-specified port exclusions first.
	if cfg.cfg.InboundPortsExclude != "" {
		for _, port := range split(cfg.cfg.InboundPortsExclude) {
			cfg.iptables.AppendRule(iptableslog.ExcludeInboundPort, constants.ISTIOINBOUND, table, "-p", constants.TCP,
				"--dport", port, "-j", constants.RETURN)
		}
	}
	// Then redirect the remaining inbound traffic to Envoy.
	if useTProxy {
		// Packets of an established socket are routed to the loopback interface.
		cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOINBOUND, constants.MANGLE, "-p", constants.TCP,
			"-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", constants.ISTIODIVERT)
		// Anything else is a new connection and goes through TPROXY.
		cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOINBOUND, constants.MANGLE, "-p", constants.TCP,
			"-j", constants.ISTIOTPROXY)
	} else {
		cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOINBOUND, constants.NAT, "-p", constants.TCP,
			"-j", constants.ISTIOINREDIRECT)
	}
}
// handleOutboundIncludeRules applies the outbound IP inclusions of one IP family.
// appendRule and insert are the family-specific builder functions to use.
// A wildcard range redirects all remaining outbound traffic; a non-empty CIDR list
// redirects only those destinations and returns for everything else; an empty,
// non-wildcard range adds nothing.
func (cfg *IptablesConfigurator) handleOutboundIncludeRules(
	rangeInclude NetworkRange,
	appendRule func(command iptableslog.Command, chain string, table string, params ...string) *builder.IptablesBuilder,
	insert func(command iptableslog.Command, chain string, table string, position int, params ...string) *builder.IptablesBuilder) {
	switch {
	case rangeInclude.IsWildcard:
		// Redirect all remaining outbound traffic to Envoy.
		appendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT, "-j", constants.ISTIOREDIRECT)
		for _, iface := range split(cfg.cfg.KubeVirtInterfaces) {
			insert(iptableslog.KubevirtCommand,
				constants.PREROUTING, constants.NAT, 1, "-i", iface, "-j", constants.ISTIOREDIRECT)
		}
	case len(rangeInclude.IPNets) > 0:
		// Only the listed CIDRs are redirected to Envoy.
		for _, ipNet := range rangeInclude.IPNets {
			destination := ipNet.String()
			for _, iface := range split(cfg.cfg.KubeVirtInterfaces) {
				insert(iptableslog.KubevirtCommand, constants.PREROUTING, constants.NAT, 1, "-i", iface,
					"-d", destination, "-j", constants.ISTIOREDIRECT)
			}
			appendRule(iptableslog.UndefinedCommand,
				constants.ISTIOOUTPUT, constants.NAT, "-d", destination, "-j", constants.ISTIOREDIRECT)
		}
		// All other traffic is not redirected.
		appendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT, "-j", constants.RETURN)
	}
}
// shortCircuitKubeInternalInterface inserts, at position 1 of the nat PREROUTING
// chain, a RETURN rule for every interface listed in KubeVirtInterfaces.
func (cfg *IptablesConfigurator) shortCircuitKubeInternalInterface() {
	interfaces := split(cfg.cfg.KubeVirtInterfaces)
	for i := range interfaces {
		cfg.iptables.InsertRule(iptableslog.KubevirtCommand, constants.PREROUTING, constants.NAT, 1,
			"-i", interfaces[i], "-j", constants.RETURN)
	}
}
// shortCircuitExcludeInterfaces adds RETURN rules to PREROUTING (matching the input
// interface) and OUTPUT (matching the output interface) for every interface listed in
// ExcludeInterfaces. The nat table is always covered; the mangle table is covered
// as well when the inbound interception mode is TPROXY.
func (cfg *IptablesConfigurator) shortCircuitExcludeInterfaces() {
	tables := []string{constants.NAT}
	if cfg.cfg.InboundInterceptionMode == constants.TPROXY {
		tables = append(tables, constants.MANGLE)
	}
	for _, table := range tables {
		for _, iface := range split(cfg.cfg.ExcludeInterfaces) {
			cfg.iptables.AppendRule(iptableslog.ExcludeInterfaceCommand, constants.PREROUTING, table,
				"-i", iface, "-j", constants.RETURN)
			cfg.iptables.AppendRule(iptableslog.ExcludeInterfaceCommand, constants.OUTPUT, table,
				"-o", iface, "-j", constants.RETURN)
		}
	}
}
// ignoreExists returns nil when err is nil or when its message contains
// "file exists" (compared case-insensitively); any other error is returned unchanged.
func ignoreExists(err error) error {
	if err != nil && !strings.Contains(strings.ToLower(err.Error()), "file exists") {
		return err
	}
	return nil
}
// SplitV4V6 partitions ips into IPv4 and IPv6 address strings, preserving order.
// An entry is treated as IPv4 when it parses to an address with a 4-byte form;
// every other entry, including one that does not parse at all, is placed in ipv6.
func SplitV4V6(ips []string) (ipv4 []string, ipv6 []string) {
	for _, addr := range ips {
		if net.ParseIP(addr).To4() == nil {
			ipv6 = append(ipv6, addr)
			continue
		}
		ipv4 = append(ipv4, addr)
	}
	return ipv4, ipv6
}
// ConfigureRoutes sets up the IPv6 inbound address and the TPROXY routes.
// In dry-run mode nothing is done. When ext is non-nil and CNI mode is enabled,
// the work is delegated by running the current executable (os.Args[0]) through ext
// with the configure-routes command instead of being performed in-process.
func ConfigureRoutes(cfg *config.Config, ext dep.Dependencies) error {
	switch {
	case cfg.DryRun:
		log.Infof("skipping configuring routes due to dry run mode")
		return nil
	case ext != nil && cfg.CNIMode:
		self := os.Args[0]
		return ext.Run(self, constants.CommandConfigureRoutes)
	}

	err := configureIPv6Addresses(cfg)
	if err != nil {
		return err
	}
	err = configureTProxyRoutes(cfg)
	if err != nil {
		return err
	}
	return nil
}
// configureIPv6Addresses adds the address ::6/128 to the "lo" interface when inbound
// IPv6 is enabled; otherwise it does nothing. The address is used as the source IP for
// inbound traffic to distinguish traffic we want to capture vs traffic we do not.
// This is needed for IPv6 but not IPv4, as IPv4 defaults to `netmask 255.0.0.0`, which
// allows binding to addresses in the 127.x.y.z range, while IPv6 defaults to
// `prefixlen 128` which allows binding only to ::1.
// Equivalent to `ip -6 addr add "::6/128" dev lo`
// An "already exists" failure from adding the address is not treated as an error.
func configureIPv6Addresses(cfg *config.Config) error {
	if !cfg.EnableInboundIPv6 {
		return nil
	}
	loopback, err := netlink.LinkByName("lo")
	if err != nil {
		return fmt.Errorf("failed to find 'lo' link: %v", err)
	}
	inboundAddr := &netlink.Addr{
		IPNet: &net.IPNet{IP: net.ParseIP("::6"), Mask: net.CIDRMask(128, 128)},
	}
	if addErr := netlink.AddrAdd(loopback, inboundAddr); ignoreExists(addErr) != nil {
		return fmt.Errorf("failed to add IPv6 inbound address: %v", addErr)
	}
	log.Infof("Added ::6 address")
	return nil
}
// Run assembles the full set of capture rules from the configuration and then
// executes them via executeCommands. The order in which rules are appended below
// is significant. Run panics when the outbound include/exclude ranges cannot be
// separated or when the IPv4 exclude range is a wildcard. On return (deferred) it
// makes a best-effort attempt to run the iptables-save command(s).
func (cfg *IptablesConfigurator) Run() {
defer func() {
// Best effort since we don't know if the commands exist
_ = cfg.ext.Run(constants.IPTABLESSAVE)
if cfg.cfg.EnableInboundIPv6 {
_ = cfg.ext.Run(constants.IP6TABLESSAVE)
}
}()
// Since OUTBOUND_IP_RANGES_EXCLUDE could carry ipv4 and ipv6 ranges
// need to split them in different arrays one for ipv4 and one for ipv6
// in order to not to fail
ipv4RangesExclude, ipv6RangesExclude, err := cfg.separateV4V6(cfg.cfg.OutboundIPRangesExclude)
if err != nil {
panic(err)
}
if ipv4RangesExclude.IsWildcard {
panic("Invalid value for OUTBOUND_IP_RANGES_EXCLUDE")
}
// FixMe: Do we need similar check for ipv6RangesExclude as well ??
ipv4RangesInclude, ipv6RangesInclude, err := cfg.separateV4V6(cfg.cfg.OutboundIPRangesInclude)
if err != nil {
panic(err)
}
redirectDNS := cfg.cfg.RedirectDNS
cfg.logConfig()
cfg.shortCircuitExcludeInterfaces()
// Do not capture internal interface.
cfg.shortCircuitKubeInternalInterface()
// Create a rule for invalid drop in PREROUTING chain in mangle table, so that iptables drops out-of-window packets instead of resetting the connection.
dropInvalid := cfg.cfg.DropInvalid
if dropInvalid {
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.PREROUTING, constants.MANGLE, "-m", "conntrack", "--ctstate",
"INVALID", "-j", constants.DROP)
}
// Let inbound traffic hit the tunnel port directly. Envoy will be listening on that port, acting as a VPN tunnel.
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOINBOUND, constants.NAT, "-p", constants.TCP, "--dport",
cfg.cfg.InboundTunnelPort, "-j", constants.RETURN)
// Create a new chain for redirecting outbound traffic to the common Envoy port.
// In both chains, '-j RETURN' bypasses Envoy and '-j ISTIOREDIRECT'
// redirects to Envoy.
cfg.iptables.AppendRule(iptableslog.UndefinedCommand,
constants.ISTIOREDIRECT, constants.NAT, "-p", constants.TCP, "-j", constants.REDIRECT, "--to-ports", cfg.cfg.ProxyPort)
// Use this chain also for redirecting inbound traffic to the common Envoy port
// when not using TPROXY.
cfg.iptables.AppendRule(iptableslog.InboundCapture, constants.ISTIOINREDIRECT, constants.NAT, "-p", constants.TCP, "-j", constants.REDIRECT,
"--to-ports", cfg.cfg.InboundCapturePort)
cfg.handleInboundPortsInclude()
// TODO: change the default behavior to not intercept any output - user may use http_proxy or another
// iptablesOrFail wrapper (like ufw). Current default is similar with 0.1
// Jump to the ISTIOOUTPUT chain from OUTPUT chain for all tcp traffic, and UDP dns (if enabled)
cfg.iptables.AppendRule(iptableslog.JumpOutbound, constants.OUTPUT, constants.NAT, "-p", constants.TCP, "-j", constants.ISTIOOUTPUT)
// Apply port based exclusions. Must be applied before connections back to self are redirected.
if cfg.cfg.OutboundPortsExclude != "" {
for _, port := range split(cfg.cfg.OutboundPortsExclude) {
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT, "-p", constants.TCP,
"--dport", port, "-j", constants.RETURN)
}
}
// 127.0.0.6/::6 is bind connect from inbound passthrough cluster
cfg.iptables.AppendVersionedRule("127.0.0.6/32", "::6/128", iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-o", "lo", "-s", constants.IPVersionSpecific, "-j", constants.RETURN)
for _, uid := range split(cfg.cfg.ProxyUID) {
// Redirect app calls back to itself via Envoy when using the service VIP
// e.g. appN => Envoy (client) => Envoy (server) => appN.
// nolint: lll
if redirectDNS {
// When DNS is enabled, we skip this for port 53. This ensures we do not have:
// app => istio-agent => Envoy inbound => dns server
// Instead, we just have:
// app => istio-agent => dns server
cfg.iptables.AppendVersionedRule("127.0.0.1/32", "::1/128", iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-o", "lo", "!", "-d", constants.IPVersionSpecific,
"-p", "tcp", "!", "--dport", "53",
"-m", "owner", "--uid-owner", uid, "-j", constants.ISTIOINREDIRECT)
} else {
cfg.iptables.AppendVersionedRule("127.0.0.1/32", "::1/128", iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-o", "lo", "!", "-d", constants.IPVersionSpecific,
"-m", "owner", "--uid-owner", uid, "-j", constants.ISTIOINREDIRECT)
}
// Do not redirect app calls to back itself via Envoy when using the endpoint address
// e.g. appN => appN by lo
// If loopback explicitly set via OutboundIPRangesInclude, then don't return.
if !ipv4RangesInclude.HasLoopBackIP && !ipv6RangesInclude.HasLoopBackIP {
if redirectDNS {
// Users may have a DNS server that is on localhost. In these cases, applications may
// send TCP traffic to the DNS server that we actually *do* want to intercept. To
// handle this case, we exclude port 53 from this rule. Note: We cannot just move the
// port 53 redirection rule further up the list, as we will want to avoid capturing
// DNS requests from the proxy UID/GID
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT, "-o", "lo", "-p", "tcp",
"!", "--dport", "53",
"-m", "owner", "!", "--uid-owner", uid, "-j", constants.RETURN)
} else {
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-o", "lo", "-m", "owner", "!", "--uid-owner", uid, "-j", constants.RETURN)
}
}
// Avoid infinite loops. Don't redirect Envoy traffic directly back to
// Envoy for non-loopback traffic.
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-m", "owner", "--uid-owner", uid, "-j", constants.RETURN)
}
for _, gid := range split(cfg.cfg.ProxyGID) {
// Redirect app calls back to itself via Envoy when using the service VIP
// e.g. appN => Envoy (client) => Envoy (server) => appN.
// Note: unlike the UID loop above, this rule has no port 53 exception when DNS redirection is enabled.
cfg.iptables.AppendVersionedRule("127.0.0.1/32", "::1/128", iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-o", "lo", "!", "-d", constants.IPVersionSpecific,
"-m", "owner", "--gid-owner", gid, "-j", constants.ISTIOINREDIRECT)
// Do not redirect app calls to back itself via Envoy when using the endpoint address
// e.g. appN => appN by lo
// If loopback explicitly set via OutboundIPRangesInclude, then don't return.
if !ipv4RangesInclude.HasLoopBackIP && !ipv6RangesInclude.HasLoopBackIP {
if redirectDNS {
// Users may have a DNS server that is on localhost. In these cases, applications may
// send TCP traffic to the DNS server that we actually *do* want to intercept. To
// handle this case, we exclude port 53 from this rule. Note: We cannot just move the
// port 53 redirection rule further up the list, as we will want to avoid capturing
// DNS requests from the proxy UID/GID
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-o", "lo", "-p", "tcp",
"!", "--dport", "53",
"-m", "owner", "!", "--gid-owner", gid, "-j", constants.RETURN)
} else {
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-o", "lo", "-m", "owner", "!", "--gid-owner", gid, "-j", constants.RETURN)
}
}
// Avoid infinite loops. Don't redirect Envoy traffic directly back to
// Envoy for non-loopback traffic.
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT, "-m", "owner", "--gid-owner", gid, "-j", constants.RETURN)
}
// Owner-group based capture filtering; the same filter is reused for the UDP DNS rules further down.
ownerGroupsFilter := config.ParseInterceptFilter(cfg.cfg.OwnerGroupsInclude, cfg.cfg.OwnerGroupsExclude)
cfg.handleCaptureByOwnerGroup(ownerGroupsFilter)
if redirectDNS {
if cfg.cfg.CaptureAllDNS {
// Redirect all TCP dns traffic on port 53 to the agent on port 15053
// This will be useful for the CNI case where pod DNS server address cannot be decided.
cfg.iptables.AppendRule(iptableslog.UndefinedCommand,
constants.ISTIOOUTPUT, constants.NAT,
"-p", constants.TCP,
"--dport", "53",
"-j", constants.REDIRECT,
"--to-ports", constants.IstioAgentDNSListenerPort)
} else {
for _, s := range cfg.cfg.DNSServersV4 {
// redirect all TCP dns traffic on port 53 to the agent on port 15053 for all servers
// in etc/resolv.conf
// We avoid redirecting all IP ranges to avoid infinite loops when there are local DNS proxies
// such as: app -> istio dns server -> dnsmasq -> upstream
// This ensures that we do not get requests from dnsmasq sent back to the agent dns server in a loop.
// Note: If a user somehow configured etc/resolv.conf to point to dnsmasq and server X, and dnsmasq also
// pointed to server X, this would not work. However, the assumption is that is not a common case.
cfg.iptables.AppendRuleV4(iptableslog.UndefinedCommand,
constants.ISTIOOUTPUT, constants.NAT,
"-p", constants.TCP,
"--dport", "53",
"-d", s+"/32",
"-j", constants.REDIRECT,
"--to-ports", constants.IstioAgentDNSListenerPort)
}
// Same redirection for the IPv6 DNS servers.
for _, s := range cfg.cfg.DNSServersV6 {
cfg.iptables.AppendRuleV6(iptableslog.UndefinedCommand,
constants.ISTIOOUTPUT, constants.NAT,
"-p", constants.TCP,
"--dport", "53",
"-d", s+"/128",
"-j", constants.REDIRECT,
"--to-ports", constants.IstioAgentDNSListenerPort)
}
}
}
// Skip redirection for Envoy-aware applications and
// container-to-container traffic both of which explicitly use
// localhost.
cfg.iptables.AppendVersionedRule("127.0.0.1/32", "::1/128", iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
"-d", constants.IPVersionSpecific, "-j", constants.RETURN)
// Apply outbound IPv4 exclusions. Must be applied before inclusions.
for _, cidr := range ipv4RangesExclude.IPNets {
cfg.iptables.AppendRuleV4(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT, "-d", cidr.String(), "-j", constants.RETURN)
}
// Apply outbound IPv6 exclusions.
for _, cidr := range ipv6RangesExclude.IPNets {
cfg.iptables.AppendRuleV6(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT, "-d", cidr.String(), "-j", constants.RETURN)
}
cfg.handleOutboundPortsInclude()
cfg.handleOutboundIncludeRules(ipv4RangesInclude, cfg.iptables.AppendRuleV4, cfg.iptables.InsertRuleV4)
cfg.handleOutboundIncludeRules(ipv6RangesInclude, cfg.iptables.AppendRuleV6, cfg.iptables.InsertRuleV6)
if redirectDNS {
// UDP DNS rules are produced by the shared helper; the command name is left empty in append mode.
HandleDNSUDP(
AppendOps, cfg.iptables, cfg.ext, "",
cfg.cfg.ProxyUID, cfg.cfg.ProxyGID,
cfg.cfg.DNSServersV4, cfg.cfg.DNSServersV6, cfg.cfg.CaptureAllDNS,
ownerGroupsFilter)
}
if cfg.cfg.InboundInterceptionMode == constants.TPROXY {
// save packet mark set by envoy.filters.listener.original_src as connection mark
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.PREROUTING, constants.MANGLE,
"-p", constants.TCP, "-m", "mark", "--mark", cfg.cfg.InboundTProxyMark, "-j", "CONNMARK", "--save-mark")
// If the packet is already marked with the inbound TPROXY mark, then return. This is to prevent marking envoy --> app traffic again.
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.OUTPUT, constants.MANGLE,
"-p", constants.TCP, "-o", "lo", "-m", "mark", "--mark", cfg.cfg.InboundTProxyMark, "-j", constants.RETURN)
for _, uid := range split(cfg.cfg.ProxyUID) {
// mark outgoing packets from envoy to workload by pod ip
// app call VIP --> envoy outbound -(mark 1338)-> envoy inbound --> app
cfg.iptables.AppendVersionedRule("127.0.0.1/32", "::1/128", iptableslog.UndefinedCommand, constants.OUTPUT, constants.MANGLE,
"!", "-d", constants.IPVersionSpecific, "-p", constants.TCP, "-o", "lo",
"-m", "owner", "--uid-owner", uid, "-j", constants.MARK, "--set-mark", outboundMark)
}
for _, gid := range split(cfg.cfg.ProxyGID) {
// mark outgoing packets from envoy to workload by pod ip
// app call VIP --> envoy outbound -(mark 1338)-> envoy inbound --> app
cfg.iptables.AppendVersionedRule("127.0.0.1/32", "::1/128", iptableslog.UndefinedCommand, constants.OUTPUT, constants.MANGLE,
"!", "-d", constants.IPVersionSpecific, "-p", constants.TCP, "-o", "lo",
"-m", "owner", "--gid-owner", gid, "-j", constants.MARK, "--set-mark", outboundMark)
}
// mark outgoing packets from workload, match it to policy routing entry setup for TPROXY mode
cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.OUTPUT, constants.MANGLE,
"-p", constants.TCP, "-m", "connmark", "--mark", cfg.cfg.InboundTProxyMark, "-j", "CONNMARK", "--restore-mark")
// prevent infinite redirect
cfg.iptables.InsertRule(iptableslog.UndefinedCommand, constants.ISTIOINBOUND, constants.MANGLE, 1,
"-p", constants.TCP, "-m", "mark", "--mark", cfg.cfg.InboundTProxyMark, "-j", constants.RETURN)
// prevent intercept traffic from envoy/pilot-agent ==> app by 127.0.0.6 --> podip
cfg.iptables.InsertRuleV4(iptableslog.UndefinedCommand, constants.ISTIOINBOUND, constants.MANGLE, 2,
"-p", constants.TCP, "-s", "127.0.0.6/32", "-i", "lo", "-j", constants.RETURN)
cfg.iptables.InsertRuleV6(iptableslog.UndefinedCommand, constants.ISTIOINBOUND, constants.MANGLE, 2,
"-p", constants.TCP, "-s", "::6/128", "-i", "lo", "-j", constants.RETURN)
// prevent intercept traffic from app ==> app by pod ip
cfg.iptables.InsertRule(iptableslog.UndefinedCommand, constants.ISTIOINBOUND, constants.MANGLE, 3,
"-p", constants.TCP, "-i", "lo", "-m", "mark", "!", "--mark", outboundMark, "-j", constants.RETURN)
}
cfg.executeCommands()
}
// UDPRuleApplier applies a single rule either by appending it to the rule builder
// (AppendOps) or by running a delete command through the external dependencies
// (DeleteOps), against a configurable table and chain.
type UDPRuleApplier struct {
	// iptables receives the rule when appending.
	iptables *builder.IptablesBuilder
	// ext runs the delete command when deleting.
	ext dep.Dependencies
	// ops selects between append and delete behaviour.
	ops Ops
	// table and chain locate where the rule applies.
	table string
	chain string
	// cmd is the executable used for delete operations.
	cmd string
}

// RunV4 applies the rule described by args for IPv4. Unknown ops values are ignored.
func (f UDPRuleApplier) RunV4(args ...string) {
	if f.ops == AppendOps {
		f.iptables.AppendRuleV4(iptableslog.UndefinedCommand, f.chain, f.table, args...)
		return
	}
	if f.ops == DeleteOps {
		deleteArgs := append([]string{"-t", f.table, opsToString[f.ops], f.chain}, args...)
		f.ext.RunQuietlyAndIgnore(f.cmd, deleteArgs...)
	}
}

// RunV6 applies the rule described by args for IPv6. Unknown ops values are ignored.
// Deletion uses the same f.cmd executable as RunV4.
func (f UDPRuleApplier) RunV6(args ...string) {
	if f.ops == AppendOps {
		f.iptables.AppendRuleV6(iptableslog.UndefinedCommand, f.chain, f.table, args...)
		return
	}
	if f.ops == DeleteOps {
		deleteArgs := append([]string{"-t", f.table, opsToString[f.ops], f.chain}, args...)
		f.ext.RunQuietlyAndIgnore(f.cmd, deleteArgs...)
	}
}

// Run applies the rule for IPv4 first and then for IPv6.
func (f UDPRuleApplier) Run(args ...string) {
	f.RunV4(args...)
	f.RunV6(args...)
}

// WithChain returns a copy of the applier that targets the given chain;
// the receiver itself is left unchanged.
func (f UDPRuleApplier) WithChain(chain string) UDPRuleApplier {
	retargeted := f
	retargeted.chain = chain
	return retargeted
}

// WithTable returns a copy of the applier that targets the given table;
// the receiver itself is left unchanged.
func (f UDPRuleApplier) WithTable(table string) UDPRuleApplier {
	retargeted := f
	retargeted.table = table
	return retargeted
}
// HandleDNSUDP is a helper function to tackle with DNS UDP specific operations.
// This helps the creation logic of DNS UDP rules in sync with the deletion:
// the same rule arguments are either appended to the builder (AppendOps) or
// deleted by running cmd through ext (DeleteOps). Rules start out in the nat
// table's OUTPUT chain; the conntrack zone rules are then added on the raw table.
func HandleDNSUDP(
	ops Ops, iptables *builder.IptablesBuilder, ext dep.Dependencies,
	cmd, proxyUID, proxyGID string, dnsServersV4 []string, dnsServersV6 []string, captureAllDNS bool,
	ownerGroupsFilter config.InterceptFilter) {
	applier := UDPRuleApplier{
		iptables: iptables,
		ext:      ext,
		ops:      ops,
		table:    constants.NAT,
		chain:    constants.OUTPUT,
		cmd:      cmd,
	}

	// Make sure that upstream DNS requests from agent/envoy dont get captured.
	// Run issues each rule for both IPv4 and IPv6.
	for _, uid := range split(proxyUID) {
		applier.Run("-p", "udp", "--dport", "53", "-m", "owner", "--uid-owner", uid, "-j", constants.RETURN)
	}
	for _, gid := range split(proxyGID) {
		applier.Run("-p", "udp", "--dport", "53", "-m", "owner", "--gid-owner", gid, "-j", constants.RETURN)
	}

	// Owner-group filter: either skip the listed groups, or skip everything that
	// belongs to none of the listed groups.
	if !ownerGroupsFilter.Except {
		groupIsNoneOf := CombineMatchers(ownerGroupsFilter.Values, func(group string) []string {
			return []string{"-m", "owner", "!", "--gid-owner", group}
		})
		applier.Run(Flatten([]string{"-p", "udp", "--dport", "53"}, groupIsNoneOf, []string{"-j", constants.RETURN})...)
	} else {
		for _, group := range ownerGroupsFilter.Values {
			applier.Run("-p", "udp", "--dport", "53", "-m", "owner", "--gid-owner", group, "-j", constants.RETURN)
		}
	}

	if !captureAllDNS {
		// Redirect UDP dns traffic on port 53 to the agent DNS listener only for the servers
		// in etc/resolv.conf.
		// We avoid redirecting all IP ranges to avoid infinite loops when there are local DNS proxies
		// such as: app -> istio dns server -> dnsmasq -> upstream
		// This ensures that we do not get requests from dnsmasq sent back to the agent dns server in a loop.
		// Note: If a user somehow configured etc/resolv.conf to point to dnsmasq and server X, and dnsmasq also
		// pointed to server X, this would not work. However, the assumption is that is not a common case.
		for _, server := range dnsServersV4 {
			applier.RunV4("-p", "udp", "--dport", "53", "-d", server+"/32",
				"-j", constants.REDIRECT, "--to-port", constants.IstioAgentDNSListenerPort)
		}
		for _, server := range dnsServersV6 {
			applier.RunV6("-p", "udp", "--dport", "53", "-d", server+"/128",
				"-j", constants.REDIRECT, "--to-port", constants.IstioAgentDNSListenerPort)
		}
	} else {
		// Redirect all UDP dns traffic on port 53 to the agent DNS listener.
		// This will be useful for the CNI case where pod DNS server address cannot be decided.
		applier.Run("-p", "udp", "--dport", "53", "-j", constants.REDIRECT, "--to-port", constants.IstioAgentDNSListenerPort)
	}

	// Split UDP DNS traffic to separate conntrack zones
	addConntrackZoneDNSUDP(applier.WithTable(constants.RAW), proxyUID, proxyGID, dnsServersV4, dnsServersV6, captureAllDNS)
}
// addConntrackZoneDNSUDP is a helper function to add iptables rules to split DNS traffic
// in two separate conntrack zones to avoid issues with UDP conntrack race conditions.
// Traffic that goes from istio to DNS servers and vice versa are zone 1 and traffic from
// DNS client to istio and vice versa goes to zone 2.
//
// Rules are applied through f on its configured table and chain; response-side rules
// are applied on the PREROUTING chain of the same table. Rules issued with f.Run cover
// both IPv4 and IPv6.
//
// The per-server response rules match the resolver as the packet SOURCE ("-s"): a reply
// coming from a resolver carries the resolver address as source, so matching it as
// destination would never hit. Note that deletion (DeleteOps) reuses these arguments,
// so rules installed by a version that used "-d" are not matched by the delete.
func addConntrackZoneDNSUDP(
	f UDPRuleApplier, proxyUID, proxyGID string, dnsServersV4 []string, dnsServersV6 []string, captureAllDNS bool) {
	for _, uid := range split(proxyUID) {
		// Packets with dst port 53 from istio to zone 1. These are Istio calls to upstream resolvers
		f.Run("-p", "udp", "--dport", "53", "-m", "owner", "--uid-owner", uid, "-j", constants.CT, "--zone", "1")
		// Packets with src port 15053 from istio to zone 2. These are Istio response packets to application clients
		f.Run("-p", "udp", "--sport", "15053", "-m", "owner", "--uid-owner", uid, "-j", constants.CT, "--zone", "2")
	}
	for _, gid := range split(proxyGID) {
		// Packets with dst port 53 from istio to zone 1. These are Istio calls to upstream resolvers
		f.Run("-p", "udp", "--dport", "53", "-m", "owner", "--gid-owner", gid, "-j", constants.CT, "--zone", "1")
		// Packets with src port 15053 from istio to zone 2. These are Istio response packets to application clients
		f.Run("-p", "udp", "--sport", "15053", "-m", "owner", "--gid-owner", gid, "-j", constants.CT, "--zone", "2")
	}

	if captureAllDNS {
		// Not specifying destination address is useful for the CNI case where pod DNS server address cannot be decided.
		// Mark all UDP dns traffic with dst port 53 as zone 2. These are application client packets towards DNS resolvers.
		f.Run("-p", "udp", "--dport", "53",
			"-j", constants.CT, "--zone", "2")
		// Mark all UDP dns traffic with src port 53 as zone 1. These are response packets from the DNS resolvers.
		f.WithChain(constants.PREROUTING).Run("-p", "udp", "--sport", "53",
			"-j", constants.CT, "--zone", "1")
		return
	}

	// Go through all DNS servers in etc/resolv.conf and mark the packets based on these addresses.
	for _, s := range dnsServersV4 {
		// Mark all UDP dns traffic with dst port 53 as zone 2. These are application client packets towards DNS resolvers.
		f.RunV4("-p", "udp", "--dport", "53", "-d", s+"/32",
			"-j", constants.CT, "--zone", "2")
		// Mark all UDP dns traffic with src port 53 as zone 1. These are response packets from the DNS resolvers,
		// hence the resolver address is matched as the packet source.
		f.WithChain(constants.PREROUTING).RunV4("-p", "udp", "--sport", "53", "-s", s+"/32",
			"-j", constants.CT, "--zone", "1")
	}
	for _, s := range dnsServersV6 {
		// Mark all UDP dns traffic with dst port 53 as zone 2. These are application client packets towards DNS resolvers.
		f.RunV6("-p", "udp", "--dport", "53", "-d", s+"/128",
			"-j", constants.CT, "--zone", "2")
		// Mark all UDP dns traffic with src port 53 as zone 1. These are response packets from the DNS resolvers,
		// hence the resolver address is matched as the packet source.
		f.WithChain(constants.PREROUTING).RunV6("-p", "udp", "--sport", "53", "-s", s+"/128",
			"-j", constants.CT, "--zone", "1")
	}
}
// handleOutboundPortsInclude appends, for every port listed in OutboundPortsInclude,
// a nat rule in ISTIOOUTPUT that sends TCP traffic for that destination port to
// ISTIOREDIRECT. An empty setting adds nothing.
func (cfg *IptablesConfigurator) handleOutboundPortsInclude() {
	included := cfg.cfg.OutboundPortsInclude
	if included == "" {
		return
	}
	for _, port := range split(included) {
		cfg.iptables.AppendRule(iptableslog.UndefinedCommand,
			constants.ISTIOOUTPUT, constants.NAT, "-p", constants.TCP, "--dport", port, "-j", constants.ISTIOREDIRECT)
	}
}
// handleCaptureByOwnerGroup appends nat ISTIOOUTPUT rules driven by the owner-group filter.
// When filter.Except is set, one RETURN rule is added per listed group. Otherwise a single
// RETURN rule is added whose matchers require the owner group to be none of the listed groups.
func (cfg *IptablesConfigurator) handleCaptureByOwnerGroup(filter config.InterceptFilter) {
	if !filter.Except {
		groupIsNoneOf := CombineMatchers(filter.Values, func(group string) []string {
			return []string{"-m", "owner", "!", "--gid-owner", group}
		})
		ruleArgs := append(groupIsNoneOf, "-j", constants.RETURN)
		cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT, ruleArgs...)
		return
	}
	for _, group := range filter.Values {
		cfg.iptables.AppendRule(iptableslog.UndefinedCommand, constants.ISTIOOUTPUT, constants.NAT,
			"-m", "owner", "--gid-owner", group, "-j", constants.RETURN)
	}
}
// createRulesFile writes contents to f through a buffered writer and closes f
// before returning, whether or not the write succeeded.
//
// It returns an error when writing, flushing or closing fails. A close failure is
// reported because the file is read back by the restore command afterwards; it is
// only surfaced when no earlier error occurred, so the first failure wins.
func (cfg *IptablesConfigurator) createRulesFile(f *os.File, contents string) (err error) {
	defer func() {
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("unable to close iptables-restore file: %w", closeErr)
		}
	}()
	log.Infof("Writing following contents to rules file: %v\n%v", f.Name(), strings.TrimSpace(contents))
	writer := bufio.NewWriter(f)
	if _, err = writer.WriteString(contents); err != nil {
		return fmt.Errorf("unable to write iptables-restore file: %w", err)
	}
	if err = writer.Flush(); err != nil {
		return fmt.Errorf("unable to flush iptables-restore file: %w", err)
	}
	return nil
}
// executeIptablesCommands runs every command in order through RunOrFail. The first
// element of each command is the executable and the remaining elements are its arguments.
func (cfg *IptablesConfigurator) executeIptablesCommands(commands [][]string) {
	for _, argv := range commands {
		name := argv[0]
		if len(argv) <= 1 {
			cfg.ext.RunOrFail(name)
			continue
		}
		cfg.ext.RunOrFail(name, argv[1:]...)
	}
}
// executeIptablesRestoreCommand writes the restore-format rules of one IP family
// (IPv4 when isIpv4 is true, IPv6 otherwise) to a file and runs the matching
// restore command on it with --noflush.
//
// When OutputPath is configured the rules are written there; the file is truncated
// on open because both the IPv4 and IPv6 passes write to the same path and leftover
// bytes from a longer previous write would otherwise be fed to the restore command.
// Without OutputPath a temporary file is used and removed when the function returns.
//
// It returns an error when the rules file cannot be opened, created or written.
func (cfg *IptablesConfigurator) executeIptablesRestoreCommand(isIpv4 bool) error {
	var data, filename, cmd string
	if isIpv4 {
		data = cfg.iptables.BuildV4Restore()
		filename = fmt.Sprintf("iptables-rules-%d.txt", time.Now().UnixNano())
		cmd = constants.IPTABLESRESTORE
	} else {
		data = cfg.iptables.BuildV6Restore()
		filename = fmt.Sprintf("ip6tables-rules-%d.txt", time.Now().UnixNano())
		cmd = constants.IP6TABLESRESTORE
	}

	var rulesFile *os.File
	var err error
	if cfg.cfg.OutputPath != "" {
		// Print the iptables rules into the given output file, replacing any previous contents.
		rulesFile, err = os.OpenFile(cfg.cfg.OutputPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
		if err != nil {
			return fmt.Errorf("unable to open iptables rules output file %v: %w", cfg.cfg.OutputPath, err)
		}
	} else {
		// Otherwise create a temporary file to write iptables rules to, which will be cleaned up at the end.
		rulesFile, err = os.CreateTemp("", filename)
		if err != nil {
			return fmt.Errorf("unable to create iptables-restore file: %w", err)
		}
		defer os.Remove(rulesFile.Name())
	}

	// createRulesFile closes the file once the contents are written.
	if err := cfg.createRulesFile(rulesFile, data); err != nil {
		return err
	}
	// --noflush to prevent flushing/deleting previous contents from table
	cfg.ext.RunOrFail(cmd, "--noflush", rulesFile.Name())
	return nil
}
// executeCommands applies the accumulated rules, IPv4 first and then IPv6.
// In restore format the rules are applied through the restore commands, and a
// failure is logged (naming the command that failed) before the process exits
// with status 1. Otherwise the individual iptables/ip6tables commands are run.
func (cfg *IptablesConfigurator) executeCommands() {
	if !cfg.cfg.RestoreFormat {
		// Execute iptables commands
		cfg.executeIptablesCommands(cfg.iptables.BuildV4())
		// Execute ip6tables commands
		cfg.executeIptablesCommands(cfg.iptables.BuildV6())
		return
	}
	// Execute iptables-restore
	if err := cfg.executeIptablesRestoreCommand(true); err != nil {
		log.Errorf("Failed to execute iptables-restore command: %v", err)
		os.Exit(1)
	}
	// Execute ip6tables-restore
	if err := cfg.executeIptablesRestoreCommand(false); err != nil {
		log.Errorf("Failed to execute ip6tables-restore command: %v", err)
		os.Exit(1)
	}
}