| /* |
| Copyright 2016 The Kubernetes Authors. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
| package winuserspace |
| |
| import ( |
| "fmt" |
| "io" |
| "net" |
| "strconv" |
| "strings" |
| "sync" |
| "sync/atomic" |
| "time" |
| |
| "github.com/miekg/dns" |
| "k8s.io/api/core/v1" |
| "k8s.io/apimachinery/pkg/types" |
| "k8s.io/apimachinery/pkg/util/runtime" |
| "k8s.io/klog" |
| "k8s.io/kubernetes/pkg/proxy" |
| "k8s.io/kubernetes/pkg/util/ipconfig" |
| "k8s.io/utils/exec" |
| ) |
| |
| const ( |
| // Kubernetes DNS suffix search list |
| // TODO: Get DNS suffix search list from docker containers. |
| // --dns-search option doesn't work on Windows containers and has been |
| // fixed recently in docker. |
| |
| // Kubernetes cluster domain |
| clusterDomain = "cluster.local" |
| |
| // Kubernetes service domain |
| serviceDomain = "svc." + clusterDomain |
| |
| // Kubernetes default namespace domain |
| namespaceServiceDomain = "default." + serviceDomain |
| |
| // Kubernetes DNS service port name |
| dnsPortName = "dns" |
| |
| // DNS TYPE value A (a host address) |
| dnsTypeA uint16 = 0x01 |
| |
| // DNS TYPE value AAAA (a host IPv6 address) |
| dnsTypeAAAA uint16 = 0x1c |
| |
| // DNS CLASS value IN (the Internet) |
| dnsClassInternet uint16 = 0x01 |
| ) |
| |
| // Abstraction over TCP/UDP sockets which are proxied. |
| type proxySocket interface { |
| // Addr gets the net.Addr for a proxySocket. |
| Addr() net.Addr |
| // Close stops the proxySocket from accepting incoming connections. |
| // Each implementation should comment on the impact of calling Close |
| // while sessions are active. |
| Close() error |
| // ProxyLoop proxies incoming connections for the specified service to the service endpoints. |
| ProxyLoop(service ServicePortPortalName, info *serviceInfo, proxier *Proxier) |
| // ListenPort returns the host port that the proxySocket is listening on |
| ListenPort() int |
| } |
| |
| func newProxySocket(protocol v1.Protocol, ip net.IP, port int) (proxySocket, error) { |
| host := "" |
| if ip != nil { |
| host = ip.String() |
| } |
| |
| switch strings.ToUpper(string(protocol)) { |
| case "TCP": |
| listener, err := net.Listen("tcp", net.JoinHostPort(host, strconv.Itoa(port))) |
| if err != nil { |
| return nil, err |
| } |
| return &tcpProxySocket{Listener: listener, port: port}, nil |
| case "UDP": |
| addr, err := net.ResolveUDPAddr("udp", net.JoinHostPort(host, strconv.Itoa(port))) |
| if err != nil { |
| return nil, err |
| } |
| conn, err := net.ListenUDP("udp", addr) |
| if err != nil { |
| return nil, err |
| } |
| return &udpProxySocket{UDPConn: conn, port: port}, nil |
| case "SCTP": |
| return nil, fmt.Errorf("SCTP is not supported for user space proxy") |
| } |
| return nil, fmt.Errorf("unknown protocol %q", protocol) |
| } |
| |
| // How long we wait for a connection to a backend in seconds |
| var endpointDialTimeout = []time.Duration{250 * time.Millisecond, 500 * time.Millisecond, 1 * time.Second, 2 * time.Second} |
| |
| // tcpProxySocket implements proxySocket. Close() is implemented by net.Listener. When Close() is called, |
| // no new connections are allowed but existing connections are left untouched. |
| type tcpProxySocket struct { |
| net.Listener |
| port int |
| } |
| |
| func (tcp *tcpProxySocket) ListenPort() int { |
| return tcp.port |
| } |
| |
| func tryConnect(service ServicePortPortalName, srcAddr net.Addr, protocol string, proxier *Proxier) (out net.Conn, err error) { |
| sessionAffinityReset := false |
| for _, dialTimeout := range endpointDialTimeout { |
| servicePortName := proxy.ServicePortName{ |
| NamespacedName: types.NamespacedName{ |
| Namespace: service.Namespace, |
| Name: service.Name, |
| }, |
| Port: service.Port, |
| } |
| endpoint, err := proxier.loadBalancer.NextEndpoint(servicePortName, srcAddr, sessionAffinityReset) |
| if err != nil { |
| klog.Errorf("Couldn't find an endpoint for %s: %v", service, err) |
| return nil, err |
| } |
| klog.V(3).Infof("Mapped service %q to endpoint %s", service, endpoint) |
| // TODO: This could spin up a new goroutine to make the outbound connection, |
| // and keep accepting inbound traffic. |
| outConn, err := net.DialTimeout(protocol, endpoint, dialTimeout) |
| if err != nil { |
| if isTooManyFDsError(err) { |
| panic("Dial failed: " + err.Error()) |
| } |
| klog.Errorf("Dial failed: %v", err) |
| sessionAffinityReset = true |
| continue |
| } |
| return outConn, nil |
| } |
| return nil, fmt.Errorf("failed to connect to an endpoint.") |
| } |
| |
| func (tcp *tcpProxySocket) ProxyLoop(service ServicePortPortalName, myInfo *serviceInfo, proxier *Proxier) { |
| for { |
| if !myInfo.isAlive() { |
| // The service port was closed or replaced. |
| return |
| } |
| // Block until a connection is made. |
| inConn, err := tcp.Accept() |
| if err != nil { |
| if isTooManyFDsError(err) { |
| panic("Accept failed: " + err.Error()) |
| } |
| |
| if isClosedError(err) { |
| return |
| } |
| if !myInfo.isAlive() { |
| // Then the service port was just closed so the accept failure is to be expected. |
| return |
| } |
| klog.Errorf("Accept failed: %v", err) |
| continue |
| } |
| klog.V(3).Infof("Accepted TCP connection from %v to %v", inConn.RemoteAddr(), inConn.LocalAddr()) |
| outConn, err := tryConnect(service, inConn.(*net.TCPConn).RemoteAddr(), "tcp", proxier) |
| if err != nil { |
| klog.Errorf("Failed to connect to balancer: %v", err) |
| inConn.Close() |
| continue |
| } |
| // Spin up an async copy loop. |
| go proxyTCP(inConn.(*net.TCPConn), outConn.(*net.TCPConn)) |
| } |
| } |
| |
| // proxyTCP proxies data bi-directionally between in and out. |
| func proxyTCP(in, out *net.TCPConn) { |
| var wg sync.WaitGroup |
| wg.Add(2) |
| klog.V(4).Infof("Creating proxy between %v <-> %v <-> %v <-> %v", |
| in.RemoteAddr(), in.LocalAddr(), out.LocalAddr(), out.RemoteAddr()) |
| go copyBytes("from backend", in, out, &wg) |
| go copyBytes("to backend", out, in, &wg) |
| wg.Wait() |
| } |
| |
| func copyBytes(direction string, dest, src *net.TCPConn, wg *sync.WaitGroup) { |
| defer wg.Done() |
| klog.V(4).Infof("Copying %s: %s -> %s", direction, src.RemoteAddr(), dest.RemoteAddr()) |
| n, err := io.Copy(dest, src) |
| if err != nil { |
| if !isClosedError(err) { |
| klog.Errorf("I/O error: %v", err) |
| } |
| } |
| klog.V(4).Infof("Copied %d bytes %s: %s -> %s", n, direction, src.RemoteAddr(), dest.RemoteAddr()) |
| dest.Close() |
| src.Close() |
| } |
| |
| // udpProxySocket implements proxySocket. Close() is implemented by net.UDPConn. When Close() is called, |
| // no new connections are allowed and existing connections are broken. |
| // TODO: We could lame-duck this ourselves, if it becomes important. |
| type udpProxySocket struct { |
| *net.UDPConn |
| port int |
| } |
| |
| func (udp *udpProxySocket) ListenPort() int { |
| return udp.port |
| } |
| |
| func (udp *udpProxySocket) Addr() net.Addr { |
| return udp.LocalAddr() |
| } |
| |
| // Holds all the known UDP clients that have not timed out. |
| type clientCache struct { |
| mu sync.Mutex |
| clients map[string]net.Conn // addr string -> connection |
| } |
| |
| func newClientCache() *clientCache { |
| return &clientCache{clients: map[string]net.Conn{}} |
| } |
| |
| // DNS query client classified by address and QTYPE |
| type dnsClientQuery struct { |
| clientAddress string |
| dnsQType uint16 |
| } |
| |
| // Holds DNS client query, the value contains the index in DNS suffix search list, |
| // the original DNS message and length for the same client and QTYPE |
| type dnsClientCache struct { |
| mu sync.Mutex |
| clients map[dnsClientQuery]*dnsQueryState |
| } |
| |
| type dnsQueryState struct { |
| searchIndex int32 |
| msg *dns.Msg |
| } |
| |
| func newDNSClientCache() *dnsClientCache { |
| return &dnsClientCache{clients: map[dnsClientQuery]*dnsQueryState{}} |
| } |
| |
| func packetRequiresDNSSuffix(dnsType, dnsClass uint16) bool { |
| return (dnsType == dnsTypeA || dnsType == dnsTypeAAAA) && dnsClass == dnsClassInternet |
| } |
| |
| func isDNSService(portName string) bool { |
| return portName == dnsPortName |
| } |
| |
| func appendDNSSuffix(msg *dns.Msg, buffer []byte, length int, dnsSuffix string) (int, error) { |
| if msg == nil || len(msg.Question) == 0 { |
| return length, fmt.Errorf("DNS message parameter is invalid") |
| } |
| |
| // Save the original name since it will be reused for next iteration |
| origName := msg.Question[0].Name |
| if dnsSuffix != "" { |
| msg.Question[0].Name += dnsSuffix + "." |
| } |
| mbuf, err := msg.PackBuffer(buffer) |
| msg.Question[0].Name = origName |
| |
| if err != nil { |
| klog.Warningf("Unable to pack DNS packet. Error is: %v", err) |
| return length, err |
| } |
| |
| if &buffer[0] != &mbuf[0] { |
| return length, fmt.Errorf("Buffer is too small in packing DNS packet") |
| } |
| |
| return len(mbuf), nil |
| } |
| |
| func recoverDNSQuestion(origName string, msg *dns.Msg, buffer []byte, length int) (int, error) { |
| if msg == nil || len(msg.Question) == 0 { |
| return length, fmt.Errorf("DNS message parameter is invalid") |
| } |
| |
| if origName == msg.Question[0].Name { |
| return length, nil |
| } |
| |
| msg.Question[0].Name = origName |
| if len(msg.Answer) > 0 { |
| msg.Answer[0].Header().Name = origName |
| } |
| mbuf, err := msg.PackBuffer(buffer) |
| |
| if err != nil { |
| klog.Warningf("Unable to pack DNS packet. Error is: %v", err) |
| return length, err |
| } |
| |
| if &buffer[0] != &mbuf[0] { |
| return length, fmt.Errorf("Buffer is too small in packing DNS packet") |
| } |
| |
| return len(mbuf), nil |
| } |
| |
| func processUnpackedDNSQueryPacket( |
| dnsClients *dnsClientCache, |
| msg *dns.Msg, |
| host string, |
| dnsQType uint16, |
| buffer []byte, |
| length int, |
| dnsSearch []string) int { |
| if dnsSearch == nil || len(dnsSearch) == 0 { |
| klog.V(1).Infof("DNS search list is not initialized and is empty.") |
| return length |
| } |
| |
| // TODO: handle concurrent queries from a client |
| dnsClients.mu.Lock() |
| state, found := dnsClients.clients[dnsClientQuery{host, dnsQType}] |
| if !found { |
| state = &dnsQueryState{0, msg} |
| dnsClients.clients[dnsClientQuery{host, dnsQType}] = state |
| } |
| dnsClients.mu.Unlock() |
| |
| index := atomic.SwapInt32(&state.searchIndex, state.searchIndex+1) |
| // Also update message ID if the client retries due to previous query time out |
| state.msg.MsgHdr.Id = msg.MsgHdr.Id |
| |
| if index < 0 || index >= int32(len(dnsSearch)) { |
| klog.V(1).Infof("Search index %d is out of range.", index) |
| return length |
| } |
| |
| length, err := appendDNSSuffix(msg, buffer, length, dnsSearch[index]) |
| if err != nil { |
| klog.Errorf("Append DNS suffix failed: %v", err) |
| } |
| |
| return length |
| } |
| |
| func processUnpackedDNSResponsePacket( |
| svrConn net.Conn, |
| dnsClients *dnsClientCache, |
| msg *dns.Msg, |
| rcode int, |
| host string, |
| dnsQType uint16, |
| buffer []byte, |
| length int, |
| dnsSearch []string) (bool, int) { |
| var drop bool |
| var err error |
| if dnsSearch == nil || len(dnsSearch) == 0 { |
| klog.V(1).Infof("DNS search list is not initialized and is empty.") |
| return drop, length |
| } |
| |
| dnsClients.mu.Lock() |
| state, found := dnsClients.clients[dnsClientQuery{host, dnsQType}] |
| dnsClients.mu.Unlock() |
| |
| if found { |
| index := atomic.SwapInt32(&state.searchIndex, state.searchIndex+1) |
| if rcode != 0 && index >= 0 && index < int32(len(dnsSearch)) { |
| // If the response has failure and iteration through the search list has not |
| // reached the end, retry on behalf of the client using the original query message |
| drop = true |
| length, err = appendDNSSuffix(state.msg, buffer, length, dnsSearch[index]) |
| if err != nil { |
| klog.Errorf("Append DNS suffix failed: %v", err) |
| } |
| |
| _, err = svrConn.Write(buffer[0:length]) |
| if err != nil { |
| if !logTimeout(err) { |
| klog.Errorf("Write failed: %v", err) |
| } |
| } |
| } else { |
| length, err = recoverDNSQuestion(state.msg.Question[0].Name, msg, buffer, length) |
| if err != nil { |
| klog.Errorf("Recover DNS question failed: %v", err) |
| } |
| |
| dnsClients.mu.Lock() |
| delete(dnsClients.clients, dnsClientQuery{host, dnsQType}) |
| dnsClients.mu.Unlock() |
| } |
| } |
| |
| return drop, length |
| } |
| |
| func processDNSQueryPacket( |
| dnsClients *dnsClientCache, |
| cliAddr net.Addr, |
| buffer []byte, |
| length int, |
| dnsSearch []string) (int, error) { |
| msg := &dns.Msg{} |
| if err := msg.Unpack(buffer[:length]); err != nil { |
| klog.Warningf("Unable to unpack DNS packet. Error is: %v", err) |
| return length, err |
| } |
| |
| // Query - Response bit that specifies whether this message is a query (0) or a response (1). |
| if msg.MsgHdr.Response == true { |
| return length, fmt.Errorf("DNS packet should be a query message") |
| } |
| |
| // QDCOUNT |
| if len(msg.Question) != 1 { |
| klog.V(1).Infof("Number of entries in the question section of the DNS packet is: %d", len(msg.Question)) |
| klog.V(1).Infof("DNS suffix appending does not support more than one question.") |
| return length, nil |
| } |
| |
| // ANCOUNT, NSCOUNT, ARCOUNT |
| if len(msg.Answer) != 0 || len(msg.Ns) != 0 || len(msg.Extra) != 0 { |
| klog.V(1).Infof("DNS packet contains more than question section.") |
| return length, nil |
| } |
| |
| dnsQType := msg.Question[0].Qtype |
| dnsQClass := msg.Question[0].Qclass |
| if packetRequiresDNSSuffix(dnsQType, dnsQClass) { |
| host, _, err := net.SplitHostPort(cliAddr.String()) |
| if err != nil { |
| klog.V(1).Infof("Failed to get host from client address: %v", err) |
| host = cliAddr.String() |
| } |
| |
| length = processUnpackedDNSQueryPacket(dnsClients, msg, host, dnsQType, buffer, length, dnsSearch) |
| } |
| |
| return length, nil |
| } |
| |
| func processDNSResponsePacket( |
| svrConn net.Conn, |
| dnsClients *dnsClientCache, |
| cliAddr net.Addr, |
| buffer []byte, |
| length int, |
| dnsSearch []string) (bool, int, error) { |
| var drop bool |
| msg := &dns.Msg{} |
| if err := msg.Unpack(buffer[:length]); err != nil { |
| klog.Warningf("Unable to unpack DNS packet. Error is: %v", err) |
| return drop, length, err |
| } |
| |
| // Query - Response bit that specifies whether this message is a query (0) or a response (1). |
| if msg.MsgHdr.Response == false { |
| return drop, length, fmt.Errorf("DNS packet should be a response message") |
| } |
| |
| // QDCOUNT |
| if len(msg.Question) != 1 { |
| klog.V(1).Infof("Number of entries in the response section of the DNS packet is: %d", len(msg.Answer)) |
| return drop, length, nil |
| } |
| |
| dnsQType := msg.Question[0].Qtype |
| dnsQClass := msg.Question[0].Qclass |
| if packetRequiresDNSSuffix(dnsQType, dnsQClass) { |
| host, _, err := net.SplitHostPort(cliAddr.String()) |
| if err != nil { |
| klog.V(1).Infof("Failed to get host from client address: %v", err) |
| host = cliAddr.String() |
| } |
| |
| drop, length = processUnpackedDNSResponsePacket(svrConn, dnsClients, msg, msg.MsgHdr.Rcode, host, dnsQType, buffer, length, dnsSearch) |
| } |
| |
| return drop, length, nil |
| } |
| |
| func (udp *udpProxySocket) ProxyLoop(service ServicePortPortalName, myInfo *serviceInfo, proxier *Proxier) { |
| var buffer [4096]byte // 4KiB should be enough for most whole-packets |
| var dnsSearch []string |
| if isDNSService(service.Port) { |
| dnsSearch = []string{"", namespaceServiceDomain, serviceDomain, clusterDomain} |
| execer := exec.New() |
| ipconfigInterface := ipconfig.New(execer) |
| suffixList, err := ipconfigInterface.GetDNSSuffixSearchList() |
| if err == nil { |
| for _, suffix := range suffixList { |
| dnsSearch = append(dnsSearch, suffix) |
| } |
| } |
| } |
| |
| for { |
| if !myInfo.isAlive() { |
| // The service port was closed or replaced. |
| break |
| } |
| |
| // Block until data arrives. |
| // TODO: Accumulate a histogram of n or something, to fine tune the buffer size. |
| n, cliAddr, err := udp.ReadFrom(buffer[0:]) |
| if err != nil { |
| if e, ok := err.(net.Error); ok { |
| if e.Temporary() { |
| klog.V(1).Infof("ReadFrom had a temporary failure: %v", err) |
| continue |
| } |
| } |
| klog.Errorf("ReadFrom failed, exiting ProxyLoop: %v", err) |
| break |
| } |
| |
| // If this is DNS query packet |
| if isDNSService(service.Port) { |
| n, err = processDNSQueryPacket(myInfo.dnsClients, cliAddr, buffer[:], n, dnsSearch) |
| if err != nil { |
| klog.Errorf("Process DNS query packet failed: %v", err) |
| } |
| } |
| |
| // If this is a client we know already, reuse the connection and goroutine. |
| svrConn, err := udp.getBackendConn(myInfo.activeClients, myInfo.dnsClients, cliAddr, proxier, service, myInfo.timeout, dnsSearch) |
| if err != nil { |
| continue |
| } |
| // TODO: It would be nice to let the goroutine handle this write, but we don't |
| // really want to copy the buffer. We could do a pool of buffers or something. |
| _, err = svrConn.Write(buffer[0:n]) |
| if err != nil { |
| if !logTimeout(err) { |
| klog.Errorf("Write failed: %v", err) |
| // TODO: Maybe tear down the goroutine for this client/server pair? |
| } |
| continue |
| } |
| err = svrConn.SetDeadline(time.Now().Add(myInfo.timeout)) |
| if err != nil { |
| klog.Errorf("SetDeadline failed: %v", err) |
| continue |
| } |
| } |
| } |
| |
| func (udp *udpProxySocket) getBackendConn(activeClients *clientCache, dnsClients *dnsClientCache, cliAddr net.Addr, proxier *Proxier, service ServicePortPortalName, timeout time.Duration, dnsSearch []string) (net.Conn, error) { |
| activeClients.mu.Lock() |
| defer activeClients.mu.Unlock() |
| |
| svrConn, found := activeClients.clients[cliAddr.String()] |
| if !found { |
| // TODO: This could spin up a new goroutine to make the outbound connection, |
| // and keep accepting inbound traffic. |
| klog.V(3).Infof("New UDP connection from %s", cliAddr) |
| var err error |
| svrConn, err = tryConnect(service, cliAddr, "udp", proxier) |
| if err != nil { |
| return nil, err |
| } |
| if err = svrConn.SetDeadline(time.Now().Add(timeout)); err != nil { |
| klog.Errorf("SetDeadline failed: %v", err) |
| return nil, err |
| } |
| activeClients.clients[cliAddr.String()] = svrConn |
| go func(cliAddr net.Addr, svrConn net.Conn, activeClients *clientCache, dnsClients *dnsClientCache, service ServicePortPortalName, timeout time.Duration, dnsSearch []string) { |
| defer runtime.HandleCrash() |
| udp.proxyClient(cliAddr, svrConn, activeClients, dnsClients, service, timeout, dnsSearch) |
| }(cliAddr, svrConn, activeClients, dnsClients, service, timeout, dnsSearch) |
| } |
| return svrConn, nil |
| } |
| |
| // This function is expected to be called as a goroutine. |
| // TODO: Track and log bytes copied, like TCP |
| func (udp *udpProxySocket) proxyClient(cliAddr net.Addr, svrConn net.Conn, activeClients *clientCache, dnsClients *dnsClientCache, service ServicePortPortalName, timeout time.Duration, dnsSearch []string) { |
| defer svrConn.Close() |
| var buffer [4096]byte |
| for { |
| n, err := svrConn.Read(buffer[0:]) |
| if err != nil { |
| if !logTimeout(err) { |
| klog.Errorf("Read failed: %v", err) |
| } |
| break |
| } |
| |
| drop := false |
| if isDNSService(service.Port) { |
| drop, n, err = processDNSResponsePacket(svrConn, dnsClients, cliAddr, buffer[:], n, dnsSearch) |
| if err != nil { |
| klog.Errorf("Process DNS response packet failed: %v", err) |
| } |
| } |
| |
| if !drop { |
| err = svrConn.SetDeadline(time.Now().Add(timeout)) |
| if err != nil { |
| klog.Errorf("SetDeadline failed: %v", err) |
| break |
| } |
| n, err = udp.WriteTo(buffer[0:n], cliAddr) |
| if err != nil { |
| if !logTimeout(err) { |
| klog.Errorf("WriteTo failed: %v", err) |
| } |
| break |
| } |
| } |
| } |
| activeClients.mu.Lock() |
| delete(activeClients.clients, cliAddr.String()) |
| activeClients.mu.Unlock() |
| } |