// Package kubernetes provides the kubernetes backend. package kubernetes import ( "context" "errors" "fmt" "net" "strconv" "strings" "time" "github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin/etcd/msg" "github.com/coredns/coredns/plugin/kubernetes/object" "github.com/coredns/coredns/plugin/pkg/dnsutil" "github.com/coredns/coredns/plugin/pkg/fall" "github.com/coredns/coredns/request" "github.com/miekg/dns" api "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" discoveryV1beta1 "k8s.io/api/discovery/v1beta1" kerrors "k8s.io/apimachinery/pkg/api/errors" meta "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" ) // Kubernetes implements a plugin that connects to a Kubernetes cluster. type Kubernetes struct { Next plugin.Handler Zones []string Upstream Upstreamer APIServerList []string APICertAuth string APIClientCert string APIClientKey string ClientConfig clientcmd.ClientConfig APIConn dnsController Namespaces map[string]struct{} podMode string endpointNameMode bool Fall fall.F ttl uint32 opts dnsControlOpts primaryZoneIndex int localIPs []net.IP autoPathSearch []string // Local search path from /etc/resolv.conf. Needed for autopath. } // Upstreamer is used to resolve CNAME or other external targets type Upstreamer interface { Lookup(ctx context.Context, state request.Request, name string, typ uint16) (*dns.Msg, error) } // New returns a initialized Kubernetes. It default interfaceAddrFunc to return 127.0.0.1. All other // values default to their zero value, primaryZoneIndex will thus point to the first zone. func New(zones []string) *Kubernetes { k := new(Kubernetes) k.Zones = zones k.Namespaces = make(map[string]struct{}) k.podMode = podModeDisabled k.ttl = defaultTTL return k } const ( // podModeDisabled is the default value where pod requests are ignored podModeDisabled = "disabled" // podModeVerified is where Pod requests are answered only if they exist podModeVerified = "verified" // podModeInsecure is where pod requests are answered without verifying they exist podModeInsecure = "insecure" // DNSSchemaVersion is the schema version: https://github.com/kubernetes/dns/blob/master/docs/specification.md DNSSchemaVersion = "1.1.0" // Svc is the DNS schema for kubernetes services Svc = "svc" // Pod is the DNS schema for kubernetes pods Pod = "pod" // defaultTTL to apply to all answers. defaultTTL = 5 ) var ( errNoItems = errors.New("no items found") errNsNotExposed = errors.New("namespace is not exposed") errInvalidRequest = errors.New("invalid query name") ) // Services implements the ServiceBackend interface. func (k *Kubernetes) Services(ctx context.Context, state request.Request, exact bool, opt plugin.Options) (svcs []msg.Service, err error) { // We're looking again at types, which we've already done in ServeDNS, but there are some types k8s just can't answer. switch state.QType() { case dns.TypeTXT: // 1 label + zone, label must be "dns-version". t, _ := dnsutil.TrimZone(state.Name(), state.Zone) segs := dns.SplitDomainName(t) if len(segs) != 1 { return nil, nil } if segs[0] != "dns-version" { return nil, nil } svc := msg.Service{Text: DNSSchemaVersion, TTL: 28800, Key: msg.Path(state.QName(), coredns)} return []msg.Service{svc}, nil case dns.TypeNS: // We can only get here if the qname equals the zone, see ServeDNS in handler.go. nss := k.nsAddrs(false, state.Zone) var svcs []msg.Service for _, ns := range nss { if ns.Header().Rrtype == dns.TypeA { svcs = append(svcs, msg.Service{Host: ns.(*dns.A).A.String(), Key: msg.Path(ns.Header().Name, coredns), TTL: k.ttl}) continue } if ns.Header().Rrtype == dns.TypeAAAA { svcs = append(svcs, msg.Service{Host: ns.(*dns.AAAA).AAAA.String(), Key: msg.Path(ns.Header().Name, coredns), TTL: k.ttl}) } } return svcs, nil } if isDefaultNS(state.Name(), state.Zone) { nss := k.nsAddrs(false, state.Zone) var svcs []msg.Service for _, ns := range nss { if ns.Header().Rrtype == dns.TypeA && state.QType() == dns.TypeA { svcs = append(svcs, msg.Service{Host: ns.(*dns.A).A.String(), Key: msg.Path(state.QName(), coredns), TTL: k.ttl}) continue } if ns.Header().Rrtype == dns.TypeAAAA && state.QType() == dns.TypeAAAA { svcs = append(svcs, msg.Service{Host: ns.(*dns.AAAA).AAAA.String(), Key: msg.Path(state.QName(), coredns), TTL: k.ttl}) } } return svcs, nil } s, e := k.Records(ctx, state, false) // SRV for external services is not yet implemented, so remove those records. if state.QType() != dns.TypeSRV { return s, e } internal := []msg.Service{} for _, svc := range s { if t, _ := svc.HostType(); t != dns.TypeCNAME { internal = append(internal, svc) } } return internal, e } // primaryZone will return the first non-reverse zone being handled by this plugin func (k *Kubernetes) primaryZone() string { return k.Zones[k.primaryZoneIndex] } // Lookup implements the ServiceBackend interface. func (k *Kubernetes) Lookup(ctx context.Context, state request.Request, name string, typ uint16) (*dns.Msg, error) { return k.Upstream.Lookup(ctx, state, name, typ) } // IsNameError implements the ServiceBackend interface. func (k *Kubernetes) IsNameError(err error) bool { return err == errNoItems || err == errNsNotExposed || err == errInvalidRequest } func (k *Kubernetes) getClientConfig() (*rest.Config, error) { if k.ClientConfig != nil { return k.ClientConfig.ClientConfig() } loadingRules := &clientcmd.ClientConfigLoadingRules{} overrides := &clientcmd.ConfigOverrides{} clusterinfo := clientcmdapi.Cluster{} authinfo := clientcmdapi.AuthInfo{} // Connect to API from in cluster if len(k.APIServerList) == 0 { cc, err := rest.InClusterConfig() if err != nil { return nil, err } cc.ContentType = "application/vnd.kubernetes.protobuf" return cc, err } // Connect to API from out of cluster // Only the first one is used. We will deprecate multiple endpoints later. clusterinfo.Server = k.APIServerList[0] if len(k.APICertAuth) > 0 { clusterinfo.CertificateAuthority = k.APICertAuth } if len(k.APIClientCert) > 0 { authinfo.ClientCertificate = k.APIClientCert } if len(k.APIClientKey) > 0 { authinfo.ClientKey = k.APIClientKey } overrides.ClusterInfo = clusterinfo overrides.AuthInfo = authinfo clientConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, overrides) cc, err := clientConfig.ClientConfig() if err != nil { return nil, err } cc.ContentType = "application/vnd.kubernetes.protobuf" return cc, err } // InitKubeCache initializes a new Kubernetes cache. func (k *Kubernetes) InitKubeCache(ctx context.Context) (onStart func() error, onShut func() error, err error) { config, err := k.getClientConfig() if err != nil { return nil, nil, err } kubeClient, err := kubernetes.NewForConfig(config) if err != nil { return nil, nil, fmt.Errorf("failed to create kubernetes notification controller: %q", err) } if k.opts.labelSelector != nil { var selector labels.Selector selector, err = meta.LabelSelectorAsSelector(k.opts.labelSelector) if err != nil { return nil, nil, fmt.Errorf("unable to create Selector for LabelSelector '%s': %q", k.opts.labelSelector, err) } k.opts.selector = selector } if k.opts.namespaceLabelSelector != nil { var selector labels.Selector selector, err = meta.LabelSelectorAsSelector(k.opts.namespaceLabelSelector) if err != nil { return nil, nil, fmt.Errorf("unable to create Selector for LabelSelector '%s': %q", k.opts.namespaceLabelSelector, err) } k.opts.namespaceSelector = selector } k.opts.initPodCache = k.podMode == podModeVerified k.opts.zones = k.Zones k.opts.endpointNameMode = k.endpointNameMode k.APIConn = newdnsController(ctx, kubeClient, k.opts) initEndpointWatch := k.opts.initEndpointsCache onStart = func() error { go func() { if initEndpointWatch { // Revert to watching Endpoints for incompatible K8s. // This can be removed when all supported k8s versions support endpointslices. ok, v := k.endpointSliceSupported(kubeClient) if !ok { k.APIConn.(*dnsControl).WatchEndpoints(ctx) } // Revert to EndpointSlice v1beta1 if v1 is not supported if ok && v == discoveryV1beta1.SchemeGroupVersion.String() { k.APIConn.(*dnsControl).WatchEndpointSliceV1beta1(ctx) } } k.APIConn.Run() }() timeout := 5 * time.Second timeoutTicker := time.NewTicker(timeout) defer timeoutTicker.Stop() logDelay := 500 * time.Millisecond logTicker := time.NewTicker(logDelay) defer logTicker.Stop() checkSyncTicker := time.NewTicker(100 * time.Millisecond) defer checkSyncTicker.Stop() for { select { case <-checkSyncTicker.C: if k.APIConn.HasSynced() { return nil } case <-logTicker.C: log.Info("waiting for Kubernetes API before starting server") case <-timeoutTicker.C: log.Warning("starting server with unsynced Kubernetes API") return nil } } } onShut = func() error { return k.APIConn.Stop() } return onStart, onShut, err } // endpointSliceSupported will determine which endpoint object type to watch (endpointslices or endpoints) // based on the supportability of endpointslices in the API and server version. It will return true when endpointslices // should be watched, and false when endpoints should be watched. // If the API supports discovery, and the server versions >= 1.19, true is returned. // Also returned is the discovery version supported: "v1" if v1 is supported, and v1beta1 if v1beta1 is supported and // v1 is not supported. // This function should be removed, when all supported versions of k8s support v1. func (k *Kubernetes) endpointSliceSupported(kubeClient *kubernetes.Clientset) (bool, string) { ticker := time.NewTicker(100 * time.Millisecond) defer ticker.Stop() logTicker := time.NewTicker(10 * time.Second) defer logTicker.Stop() var connErr error for { select { case <-logTicker.C: if connErr == nil { continue } log.Warningf("Kubernetes API connection failure: %v", connErr) case <-ticker.C: sv, err := kubeClient.ServerVersion() if err != nil { connErr = err continue } // Disable use of endpoint slices for k8s versions 1.18 and earlier. The Endpointslices API was enabled // by default in 1.17 but Service -> Pod proxy continued to use Endpoints by default until 1.19. // DNS results should be built from the same source data that the proxy uses. This decision assumes // k8s EndpointSliceProxying feature gate is at the default (i.e. only enabled for k8s >= 1.19). major, _ := strconv.Atoi(sv.Major) minor, _ := strconv.Atoi(strings.TrimRight(sv.Minor, "+")) if major <= 1 && minor <= 18 { log.Info("Watching Endpoints instead of EndpointSlices in k8s versions < 1.19") return false, "" } // Enable use of endpoint slices if the API supports the discovery api _, err = kubeClient.Discovery().ServerResourcesForGroupVersion(discovery.SchemeGroupVersion.String()) if err == nil { return true, discovery.SchemeGroupVersion.String() } else if !kerrors.IsNotFound(err) { connErr = err continue } _, err = kubeClient.Discovery().ServerResourcesForGroupVersion(discoveryV1beta1.SchemeGroupVersion.String()) if err == nil { return true, discoveryV1beta1.SchemeGroupVersion.String() } else if !kerrors.IsNotFound(err) { connErr = err continue } // Disable use of endpoint slices in case that it is disabled in k8s versions 1.19 and newer. log.Info("Endpointslices API disabled. Watching Endpoints instead.") return false, "" } } } // Records looks up services in kubernetes. func (k *Kubernetes) Records(ctx context.Context, state request.Request, exact bool) ([]msg.Service, error) { r, e := parseRequest(state.Name(), state.Zone) if e != nil { return nil, e } if r.podOrSvc == "" { return nil, nil } if dnsutil.IsReverse(state.Name()) > 0 { return nil, errNoItems } if !k.namespaceExposed(r.namespace) { return nil, errNsNotExposed } if r.podOrSvc == Pod { pods, err := k.findPods(r, state.Zone) return pods, err } services, err := k.findServices(r, state.Zone) return services, err } func endpointHostname(addr object.EndpointAddress, endpointNameMode bool) string { if addr.Hostname != "" { return addr.Hostname } if endpointNameMode && addr.TargetRefName != "" { return addr.TargetRefName } if strings.Contains(addr.IP, ".") { return strings.Replace(addr.IP, ".", "-", -1) } if strings.Contains(addr.IP, ":") { return strings.Replace(addr.IP, ":", "-", -1) } return "" } func (k *Kubernetes) findPods(r recordRequest, zone string) (pods []msg.Service, err error) { if k.podMode == podModeDisabled { return nil, errNoItems } namespace := r.namespace if !k.namespaceExposed(namespace) { return nil, errNoItems } podname := r.service // handle empty pod name if podname == "" { if k.namespaceExposed(namespace) { // NODATA return nil, nil } // NXDOMAIN return nil, errNoItems } zonePath := msg.Path(zone, coredns) ip := "" if strings.Count(podname, "-") == 3 && !strings.Contains(podname, "--") { ip = strings.ReplaceAll(podname, "-", ".") } else { ip = strings.ReplaceAll(podname, "-", ":") } if k.podMode == podModeInsecure { if !k.namespaceExposed(namespace) { // namespace does not exist return nil, errNoItems } // If ip does not parse as an IP address, we return an error, otherwise we assume a CNAME and will try to resolve it in backend_lookup.go if net.ParseIP(ip) == nil { return nil, errNoItems } return []msg.Service{{Key: strings.Join([]string{zonePath, Pod, namespace, podname}, "/"), Host: ip, TTL: k.ttl}}, err } // PodModeVerified err = errNoItems for _, p := range k.APIConn.PodIndex(ip) { // check for matching ip and namespace if ip == p.PodIP && match(namespace, p.Namespace) { s := msg.Service{Key: strings.Join([]string{zonePath, Pod, namespace, podname}, "/"), Host: ip, TTL: k.ttl} pods = append(pods, s) err = nil } } return pods, err } // findServices returns the services matching r from the cache. func (k *Kubernetes) findServices(r recordRequest, zone string) (services []msg.Service, err error) { if !k.namespaceExposed(r.namespace) { return nil, errNoItems } // handle empty service name if r.service == "" { if k.namespaceExposed(r.namespace) { // NODATA return nil, nil } // NXDOMAIN return nil, errNoItems } err = errNoItems var ( endpointsListFunc func() []*object.Endpoints endpointsList []*object.Endpoints serviceList []*object.Service ) idx := object.ServiceKey(r.service, r.namespace) serviceList = k.APIConn.SvcIndex(idx) endpointsListFunc = func() []*object.Endpoints { return k.APIConn.EpIndex(idx) } zonePath := msg.Path(zone, coredns) for _, svc := range serviceList { if !(match(r.namespace, svc.Namespace) && match(r.service, svc.Name)) { continue } // If "ignore empty_service" option is set and no endpoints exist, return NXDOMAIN unless // it's a headless or externalName service (covered below). if k.opts.ignoreEmptyService && svc.Type != api.ServiceTypeExternalName && !svc.Headless() { // serve NXDOMAIN if no endpoint is able to answer podsCount := 0 for _, ep := range endpointsListFunc() { for _, eps := range ep.Subsets { podsCount += len(eps.Addresses) } } if podsCount == 0 { continue } } // External service if svc.Type == api.ServiceTypeExternalName { s := msg.Service{Key: strings.Join([]string{zonePath, Svc, svc.Namespace, svc.Name}, "/"), Host: svc.ExternalName, TTL: k.ttl} if t, _ := s.HostType(); t == dns.TypeCNAME { s.Key = strings.Join([]string{zonePath, Svc, svc.Namespace, svc.Name}, "/") services = append(services, s) err = nil } continue } // Endpoint query or headless service if svc.Headless() || r.endpoint != "" { if endpointsList == nil { endpointsList = endpointsListFunc() } for _, ep := range endpointsList { if object.EndpointsKey(svc.Name, svc.Namespace) != ep.Index { continue } for _, eps := range ep.Subsets { for _, addr := range eps.Addresses { // See comments in parse.go parseRequest about the endpoint handling. if r.endpoint != "" { if !match(r.endpoint, endpointHostname(addr, k.endpointNameMode)) { continue } } for _, p := range eps.Ports { if !(matchPortAndProtocol(r.port, p.Name, r.protocol, p.Protocol)) { continue } s := msg.Service{Host: addr.IP, Port: int(p.Port), TTL: k.ttl} s.Key = strings.Join([]string{zonePath, Svc, svc.Namespace, svc.Name, endpointHostname(addr, k.endpointNameMode)}, "/") err = nil services = append(services, s) } } } } continue } // ClusterIP service for _, p := range svc.Ports { if !(matchPortAndProtocol(r.port, p.Name, r.protocol, string(p.Protocol))) { continue } err = nil for _, ip := range svc.ClusterIPs { s := msg.Service{Host: ip, Port: int(p.Port), TTL: k.ttl} s.Key = strings.Join([]string{zonePath, Svc, svc.Namespace, svc.Name}, "/") services = append(services, s) } } } return services, err } // Serial return the SOA serial. func (k *Kubernetes) Serial(state request.Request) uint32 { return uint32(k.APIConn.Modified(false)) } // MinTTL returns the minimal TTL. func (k *Kubernetes) MinTTL(state request.Request) uint32 { return k.ttl } // match checks if a and b are equal. func match(a, b string) bool { return strings.EqualFold(a, b) } // matchPortAndProtocol matches port and protocol, permitting the the 'a' inputs to be wild func matchPortAndProtocol(aPort, bPort, aProtocol, bProtocol string) bool { return (match(aPort, bPort) || aPort == "") && (match(aProtocol, bProtocol) || aProtocol == "") } const coredns = "c" // used as a fake key prefix in msg.Service