Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle federated service name lookups in kube-dns. #25727

Merged
merged 4 commits into from
May 23, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions cmd/kube-dns/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ type KubeDNSConfig struct {
KubeConfigFile string
KubeMasterURL string
HealthzPort int
// Federations maps federation names to their registered domain names.
Federations map[string]string
}

func NewKubeDNSConfig() *KubeDNSConfig {
Expand All @@ -41,6 +43,7 @@ func NewKubeDNSConfig() *KubeDNSConfig {
KubeConfigFile: "",
KubeMasterURL: "",
HealthzPort: 8081,
Federations: make(map[string]string),
}
}

Expand Down Expand Up @@ -95,9 +98,47 @@ func (m kubeMasterURLVar) Type() string {
return "string"
}

type federationsVar struct {
nameDomainMap map[string]string
}

// Set deserializes the input string in the format
// "myfederation1=example.com,myfederation2=second.example.com,myfederation3=example.com"
// into a map of key-value pairs of federation names to domain names.
func (fv federationsVar) Set(keyVal string) error {
for _, val := range strings.Split(keyVal, ",") {
splits := strings.SplitN(strings.TrimSpace(val), "=", 2)
name := strings.TrimSpace(splits[0])
domain := strings.TrimSpace(splits[1])
if len(validation.IsDNS1123Label(name)) != 0 {
return fmt.Errorf("%s not a valid federation name", name)
}
// The federation domain name need not strictly be domain names, we
// accept valid dns names with subdomain components.
if len(validation.IsDNS1123Subdomain(domain)) != 0 {
return fmt.Errorf("%s not a valid federation name", name)
}
fv.nameDomainMap[name] = domain
}
return nil
}

func (fv federationsVar) String() string {
var splits []string
for name, domain := range fv.nameDomainMap {
splits = append(splits, fmt.Sprintf("%s=%s", name, domain))
}
return strings.Join(splits, ",")
}

func (fv federationsVar) Type() string {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to be called? What is it for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to satisfy the pflag's Value interface so that we can pass it as an argument to fs.Var here - 3ad8ad1#diff-0f7323379621678150800d4b5da58230R139

return "[]string"
}

func (s *KubeDNSConfig) AddFlags(fs *pflag.FlagSet) {
fs.Var(clusterDomainVar{&s.ClusterDomain}, "domain", "domain under which to create names")
fs.StringVar(&s.KubeConfigFile, "kubecfg-file", s.KubeConfigFile, "Location of kubecfg file for access to kubernetes master service; --kube-master-url overrides the URL part of this; if neither this nor --kube-master-url are provided, defaults to service account tokens")
fs.Var(kubeMasterURLVar{&s.KubeMasterURL}, "kube-master-url", "URL to reach kubernetes master. Env variables in this flag will be expanded.")
fs.IntVar(&s.HealthzPort, "healthz-port", s.HealthzPort, "port on which to serve a kube-dns HTTP readiness probe.")
fs.Var(federationsVar{s.Federations}, "federations", "a comma separated list of the federation names and their corresponding domain names to which this cluster belongs. Example: \"myfederation1=example.com,myfederation2=example2.com,myfederation3=example.com\"")
}
8 changes: 4 additions & 4 deletions cmd/kube-dns/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import (
"github.com/skynetservices/skydns/server"
"k8s.io/kubernetes/cmd/kube-dns/app/options"
"k8s.io/kubernetes/pkg/api/unversioned"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/release_1_3"
"k8s.io/kubernetes/pkg/client/restclient"
kclient "k8s.io/kubernetes/pkg/client/unversioned"
kclientcmd "k8s.io/kubernetes/pkg/client/unversioned/clientcmd"
kdns "k8s.io/kubernetes/pkg/dns"
)
Expand All @@ -51,12 +51,12 @@ func NewKubeDNSServerDefault(config *options.KubeDNSConfig) *KubeDNSServer {
glog.Fatalf("Failed to create a kubernetes client: %v", err)
}
ks.healthzPort = config.HealthzPort
ks.kd = kdns.NewKubeDNS(kubeClient, config.ClusterDomain)
ks.kd = kdns.NewKubeDNS(kubeClient, config.ClusterDomain, config.Federations)
return &ks
}

// TODO: evaluate using pkg/client/clientcmd
func newKubeClient(dnsConfig *options.KubeDNSConfig) (*kclient.Client, error) {
func newKubeClient(dnsConfig *options.KubeDNSConfig) (clientset.Interface, error) {
var (
config *restclient.Config
err error
Expand Down Expand Up @@ -85,7 +85,7 @@ func newKubeClient(dnsConfig *options.KubeDNSConfig) (*kclient.Client, error) {

glog.Infof("Using %s for kubernetes master", config.Host)
glog.Infof("Using kubernetes API %v", config.GroupVersion)
return kclient.New(config)
return clientset.NewForConfig(config)
}

func (server *KubeDNSServer) Run() {
Expand Down
197 changes: 177 additions & 20 deletions pkg/dns/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,15 @@ import (
skymsg "github.com/skynetservices/skydns/msg"
kapi "k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/endpoints"
"k8s.io/kubernetes/pkg/api/unversioned"
v1 "k8s.io/kubernetes/pkg/api/v1"
kcache "k8s.io/kubernetes/pkg/client/cache"
kclient "k8s.io/kubernetes/pkg/client/unversioned"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/release_1_3"
kframework "k8s.io/kubernetes/pkg/controller/framework"
kselector "k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util/validation"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/watch"
)

const (
Expand All @@ -49,12 +52,19 @@ const (

// Resync period for the kube controller loop.
resyncPeriod = 5 * time.Minute

// Duration for which the TTL cache should hold the node resource to retrieve the zone
// annotation from it so that it could be added to federation CNAMEs. There is ideally
// no need to expire this cache, but we don't want to assume that node annotations
// never change. So we expire the cache and retrieve a node once every 180 seconds.
// The value is chosen to be neither too long nor too short.
nodeCacheTTL = 180 * time.Second
)

type KubeDNS struct {
// kubeClient makes calls to API Server and registers calls with API Server
// to get Endpoints and Service objects.
kubeClient *kclient.Client
kubeClient clientset.Interface

// The domain for which this DNS Server is authoritative.
domain string
Expand Down Expand Up @@ -83,15 +93,25 @@ type KubeDNS struct {

// serviceController invokes registered callbacks when services change.
serviceController *kframework.Controller

// Map of federation names that the cluster in which this kube-dns is running belongs to, to
// the corresponding domain names.
federations map[string]string

// A TTL cache that contains some subset of nodes in the system so that we can retrieve the
// cluster zone annotation from the cached node instead of getting it from the API server
// every time.
nodesStore kcache.Store
}

func NewKubeDNS(client *kclient.Client, domain string) *KubeDNS {
func NewKubeDNS(client clientset.Interface, domain string, federations map[string]string) *KubeDNS {
kd := &KubeDNS{
kubeClient: client,
domain: domain,
cache: NewTreeCache(),
cacheLock: sync.RWMutex{},
domainPath: reverseArray(strings.Split(strings.TrimRight(domain, "."), ".")),
kubeClient: client,
domain: domain,
cache: NewTreeCache(),
cacheLock: sync.RWMutex{},
domainPath: reverseArray(strings.Split(strings.TrimRight(domain, "."), ".")),
federations: federations,
}
kd.setEndpointsStore()
kd.setServicesStore()
Expand All @@ -109,13 +129,13 @@ func (kd *KubeDNS) Start() {
kd.waitForKubernetesService()
}

func (kd *KubeDNS) waitForKubernetesService() (svc *kapi.Service) {
func (kd *KubeDNS) waitForKubernetesService() (svc *v1.Service) {
name := fmt.Sprintf("%v/%v", kapi.NamespaceDefault, kubernetesSvcName)
glog.Infof("Waiting for service: %v", name)
var err error
servicePollInterval := 1 * time.Second
for {
svc, err = kd.kubeClient.Services(kapi.NamespaceDefault).Get(kubernetesSvcName)
svc, err = kd.kubeClient.Core().Services(kapi.NamespaceDefault).Get(kubernetesSvcName)
if err != nil || svc == nil {
glog.Infof("Ignoring error while waiting for service %v: %v. Sleeping %v before retrying.", name, err, servicePollInterval)
time.Sleep(servicePollInterval)
Expand All @@ -135,10 +155,16 @@ func (kd *KubeDNS) GetCacheAsJSON() (string, error) {

func (kd *KubeDNS) setServicesStore() {
// Returns a cache.ListWatch that gets all changes to services.
serviceWatch := kcache.NewListWatchFromClient(kd.kubeClient, "services", kapi.NamespaceAll, kselector.Everything())
kd.servicesStore, kd.serviceController = kframework.NewInformer(
serviceWatch,
&kapi.Service{},
&kcache.ListWatch{
ListFunc: func(options kapi.ListOptions) (runtime.Object, error) {
return kd.kubeClient.Core().Services(v1.NamespaceAll).List(options)
},
WatchFunc: func(options kapi.ListOptions) (watch.Interface, error) {
return kd.kubeClient.Core().Services(v1.NamespaceAll).Watch(options)
},
},
&v1.Service{},
resyncPeriod,
kframework.ResourceEventHandlerFuncs{
AddFunc: kd.newService,
Expand All @@ -150,10 +176,16 @@ func (kd *KubeDNS) setServicesStore() {

func (kd *KubeDNS) setEndpointsStore() {
// Returns a cache.ListWatch that gets all changes to endpoints.
endpointsWatch := kcache.NewListWatchFromClient(kd.kubeClient, "endpoints", kapi.NamespaceAll, kselector.Everything())
kd.endpointsStore, kd.endpointsController = kframework.NewInformer(
endpointsWatch,
&kapi.Endpoints{},
&kcache.ListWatch{
ListFunc: func(options kapi.ListOptions) (runtime.Object, error) {
return kd.kubeClient.Core().Endpoints(v1.NamespaceAll).List(options)
},
WatchFunc: func(options kapi.ListOptions) (watch.Interface, error) {
return kd.kubeClient.Core().Endpoints(v1.NamespaceAll).Watch(options)
},
},
&v1.Endpoints{},
resyncPeriod,
kframework.ResourceEventHandlerFuncs{
AddFunc: kd.handleEndpointAdd,
Expand Down Expand Up @@ -383,10 +415,17 @@ func (kd *KubeDNS) Records(name string, exact bool) ([]skymsg.Service, error) {
retval = append(retval, *(val.(*skymsg.Service)))
}
glog.Infof("records:%v, retval:%v, path:%v", records, retval, path)
if len(retval) == 0 {
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
if len(retval) > 0 {
return retval, nil
}

// If the name query is not an exact query and does not match any records in the local store,
// attempt to send a federation redirect (CNAME) response.
if !exact {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand what an 'exact' query is, and why you would not want to handle it like any other request. Can you add a comment to explain?

Copy link
Contributor Author

@madhusudancs madhusudancs May 19, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not very well documented, but if I understand it correctly, it means that SkyDNS wants the query names to be interpreted as is, without any modifications. Since we are going to modify the query name and send it as a CNAME response I think it is not appropriate to do it when it is true.

I could be wrong here and will be happy to modify the code if this is not the behavior we want.

@ArtfulCoder @thockin please correct me if I am wrong.

return kd.federationRecords(path)
}
return retval, nil

return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}

func (kd *KubeDNS) ReverseRecord(name string) (*skymsg.Service, error) {
Expand Down Expand Up @@ -446,6 +485,124 @@ func getSkyMsg(ip string, port int) (*skymsg.Service, string) {
return msg, fmt.Sprintf("%x", hash)
}

// isFederationQuery checks if the given query `path` matches the federated service query pattern.
// The conjunction of the following conditions forms the test for the federated service query
// pattern:
// 1. `path` has exactly 4+len(domainPath) segments: mysvc.myns.myfederation.svc.domain.path.
// 2. Service name component must be a valid RFC 952 name.
// 3. Namespace component must be a valid RFC 1123 name.
// 4. Federation component must also be a valid RFC 1123 name.
// 5. Fourth segment is exactly "svc"
// 6. The remaining segments match kd.domainPath.
// 7. And federation must be one of the listed federations in the config.
func (kd *KubeDNS) isFederationQuery(path []string) bool {
if len(path) == 4+len(kd.domainPath) &&
len(validation.IsDNS952Label(path[0])) == 0 &&
len(validation.IsDNS1123Label(path[1])) == 0 &&
len(validation.IsDNS1123Label(path[2])) == 0 &&
path[3] == serviceSubdomain {
for i, domComp := range kd.domainPath {
// kd.domainPath is reversed, so we need to look in the `path` in the reverse order.
if domComp != path[len(path)-i-1] {
return false
}
}
if _, ok := kd.federations[path[2]]; ok {
return true
}
}
return false
}

// federationRecords checks if the given `queryPath` is for a federated service and if it is,
// it returns a CNAME response containing the cluster zone name and federation domain name
// suffix.
func (kd *KubeDNS) federationRecords(queryPath []string) ([]skymsg.Service, error) {
// `queryPath` is a reversed-array of the queried name, reverse it back to make it easy
// to follow through this code and reduce confusion. There is no reason for it to be
// reversed here.
path := reverseArray(queryPath)

// Check if the name query matches the federation query pattern.
if !kd.isFederationQuery(path) {
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}

// Now that we have already established that the query is a federation query, remove the local
// domain path components, i.e. kd.domainPath, from the query.
path = path[:len(path)-len(kd.domainPath)]

// Append the zone name (zone in the cloud provider terminology, not a DNS zone)
zone, err := kd.getClusterZone()
if err != nil {
return nil, fmt.Errorf("failed to obtain the cluster zone: %v", err)
}
path = append(path, zone)

// We have already established that the map entry exists for the given federation,
// we just need to retrieve the domain name, validate it and append it to the path.
domain := kd.federations[path[2]]
// We accept valid subdomains as well, so just let all the valid subdomains.
if len(validation.IsDNS1123Subdomain(domain)) != 0 {
return nil, fmt.Errorf("%s is not a valid domain name for federation %s", domain, path[2])
}
name := strings.Join(append(path, domain), ".")

// Ensure that this name that we are returning as a CNAME response is a fully qualified
// domain name so that the client's resolver library doesn't have to go through its
// search list all over again.
if !strings.HasSuffix(name, ".") {
name = name + "."
}
return []skymsg.Service{{Host: name}}, nil
}

// getClusterZone returns the name of the zone the cluster is running in. It arbitrarily selects
// a node and reads the failure domain annotation on the node. An alternative is to obtain this
// pod's (i.e. kube-dns pod's) name using the downward API, get the pod, get the node the pod is
// bound to and retrieve that node's annotations. But even just by reading those steps, it looks
// complex and it is not entirely clear what that complexity is going to buy us. So taking a
// simpler approach here.
// Also note that zone here means the zone in cloud provider terminology, not the DNS zone.
func (kd *KubeDNS) getClusterZone() (string, error) {
var node *v1.Node

objs := kd.nodesStore.List()
if len(objs) > 0 {
var ok bool
if node, ok = objs[0].(*v1.Node); !ok {
return "", fmt.Errorf("expected node object, got: %T", objs[0])
}
} else {
// An alternative to listing nodes each time is to set a watch, but that is totally
// wasteful in case of non-federated independent Kubernetes clusters. So carefully
// proceeding here.
// TODO(madhusudancs): Move this to external/v1 API.
nodeList, err := kd.kubeClient.Core().Nodes().List(kapi.ListOptions{})
if err != nil || len(nodeList.Items) == 0 {
return "", fmt.Errorf("failed to retrieve the cluster nodes: %v", err)
}

// Select a node (arbitrarily the first node) that has `LabelZoneFailureDomain` set.
for _, nodeItem := range nodeList.Items {
if _, ok := nodeItem.Annotations[unversioned.LabelZoneFailureDomain]; !ok {
continue
}
// Make a copy of the node, don't rely on the loop variable.
node = &(*(&nodeItem))
if err := kd.nodesStore.Add(node); err != nil {
return "", fmt.Errorf("couldn't add the retrieved node to the cache: %v", err)
}
}
}

zone, ok := node.Annotations[unversioned.LabelZoneFailureDomain]
if !ok || zone == "" {
return "", fmt.Errorf("unknown cluster zone")
}
return zone, nil
}

func reverseArray(arr []string) []string {
for i := 0; i < len(arr)/2; i++ {
j := len(arr) - i - 1
Expand Down
Loading