-
Notifications
You must be signed in to change notification settings - Fork 40k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Handle federated service name lookups in kube-dns. #25727
Changes from all commits
26028bc
3ee2b76
13fef23
006580a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,12 +30,15 @@ import ( | |
skymsg "github.com/skynetservices/skydns/msg" | ||
kapi "k8s.io/kubernetes/pkg/api" | ||
"k8s.io/kubernetes/pkg/api/endpoints" | ||
"k8s.io/kubernetes/pkg/api/unversioned" | ||
v1 "k8s.io/kubernetes/pkg/api/v1" | ||
kcache "k8s.io/kubernetes/pkg/client/cache" | ||
kclient "k8s.io/kubernetes/pkg/client/unversioned" | ||
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/release_1_3" | ||
kframework "k8s.io/kubernetes/pkg/controller/framework" | ||
kselector "k8s.io/kubernetes/pkg/fields" | ||
"k8s.io/kubernetes/pkg/runtime" | ||
"k8s.io/kubernetes/pkg/util/validation" | ||
"k8s.io/kubernetes/pkg/util/wait" | ||
"k8s.io/kubernetes/pkg/watch" | ||
) | ||
|
||
const ( | ||
|
@@ -49,12 +52,19 @@ const ( | |
|
||
// Resync period for the kube controller loop. | ||
resyncPeriod = 5 * time.Minute | ||
|
||
// Duration for which the TTL cache should hold the node resource to retrieve the zone | ||
// annotation from it so that it could be added to federation CNAMEs. There is ideally | ||
// no need to expire this cache, but we don't want to assume that node annotations | ||
// never change. So we expire the cache and retrieve a node once every 180 seconds. | ||
// The value is chosen to be neither too long nor too short. | ||
nodeCacheTTL = 180 * time.Second | ||
) | ||
|
||
type KubeDNS struct { | ||
// kubeClient makes calls to API Server and registers calls with API Server | ||
// to get Endpoints and Service objects. | ||
kubeClient *kclient.Client | ||
kubeClient clientset.Interface | ||
|
||
// The domain for which this DNS Server is authoritative. | ||
domain string | ||
|
@@ -83,15 +93,25 @@ type KubeDNS struct { | |
|
||
// serviceController invokes registered callbacks when services change. | ||
serviceController *kframework.Controller | ||
|
||
// Map of federation names that the cluster in which this kube-dns is running belongs to, to | ||
// the corresponding domain names. | ||
federations map[string]string | ||
|
||
// A TTL cache that contains some subset of nodes in the system so that we can retrieve the | ||
// cluster zone annotation from the cached node instead of getting it from the API server | ||
// every time. | ||
nodesStore kcache.Store | ||
} | ||
|
||
func NewKubeDNS(client *kclient.Client, domain string) *KubeDNS { | ||
func NewKubeDNS(client clientset.Interface, domain string, federations map[string]string) *KubeDNS { | ||
kd := &KubeDNS{ | ||
kubeClient: client, | ||
domain: domain, | ||
cache: NewTreeCache(), | ||
cacheLock: sync.RWMutex{}, | ||
domainPath: reverseArray(strings.Split(strings.TrimRight(domain, "."), ".")), | ||
kubeClient: client, | ||
domain: domain, | ||
cache: NewTreeCache(), | ||
cacheLock: sync.RWMutex{}, | ||
domainPath: reverseArray(strings.Split(strings.TrimRight(domain, "."), ".")), | ||
federations: federations, | ||
} | ||
kd.setEndpointsStore() | ||
kd.setServicesStore() | ||
|
@@ -109,13 +129,13 @@ func (kd *KubeDNS) Start() { | |
kd.waitForKubernetesService() | ||
} | ||
|
||
func (kd *KubeDNS) waitForKubernetesService() (svc *kapi.Service) { | ||
func (kd *KubeDNS) waitForKubernetesService() (svc *v1.Service) { | ||
name := fmt.Sprintf("%v/%v", kapi.NamespaceDefault, kubernetesSvcName) | ||
glog.Infof("Waiting for service: %v", name) | ||
var err error | ||
servicePollInterval := 1 * time.Second | ||
for { | ||
svc, err = kd.kubeClient.Services(kapi.NamespaceDefault).Get(kubernetesSvcName) | ||
svc, err = kd.kubeClient.Core().Services(kapi.NamespaceDefault).Get(kubernetesSvcName) | ||
if err != nil || svc == nil { | ||
glog.Infof("Ignoring error while waiting for service %v: %v. Sleeping %v before retrying.", name, err, servicePollInterval) | ||
time.Sleep(servicePollInterval) | ||
|
@@ -135,10 +155,16 @@ func (kd *KubeDNS) GetCacheAsJSON() (string, error) { | |
|
||
func (kd *KubeDNS) setServicesStore() { | ||
// Returns a cache.ListWatch that gets all changes to services. | ||
serviceWatch := kcache.NewListWatchFromClient(kd.kubeClient, "services", kapi.NamespaceAll, kselector.Everything()) | ||
kd.servicesStore, kd.serviceController = kframework.NewInformer( | ||
serviceWatch, | ||
&kapi.Service{}, | ||
&kcache.ListWatch{ | ||
ListFunc: func(options kapi.ListOptions) (runtime.Object, error) { | ||
return kd.kubeClient.Core().Services(v1.NamespaceAll).List(options) | ||
}, | ||
WatchFunc: func(options kapi.ListOptions) (watch.Interface, error) { | ||
return kd.kubeClient.Core().Services(v1.NamespaceAll).Watch(options) | ||
}, | ||
}, | ||
&v1.Service{}, | ||
resyncPeriod, | ||
kframework.ResourceEventHandlerFuncs{ | ||
AddFunc: kd.newService, | ||
|
@@ -150,10 +176,16 @@ func (kd *KubeDNS) setServicesStore() { | |
|
||
func (kd *KubeDNS) setEndpointsStore() { | ||
// Returns a cache.ListWatch that gets all changes to endpoints. | ||
endpointsWatch := kcache.NewListWatchFromClient(kd.kubeClient, "endpoints", kapi.NamespaceAll, kselector.Everything()) | ||
kd.endpointsStore, kd.endpointsController = kframework.NewInformer( | ||
endpointsWatch, | ||
&kapi.Endpoints{}, | ||
&kcache.ListWatch{ | ||
ListFunc: func(options kapi.ListOptions) (runtime.Object, error) { | ||
return kd.kubeClient.Core().Endpoints(v1.NamespaceAll).List(options) | ||
}, | ||
WatchFunc: func(options kapi.ListOptions) (watch.Interface, error) { | ||
return kd.kubeClient.Core().Endpoints(v1.NamespaceAll).Watch(options) | ||
}, | ||
}, | ||
&v1.Endpoints{}, | ||
resyncPeriod, | ||
kframework.ResourceEventHandlerFuncs{ | ||
AddFunc: kd.handleEndpointAdd, | ||
|
@@ -383,10 +415,17 @@ func (kd *KubeDNS) Records(name string, exact bool) ([]skymsg.Service, error) { | |
retval = append(retval, *(val.(*skymsg.Service))) | ||
} | ||
glog.Infof("records:%v, retval:%v, path:%v", records, retval, path) | ||
if len(retval) == 0 { | ||
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound} | ||
if len(retval) > 0 { | ||
return retval, nil | ||
} | ||
|
||
// If the name query is not an exact query and does not match any records in the local store, | ||
// attempt to send a federation redirect (CNAME) response. | ||
if !exact { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand what an 'exact' query is, and why you would not want to handle it like any other request. Can you add a comment to explain? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not very well documented, but if I understand it correctly, it means that SkyDNS wants the query names to be interpreted as is, without any modifications. Since we are going to modify the query name and send it as a CNAME response I think it is not appropriate to do it when it is true. I could be wrong here and will be happy to modify the code if this is not the behavior we want. @ArtfulCoder @thockin please correct me if I am wrong. |
||
return kd.federationRecords(path) | ||
} | ||
return retval, nil | ||
|
||
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound} | ||
} | ||
|
||
func (kd *KubeDNS) ReverseRecord(name string) (*skymsg.Service, error) { | ||
|
@@ -446,6 +485,124 @@ func getSkyMsg(ip string, port int) (*skymsg.Service, string) { | |
return msg, fmt.Sprintf("%x", hash) | ||
} | ||
|
||
// isFederationQuery checks if the given query `path` matches the federated service query pattern. | ||
// The conjunction of the following conditions forms the test for the federated service query | ||
// pattern: | ||
// 1. `path` has exactly 4+len(domainPath) segments: mysvc.myns.myfederation.svc.domain.path. | ||
// 2. Service name component must be a valid RFC 952 name. | ||
// 3. Namespace component must be a valid RFC 1123 name. | ||
// 4. Federation component must also be a valid RFC 1123 name. | ||
// 5. Fourth segment is exactly "svc" | ||
// 6. The remaining segments match kd.domainPath. | ||
// 7. And federation must be one of the listed federations in the config. | ||
func (kd *KubeDNS) isFederationQuery(path []string) bool { | ||
if len(path) == 4+len(kd.domainPath) && | ||
len(validation.IsDNS952Label(path[0])) == 0 && | ||
len(validation.IsDNS1123Label(path[1])) == 0 && | ||
len(validation.IsDNS1123Label(path[2])) == 0 && | ||
path[3] == serviceSubdomain { | ||
for i, domComp := range kd.domainPath { | ||
// kd.domainPath is reversed, so we need to look in the `path` in the reverse order. | ||
if domComp != path[len(path)-i-1] { | ||
return false | ||
} | ||
} | ||
if _, ok := kd.federations[path[2]]; ok { | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
||
// federationRecords checks if the given `queryPath` is for a federated service and if it is, | ||
// it returns a CNAME response containing the cluster zone name and federation domain name | ||
// suffix. | ||
func (kd *KubeDNS) federationRecords(queryPath []string) ([]skymsg.Service, error) { | ||
// `queryPath` is a reversed-array of the queried name, reverse it back to make it easy | ||
// to follow through this code and reduce confusion. There is no reason for it to be | ||
// reversed here. | ||
path := reverseArray(queryPath) | ||
|
||
// Check if the name query matches the federation query pattern. | ||
if !kd.isFederationQuery(path) { | ||
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound} | ||
} | ||
|
||
// Now that we have already established that the query is a federation query, remove the local | ||
// domain path components, i.e. kd.domainPath, from the query. | ||
path = path[:len(path)-len(kd.domainPath)] | ||
|
||
// Append the zone name (zone in the cloud provider terminology, not a DNS zone) | ||
zone, err := kd.getClusterZone() | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to obtain the cluster zone: %v", err) | ||
} | ||
path = append(path, zone) | ||
|
||
// We have already established that the map entry exists for the given federation, | ||
// we just need to retrieve the domain name, validate it and append it to the path. | ||
domain := kd.federations[path[2]] | ||
// We accept valid subdomains as well, so just let all the valid subdomains. | ||
if len(validation.IsDNS1123Subdomain(domain)) != 0 { | ||
return nil, fmt.Errorf("%s is not a valid domain name for federation %s", domain, path[2]) | ||
} | ||
name := strings.Join(append(path, domain), ".") | ||
|
||
// Ensure that this name that we are returning as a CNAME response is a fully qualified | ||
// domain name so that the client's resolver library doesn't have to go through its | ||
// search list all over again. | ||
if !strings.HasSuffix(name, ".") { | ||
name = name + "." | ||
} | ||
return []skymsg.Service{{Host: name}}, nil | ||
} | ||
|
||
// getClusterZone returns the name of the zone the cluster is running in. It arbitrarily selects | ||
// a node and reads the failure domain annotation on the node. An alternative is to obtain this | ||
// pod's (i.e. kube-dns pod's) name using the downward API, get the pod, get the node the pod is | ||
// bound to and retrieve that node's annotations. But even just by reading those steps, it looks | ||
// complex and it is not entirely clear what that complexity is going to buy us. So taking a | ||
// simpler approach here. | ||
// Also note that zone here means the zone in cloud provider terminology, not the DNS zone. | ||
func (kd *KubeDNS) getClusterZone() (string, error) { | ||
var node *v1.Node | ||
|
||
objs := kd.nodesStore.List() | ||
if len(objs) > 0 { | ||
var ok bool | ||
if node, ok = objs[0].(*v1.Node); !ok { | ||
return "", fmt.Errorf("expected node object, got: %T", objs[0]) | ||
} | ||
} else { | ||
// An alternative to listing nodes each time is to set a watch, but that is totally | ||
// wasteful in case of non-federated independent Kubernetes clusters. So carefully | ||
// proceeding here. | ||
// TODO(madhusudancs): Move this to external/v1 API. | ||
nodeList, err := kd.kubeClient.Core().Nodes().List(kapi.ListOptions{}) | ||
if err != nil || len(nodeList.Items) == 0 { | ||
return "", fmt.Errorf("failed to retrieve the cluster nodes: %v", err) | ||
} | ||
|
||
// Select a node (arbitrarily the first node) that has `LabelZoneFailureDomain` set. | ||
for _, nodeItem := range nodeList.Items { | ||
if _, ok := nodeItem.Annotations[unversioned.LabelZoneFailureDomain]; !ok { | ||
continue | ||
} | ||
// Make a copy of the node, don't rely on the loop variable. | ||
node = &(*(&nodeItem)) | ||
if err := kd.nodesStore.Add(node); err != nil { | ||
return "", fmt.Errorf("couldn't add the retrieved node to the cache: %v", err) | ||
} | ||
} | ||
} | ||
|
||
zone, ok := node.Annotations[unversioned.LabelZoneFailureDomain] | ||
if !ok || zone == "" { | ||
return "", fmt.Errorf("unknown cluster zone") | ||
} | ||
return zone, nil | ||
} | ||
|
||
func reverseArray(arr []string) []string { | ||
for i := 0; i < len(arr)/2; i++ { | ||
j := len(arr) - i - 1 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't seem to be called? What is it for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is to satisfy the pflag's
Value
interface so that we can pass it as an argument to fs.Var here - 3ad8ad1#diff-0f7323379621678150800d4b5da58230R139