Skip to content

Commit

Permalink
Add comments & misc review fixes
Browse files Browse the repository at this point in the history
Lots of comments describing the heuristics, how it fits together and the
limitations.

In particular, we can't guarantee correct volume placement if the set of
zones changes while volumes are being allocated.
  • Loading branch information
justinsb committed Jun 21, 2016
1 parent 9c25665 commit dd94997
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 3 deletions.
5 changes: 4 additions & 1 deletion pkg/cloudprovider/providers/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,10 @@ func (aws *AWSCloud) List(filter string) ([]string, error) {
// getAllZones retrieves a list of all the zones in which nodes are running
// It currently involves querying all instances
func (c *AWSCloud) getAllZones() (sets.String, error) {
// TODO: Caching, although we currently only use this in volume creation
// We don't currently cache this; it is currently used only in volume
// creation which is expected to be a comparatively rare occurrence.

// TODO: Caching / expose api.Nodes to the cloud provider?
// TODO: We could also query for subnets, I think

filters := []*ec2.Filter{newEc2Filter("instance-state-name", "running")}
Expand Down
16 changes: 16 additions & 0 deletions pkg/cloudprovider/providers/gce/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -2070,6 +2070,7 @@ func (gce *GCECloud) List(filter string) ([]string, error) {

// GetAllZones returns all the zones in which nodes are running
func (gce *GCECloud) GetAllZones() (sets.String, error) {
// Fast-path for non-multizone
if len(gce.managedZones) == 1 {
return sets.NewString(gce.managedZones...), nil
}
Expand All @@ -2084,6 +2085,15 @@ func (gce *GCECloud) GetAllZones() (sets.String, error) {
listCall := gce.service.Instances.List(gce.projectID, zone)

// No filter: We assume that a zone is either used or unused
// We could only consider running nodes (like we do in List above),
// but probably if instances are starting we still want to consider them.
// I think we should wait until we have a reason to make the
// call one way or the other; we generally can't guarantee correct
// volume spreading if the set of zones is changing
// (and volume spreading is currently only a heuristic).
// Long term we want to replace GetAllZones (which primarily supports volume
// spreading) with a scheduler policy that is able to see the global state of
// volumes and the health of zones.

// Just a minimal set of fields - we only care about existence
listCall = listCall.Fields("items(name)")
Expand Down Expand Up @@ -2258,6 +2268,12 @@ func (gce *GCECloud) GetAutoLabelsForPD(name string, zone string) (map[string]st
var disk *gceDisk
var err error
if zone == "" {
// We would like as far as possible to avoid this case,
// because GCE doesn't guarantee that volumes are uniquely named per region,
// just per zone. However, creation of GCE PDs was originally done only
// by name, so we have to continue to support that.
// However, wherever possible the zone should be passed (and it is passed
// for most cases that we can control, e.g. dynamic volume provisioning)
disk, err = gce.getDiskByNameUnknownZone(name)
if err != nil {
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion pkg/volume/plugins.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ type VolumeOptions struct {
// PV.Name of the appropriate PersistentVolume. Used to generate cloud
// volume name.
PVName string
// PVC.Name of the PersistentVolumeClaim, if we are auto-provisioning.
// PVC.Name of the PersistentVolumeClaim; only set during dynamic provisioning.
PVCName string
// Unique name of Kubernetes cluster.
ClusterName string
Expand Down
14 changes: 13 additions & 1 deletion pkg/volume/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ func GenerateVolumeName(clusterName, pvName string, maxLength int) string {
}

// ChooseZone implements our heuristics for choosing a zone for volume creation based on the volume name
// Volumes are generally round-robin-ed across all active zones, using the hash of the PVC Name.
// However, if the PVCName ends with `-<integer>`, we will hash the prefix, and then add the integer to the hash.
// This means that a PetSet's volumes (`claimname-petsetname-id`) will spread across available zones,
// assuming the id values are consecutive.
func ChooseZoneForVolume(zones sets.String, pvcName string) string {
// We create the volume in a zone determined by the name
// Eventually the scheduler will coordinate placement into an available zone
Expand All @@ -202,7 +206,7 @@ func ChooseZoneForVolume(zones sets.String, pvcName string) string {

if pvcName == "" {
// We should always be called with a name; this shouldn't happen
glog.Warningf("No Name defined during volume create; choosing random zone")
glog.Warningf("No name defined during volume create; choosing random zone")

hash = rand.Uint32()
} else {
Expand Down Expand Up @@ -230,6 +234,14 @@ func ChooseZoneForVolume(zones sets.String, pvcName string) string {
hash = h.Sum32()
}

// Zones.List returns zones in a consistent order (sorted)
// We do have a potential failure case where volumes will not be properly spread,
// if the set of zones changes during PetSet volume creation. However, this is
// probably relatively unlikely because we expect the set of zones to be essentially
// static for clusters.
// Hopefully we can address this problem if/when we do full scheduler integration of
// PVC placement (which could also e.g. avoid putting volumes in overloaded or
// unhealthy zones)
zoneSlice := zones.List()
zone := zoneSlice[(hash+index)%uint32(len(zoneSlice))]

Expand Down

0 comments on commit dd94997

Please sign in to comment.