| /* |
| Copyright 2015 The Kubernetes Authors. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
| package images |
| |
| import ( |
| goerrors "errors" |
| "fmt" |
| "math" |
| "sort" |
| "sync" |
| "time" |
| |
| "k8s.io/klog" |
| |
| "k8s.io/api/core/v1" |
| "k8s.io/apimachinery/pkg/util/errors" |
| "k8s.io/apimachinery/pkg/util/sets" |
| "k8s.io/apimachinery/pkg/util/wait" |
| "k8s.io/client-go/tools/record" |
| statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" |
| "k8s.io/kubernetes/pkg/kubelet/container" |
| "k8s.io/kubernetes/pkg/kubelet/events" |
| "k8s.io/kubernetes/pkg/kubelet/util/sliceutils" |
| ) |
| |
| // StatsProvider is an interface for fetching stats used during image garbage |
| // collection. |
| type StatsProvider interface { |
| // ImageFsStats returns the stats of the image filesystem. |
| ImageFsStats() (*statsapi.FsStats, error) |
| } |
| |
| // ImageGCManager is an interface for managing lifecycle of all images. |
| // Implementation is thread-safe. |
| type ImageGCManager interface { |
| // Applies the garbage collection policy. Errors include being unable to free |
| // enough space as per the garbage collection policy. |
| GarbageCollect() error |
| |
| // Start async garbage collection of images. |
| Start() |
| |
| GetImageList() ([]container.Image, error) |
| |
| // Delete all unused images. |
| DeleteUnusedImages() error |
| } |
| |
// ImageGCPolicy describes when images are garbage collected. The two
// thresholds delimit the band of disk usage in which collection operates.
type ImageGCPolicy struct {
	// HighThresholdPercent is the highest usage allowed: usage at or above
	// this percentage triggers garbage collection.
	HighThresholdPercent int

	// LowThresholdPercent is the usage percentage garbage collection tries to
	// get down to; usage below it never triggers garbage collection.
	LowThresholdPercent int

	// MinAge is the minimum age an image must reach before it may be garbage
	// collected.
	MinAge time.Duration
}
| |
| type realImageGCManager struct { |
| // Container runtime |
| runtime container.Runtime |
| |
| // Records of images and their use. |
| imageRecords map[string]*imageRecord |
| imageRecordsLock sync.Mutex |
| |
| // The image garbage collection policy in use. |
| policy ImageGCPolicy |
| |
| // statsProvider provides stats used during image garbage collection. |
| statsProvider StatsProvider |
| |
| // Recorder for Kubernetes events. |
| recorder record.EventRecorder |
| |
| // Reference to this node. |
| nodeRef *v1.ObjectReference |
| |
| // Track initialization |
| initialized bool |
| |
| // imageCache is the cache of latest image list. |
| imageCache imageCache |
| |
| // sandbox image exempted from GC |
| sandboxImage string |
| } |
| |
| // imageCache caches latest result of ListImages. |
| type imageCache struct { |
| // sync.RWMutex is the mutex protects the image cache. |
| sync.RWMutex |
| // images is the image cache. |
| images []container.Image |
| } |
| |
| // set updates image cache. |
| func (i *imageCache) set(images []container.Image) { |
| i.Lock() |
| defer i.Unlock() |
| i.images = images |
| } |
| |
| // get gets a sorted (by image size) image list from image cache. |
| // There is a potentical data race in this function. See PR #60448 |
| // Because there is deepcopy function available currently, move sort |
| // function inside this function |
| func (i *imageCache) get() []container.Image { |
| i.Lock() |
| defer i.Unlock() |
| sort.Sort(sliceutils.ByImageSize(i.images)) |
| return i.images |
| } |
| |
// imageRecord is the bookkeeping kept for each tracked image.
type imageRecord struct {
	// firstDetected is when the image was first detected.
	firstDetected time.Time

	// lastUsed is when the image was last seen in use.
	lastUsed time.Time

	// size is the image size in bytes.
	size int64
}
| |
| // NewImageGCManager instantiates a new ImageGCManager object. |
| func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) { |
| // Validate policy. |
| if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 { |
| return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent) |
| } |
| if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 { |
| return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent) |
| } |
| if policy.LowThresholdPercent > policy.HighThresholdPercent { |
| return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent) |
| } |
| im := &realImageGCManager{ |
| runtime: runtime, |
| policy: policy, |
| imageRecords: make(map[string]*imageRecord), |
| statsProvider: statsProvider, |
| recorder: recorder, |
| nodeRef: nodeRef, |
| initialized: false, |
| sandboxImage: sandboxImage, |
| } |
| |
| return im, nil |
| } |
| |
| func (im *realImageGCManager) Start() { |
| go wait.Until(func() { |
| // Initial detection make detected time "unknown" in the past. |
| var ts time.Time |
| if im.initialized { |
| ts = time.Now() |
| } |
| _, err := im.detectImages(ts) |
| if err != nil { |
| klog.Warningf("[imageGCManager] Failed to monitor images: %v", err) |
| } else { |
| im.initialized = true |
| } |
| }, 5*time.Minute, wait.NeverStop) |
| |
| // Start a goroutine periodically updates image cache. |
| // TODO(random-liu): Merge this with the previous loop. |
| go wait.Until(func() { |
| images, err := im.runtime.ListImages() |
| if err != nil { |
| klog.Warningf("[imageGCManager] Failed to update image list: %v", err) |
| } else { |
| im.imageCache.set(images) |
| } |
| }, 30*time.Second, wait.NeverStop) |
| |
| } |
| |
| // Get a list of images on this node |
| func (im *realImageGCManager) GetImageList() ([]container.Image, error) { |
| return im.imageCache.get(), nil |
| } |
| |
| func (im *realImageGCManager) detectImages(detectTime time.Time) (sets.String, error) { |
| imagesInUse := sets.NewString() |
| |
| // Always consider the container runtime pod sandbox image in use |
| imageRef, err := im.runtime.GetImageRef(container.ImageSpec{Image: im.sandboxImage}) |
| if err == nil && imageRef != "" { |
| imagesInUse.Insert(imageRef) |
| } |
| |
| images, err := im.runtime.ListImages() |
| if err != nil { |
| return imagesInUse, err |
| } |
| pods, err := im.runtime.GetPods(true) |
| if err != nil { |
| return imagesInUse, err |
| } |
| |
| // Make a set of images in use by containers. |
| for _, pod := range pods { |
| for _, container := range pod.Containers { |
| klog.V(5).Infof("Pod %s/%s, container %s uses image %s(%s)", pod.Namespace, pod.Name, container.Name, container.Image, container.ImageID) |
| imagesInUse.Insert(container.ImageID) |
| } |
| } |
| |
| // Add new images and record those being used. |
| now := time.Now() |
| currentImages := sets.NewString() |
| im.imageRecordsLock.Lock() |
| defer im.imageRecordsLock.Unlock() |
| for _, image := range images { |
| klog.V(5).Infof("Adding image ID %s to currentImages", image.ID) |
| currentImages.Insert(image.ID) |
| |
| // New image, set it as detected now. |
| if _, ok := im.imageRecords[image.ID]; !ok { |
| klog.V(5).Infof("Image ID %s is new", image.ID) |
| im.imageRecords[image.ID] = &imageRecord{ |
| firstDetected: detectTime, |
| } |
| } |
| |
| // Set last used time to now if the image is being used. |
| if isImageUsed(image.ID, imagesInUse) { |
| klog.V(5).Infof("Setting Image ID %s lastUsed to %v", image.ID, now) |
| im.imageRecords[image.ID].lastUsed = now |
| } |
| |
| klog.V(5).Infof("Image ID %s has size %d", image.ID, image.Size) |
| im.imageRecords[image.ID].size = image.Size |
| } |
| |
| // Remove old images from our records. |
| for image := range im.imageRecords { |
| if !currentImages.Has(image) { |
| klog.V(5).Infof("Image ID %s is no longer present; removing from imageRecords", image) |
| delete(im.imageRecords, image) |
| } |
| } |
| |
| return imagesInUse, nil |
| } |
| |
| func (im *realImageGCManager) GarbageCollect() error { |
| // Get disk usage on disk holding images. |
| fsStats, err := im.statsProvider.ImageFsStats() |
| if err != nil { |
| return err |
| } |
| |
| var capacity, available int64 |
| if fsStats.CapacityBytes != nil { |
| capacity = int64(*fsStats.CapacityBytes) |
| } |
| if fsStats.AvailableBytes != nil { |
| available = int64(*fsStats.AvailableBytes) |
| } |
| |
| if available > capacity { |
| klog.Warningf("available %d is larger than capacity %d", available, capacity) |
| available = capacity |
| } |
| |
| // Check valid capacity. |
| if capacity == 0 { |
| err := goerrors.New("invalid capacity 0 on image filesystem") |
| im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error()) |
| return err |
| } |
| |
| // If over the max threshold, free enough to place us at the lower threshold. |
| usagePercent := 100 - int(available*100/capacity) |
| if usagePercent >= im.policy.HighThresholdPercent { |
| amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available |
| klog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes down to the low threshold (%d%%).", usagePercent, im.policy.HighThresholdPercent, amountToFree, im.policy.LowThresholdPercent) |
| freed, err := im.freeSpace(amountToFree, time.Now()) |
| if err != nil { |
| return err |
| } |
| |
| if freed < amountToFree { |
| err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed) |
| im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error()) |
| return err |
| } |
| } |
| |
| return nil |
| } |
| |
| func (im *realImageGCManager) DeleteUnusedImages() error { |
| klog.Infof("attempting to delete unused images") |
| _, err := im.freeSpace(math.MaxInt64, time.Now()) |
| return err |
| } |
| |
| // Tries to free bytesToFree worth of images on the disk. |
| // |
| // Returns the number of bytes free and an error if any occurred. The number of |
| // bytes freed is always returned. |
| // Note that error may be nil and the number of bytes free may be less |
| // than bytesToFree. |
| func (im *realImageGCManager) freeSpace(bytesToFree int64, freeTime time.Time) (int64, error) { |
| imagesInUse, err := im.detectImages(freeTime) |
| if err != nil { |
| return 0, err |
| } |
| |
| im.imageRecordsLock.Lock() |
| defer im.imageRecordsLock.Unlock() |
| |
| // Get all images in eviction order. |
| images := make([]evictionInfo, 0, len(im.imageRecords)) |
| for image, record := range im.imageRecords { |
| if isImageUsed(image, imagesInUse) { |
| klog.V(5).Infof("Image ID %s is being used", image) |
| continue |
| } |
| images = append(images, evictionInfo{ |
| id: image, |
| imageRecord: *record, |
| }) |
| } |
| sort.Sort(byLastUsedAndDetected(images)) |
| |
| // Delete unused images until we've freed up enough space. |
| var deletionErrors []error |
| spaceFreed := int64(0) |
| for _, image := range images { |
| klog.V(5).Infof("Evaluating image ID %s for possible garbage collection", image.id) |
| // Images that are currently in used were given a newer lastUsed. |
| if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) { |
| klog.V(5).Infof("Image ID %s has lastUsed=%v which is >= freeTime=%v, not eligible for garbage collection", image.id, image.lastUsed, freeTime) |
| continue |
| } |
| |
| // Avoid garbage collect the image if the image is not old enough. |
| // In such a case, the image may have just been pulled down, and will be used by a container right away. |
| |
| if freeTime.Sub(image.firstDetected) < im.policy.MinAge { |
| klog.V(5).Infof("Image ID %s has age %v which is less than the policy's minAge of %v, not eligible for garbage collection", image.id, freeTime.Sub(image.firstDetected), im.policy.MinAge) |
| continue |
| } |
| |
| // Remove image. Continue despite errors. |
| klog.Infof("[imageGCManager]: Removing image %q to free %d bytes", image.id, image.size) |
| err := im.runtime.RemoveImage(container.ImageSpec{Image: image.id}) |
| if err != nil { |
| deletionErrors = append(deletionErrors, err) |
| continue |
| } |
| delete(im.imageRecords, image.id) |
| spaceFreed += image.size |
| |
| if spaceFreed >= bytesToFree { |
| break |
| } |
| } |
| |
| if len(deletionErrors) > 0 { |
| return spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %v", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors)) |
| } |
| return spaceFreed, nil |
| } |
| |
| type evictionInfo struct { |
| id string |
| imageRecord |
| } |
| |
| type byLastUsedAndDetected []evictionInfo |
| |
| func (ev byLastUsedAndDetected) Len() int { return len(ev) } |
| func (ev byLastUsedAndDetected) Swap(i, j int) { ev[i], ev[j] = ev[j], ev[i] } |
| func (ev byLastUsedAndDetected) Less(i, j int) bool { |
| // Sort by last used, break ties by detected. |
| if ev[i].lastUsed.Equal(ev[j].lastUsed) { |
| return ev[i].firstDetected.Before(ev[j].firstDetected) |
| } |
| return ev[i].lastUsed.Before(ev[j].lastUsed) |
| } |
| |
| func isImageUsed(imageID string, imagesInUse sets.String) bool { |
| // Check the image ID. |
| if _, ok := imagesInUse[imageID]; ok { |
| return true |
| } |
| return false |
| } |