nuke_prefixed_buckets deletes objects in batches

speed up the cleanup by using delete_objects() with batches of 128

Signed-off-by: Casey Bodley <cbodley@redhat.com>
This commit is contained in:
Casey Bodley 2021-08-04 13:56:39 -04:00
parent 41ebef2540
commit bb995c2aeb

View file

@ -74,38 +74,38 @@ def get_objects_list(bucket, client=None, prefix=None):
return objects_list
def get_versioned_objects_list(bucket, client=None):
if client == None:
client = get_client()
response = client.list_object_versions(Bucket=bucket)
versioned_objects_list = []
# generator function that returns object listings in batches, where each
# batch is a list of dicts compatible with delete_objects()
def list_versions(client, bucket, batch_size):
key_marker = ''
version_marker = ''
truncated = True
while truncated:
listing = client.list_object_versions(
Bucket=bucket,
KeyMarker=key_marker,
VersionIdMarker=version_marker,
MaxKeys=batch_size)
if 'Versions' in response:
contents = response['Versions']
for obj in contents:
key = obj['Key']
version_id = obj['VersionId']
versioned_obj = (key,version_id)
versioned_objects_list.append(versioned_obj)
key_marker = listing.get('NextKeyMarker')
version_marker = listing.get('NextVersionIdMarker')
truncated = listing['IsTruncated']
return versioned_objects_list
objs = listing.get('Versions', []) + listing.get('DeleteMarkers', [])
if len(objs):
yield [{'Key': o['Key'], 'VersionId': o['VersionId']} for o in objs]
def get_delete_markers_list(bucket, client=None):
if client == None:
client = get_client()
response = client.list_object_versions(Bucket=bucket)
delete_markers = []
def nuke_bucket(client, bucket):
batch_size = 128
max_retain_date = None
if 'DeleteMarkers' in response:
contents = response['DeleteMarkers']
for obj in contents:
key = obj['Key']
version_id = obj['VersionId']
versioned_obj = (key,version_id)
delete_markers.append(versioned_obj)
return delete_markers
# list and delete objects in batches
for objects in list_versions(client, bucket, batch_size):
client.delete_objects(Bucket=bucket,
Delete={'Objects': objects, 'Quiet': True},
BypassGovernanceRetention=True)
client.delete_bucket(Bucket=bucket)
def nuke_prefixed_buckets(prefix, client=None):
if client == None:
@ -114,20 +114,10 @@ def nuke_prefixed_buckets(prefix, client=None):
buckets = get_buckets_list(client, prefix)
err = None
if buckets != []:
for bucket_name in buckets:
objects_list = get_objects_list(bucket_name, client)
for obj in objects_list:
response = client.delete_object(Bucket=bucket_name,Key=obj)
versioned_objects_list = get_versioned_objects_list(bucket_name, client)
for obj in versioned_objects_list:
response = client.delete_object(Bucket=bucket_name,Key=obj[0],VersionId=obj[1])
delete_markers = get_delete_markers_list(bucket_name, client)
for obj in delete_markers:
response = client.delete_object(Bucket=bucket_name,Key=obj[0],VersionId=obj[1])
try:
response = client.delete_bucket(Bucket=bucket_name)
except ClientError as e:
nuke_bucket(client, bucket_name)
except Exception as e:
# The exception shouldn't be raised when doing cleanup. Pass and continue
# the bucket cleanup process. Otherwise left buckets wouldn't be cleared
# resulting in some kind of resource leak. err is used to hint user some