From bb995c2aeb8fbbf7c5a9547827b0afa9e1ad15ac Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Wed, 4 Aug 2021 13:56:39 -0400 Subject: [PATCH] nuke_prefixed_buckets deletes objects in batches speed up the cleanup by using delete_objects() with batches of 128 Signed-off-by: Casey Bodley --- s3tests_boto3/functional/__init__.py | 88 ++++++++++++---------------- 1 file changed, 39 insertions(+), 49 deletions(-) diff --git a/s3tests_boto3/functional/__init__.py b/s3tests_boto3/functional/__init__.py index ba8f9a7..ee91a85 100644 --- a/s3tests_boto3/functional/__init__.py +++ b/s3tests_boto3/functional/__init__.py @@ -74,38 +74,38 @@ def get_objects_list(bucket, client=None, prefix=None): return objects_list -def get_versioned_objects_list(bucket, client=None): - if client == None: - client = get_client() - response = client.list_object_versions(Bucket=bucket) - versioned_objects_list = [] +# generator function that returns object listings in batches, where each +# batch is a list of dicts compatible with delete_objects() +def list_versions(client, bucket, batch_size): + key_marker = '' + version_marker = '' + truncated = True + while truncated: + listing = client.list_object_versions( + Bucket=bucket, + KeyMarker=key_marker, + VersionIdMarker=version_marker, + MaxKeys=batch_size) - if 'Versions' in response: - contents = response['Versions'] - for obj in contents: - key = obj['Key'] - version_id = obj['VersionId'] - versioned_obj = (key,version_id) - versioned_objects_list.append(versioned_obj) + key_marker = listing.get('NextKeyMarker') + version_marker = listing.get('NextVersionIdMarker') + truncated = listing['IsTruncated'] - return versioned_objects_list + objs = listing.get('Versions', []) + listing.get('DeleteMarkers', []) + if len(objs): + yield [{'Key': o['Key'], 'VersionId': o['VersionId']} for o in objs] -def get_delete_markers_list(bucket, client=None): - if client == None: - client = get_client() - response = client.list_object_versions(Bucket=bucket) - delete_markers = [] +def nuke_bucket(client, bucket): + batch_size = 128 + max_retain_date = None - if 'DeleteMarkers' in response: - contents = response['DeleteMarkers'] - for obj in contents: - key = obj['Key'] - version_id = obj['VersionId'] - versioned_obj = (key,version_id) - delete_markers.append(versioned_obj) - - return delete_markers + # list and delete objects in batches + for objects in list_versions(client, bucket, batch_size): + client.delete_objects(Bucket=bucket, + Delete={'Objects': objects, 'Quiet': True}, + BypassGovernanceRetention=True) + client.delete_bucket(Bucket=bucket) def nuke_prefixed_buckets(prefix, client=None): if client == None: @@ -114,28 +114,18 @@ def nuke_prefixed_buckets(prefix, client=None): buckets = get_buckets_list(client, prefix) err = None - if buckets != []: - for bucket_name in buckets: - objects_list = get_objects_list(bucket_name, client) - for obj in objects_list: - response = client.delete_object(Bucket=bucket_name,Key=obj) - versioned_objects_list = get_versioned_objects_list(bucket_name, client) - for obj in versioned_objects_list: - response = client.delete_object(Bucket=bucket_name,Key=obj[0],VersionId=obj[1]) - delete_markers = get_delete_markers_list(bucket_name, client) - for obj in delete_markers: - response = client.delete_object(Bucket=bucket_name,Key=obj[0],VersionId=obj[1]) - try: - response = client.delete_bucket(Bucket=bucket_name) - except ClientError as e: - # The exception shouldn't be raised when doing cleanup. Pass and continue - # the bucket cleanup process. Otherwise left buckets wouldn't be cleared - # resulting in some kind of resource leak. err is used to hint user some - # exception once occurred. - err = e - pass - if err: - raise err + for bucket_name in buckets: + try: + nuke_bucket(client, bucket_name) + except Exception as e: + # The exception shouldn't be raised when doing cleanup. Pass and continue + # the bucket cleanup process. Otherwise left buckets wouldn't be cleared + # resulting in some kind of resource leak. err is used to hint user some + # exception once occurred. + err = e + pass + if err: + raise err print('Done with cleanup of buckets in tests.')