From bb995c2aeb8fbbf7c5a9547827b0afa9e1ad15ac Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Wed, 4 Aug 2021 13:56:39 -0400 Subject: [PATCH 1/3] nuke_prefixed_buckets deletes objects in batches speed up the cleanup by using delete_objects() with batches of 128 Signed-off-by: Casey Bodley --- s3tests_boto3/functional/__init__.py | 88 ++++++++++++---------------- 1 file changed, 39 insertions(+), 49 deletions(-) diff --git a/s3tests_boto3/functional/__init__.py b/s3tests_boto3/functional/__init__.py index ba8f9a7..ee91a85 100644 --- a/s3tests_boto3/functional/__init__.py +++ b/s3tests_boto3/functional/__init__.py @@ -74,38 +74,38 @@ def get_objects_list(bucket, client=None, prefix=None): return objects_list -def get_versioned_objects_list(bucket, client=None): - if client == None: - client = get_client() - response = client.list_object_versions(Bucket=bucket) - versioned_objects_list = [] +# generator function that returns object listings in batches, where each +# batch is a list of dicts compatible with delete_objects() +def list_versions(client, bucket, batch_size): + key_marker = '' + version_marker = '' + truncated = True + while truncated: + listing = client.list_object_versions( + Bucket=bucket, + KeyMarker=key_marker, + VersionIdMarker=version_marker, + MaxKeys=batch_size) - if 'Versions' in response: - contents = response['Versions'] - for obj in contents: - key = obj['Key'] - version_id = obj['VersionId'] - versioned_obj = (key,version_id) - versioned_objects_list.append(versioned_obj) + key_marker = listing.get('NextKeyMarker') + version_marker = listing.get('NextVersionIdMarker') + truncated = listing['IsTruncated'] - return versioned_objects_list + objs = listing.get('Versions', []) + listing.get('DeleteMarkers', []) + if len(objs): + yield [{'Key': o['Key'], 'VersionId': o['VersionId']} for o in objs] -def get_delete_markers_list(bucket, client=None): - if client == None: - client = get_client() - response = client.list_object_versions(Bucket=bucket) - delete_markers = [] +def nuke_bucket(client, bucket): + batch_size = 128 + max_retain_date = None - if 'DeleteMarkers' in response: - contents = response['DeleteMarkers'] - for obj in contents: - key = obj['Key'] - version_id = obj['VersionId'] - versioned_obj = (key,version_id) - delete_markers.append(versioned_obj) - - return delete_markers + # list and delete objects in batches + for objects in list_versions(client, bucket, batch_size): + client.delete_objects(Bucket=bucket, + Delete={'Objects': objects, 'Quiet': True}, + BypassGovernanceRetention=True) + client.delete_bucket(Bucket=bucket) def nuke_prefixed_buckets(prefix, client=None): if client == None: @@ -114,28 +114,18 @@ def nuke_prefixed_buckets(prefix, client=None): buckets = get_buckets_list(client, prefix) err = None - if buckets != []: - for bucket_name in buckets: - objects_list = get_objects_list(bucket_name, client) - for obj in objects_list: - response = client.delete_object(Bucket=bucket_name,Key=obj) - versioned_objects_list = get_versioned_objects_list(bucket_name, client) - for obj in versioned_objects_list: - response = client.delete_object(Bucket=bucket_name,Key=obj[0],VersionId=obj[1]) - delete_markers = get_delete_markers_list(bucket_name, client) - for obj in delete_markers: - response = client.delete_object(Bucket=bucket_name,Key=obj[0],VersionId=obj[1]) - try: - response = client.delete_bucket(Bucket=bucket_name) - except ClientError as e: - # The exception shouldn't be raised when doing cleanup. Pass and continue - # the bucket cleanup process. Otherwise left buckets wouldn't be cleared - # resulting in some kind of resource leak. err is used to hint user some - # exception once occurred. - err = e - pass - if err: - raise err + for bucket_name in buckets: + try: + nuke_bucket(client, bucket_name) + except Exception as e: + # The exception shouldn't be raised when doing cleanup. Pass and continue + # the bucket cleanup process. Otherwise left buckets wouldn't be cleared + # resulting in some kind of resource leak. err is used to hint user some + # exception once occurred. + err = e + pass + if err: + raise err print('Done with cleanup of buckets in tests.') From 9c4f15a47e1ee899a7dc453a4971a164e10521d8 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Wed, 4 Aug 2021 15:00:04 -0400 Subject: [PATCH 2/3] nuke_prefixed_buckets waits up to 60 seconds for object locks to expire objects locked in GOVERNANCE mode can be removed with BypassGovernanceRetention, but some tests may leave an object locked in COMPLIANCE mode, which blocks deletion until the retention period expires nuke_prefixed_buckets now checks the retention policy of objects that it fails to delete with AccessDenied, and will wait up to 60 seconds for locks to expire before retrying the deletes. if the wait exceeds 60 seconds, it instead throws an error without deleting the bucket instead of doing this in nuke_prefixed_buckets, we could potentially have each object-lock test case handle this manually, but that would add a separate delay to each test case Signed-off-by: Casey Bodley --- s3tests_boto3/functional/__init__.py | 35 +++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/s3tests_boto3/functional/__init__.py b/s3tests_boto3/functional/__init__.py index ee91a85..40c6b25 100644 --- a/s3tests_boto3/functional/__init__.py +++ b/s3tests_boto3/functional/__init__.py @@ -4,6 +4,8 @@ from botocore.client import Config from botocore.exceptions import ClientError from botocore.handlers import disable_signing import configparser +import datetime +import time import os import munch import random @@ -101,10 +103,41 @@ def nuke_bucket(client, bucket): # list and delete objects in batches for objects in list_versions(client, bucket, batch_size): - client.delete_objects(Bucket=bucket, + delete = client.delete_objects(Bucket=bucket, Delete={'Objects': objects, 'Quiet': True}, BypassGovernanceRetention=True) + # check for object locks on 403 AccessDenied errors + for err in delete.get('Errors', []): + if err.get('Code') != 'AccessDenied': + continue + try: + res = client.get_object_retention(Bucket=bucket, + Key=err['Key'], VersionId=err['VersionId']) + retain_date = res['Retention']['RetainUntilDate'] + if not max_retain_date or max_retain_date < retain_date: + max_retain_date = retain_date + except ClientError: + pass + + if max_retain_date: + # wait out the retention period (up to 60 seconds) + now = datetime.datetime.now(max_retain_date.tzinfo) + if max_retain_date > now: + delta = max_retain_date - now + if delta.total_seconds() > 60: + raise RuntimeError('bucket {} still has objects \ +locked for {} more seconds, not waiting for \ +bucket cleanup'.format(bucket, delta.total_seconds())) + print('nuke_bucket', bucket, 'waiting', delta.total_seconds(), + 'seconds for object locks to expire') + time.sleep(delta.total_seconds()) + + for objects in list_versions(client, bucket, batch_size): + client.delete_objects(Bucket=bucket, + Delete={'Objects': objects, 'Quiet': True}, + BypassGovernanceRetention=True) + client.delete_bucket(Bucket=bucket) def nuke_prefixed_buckets(prefix, client=None): From 8662815ebe2d8b105307edccf56b11ebb7b6824f Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Tue, 3 Aug 2021 17:04:15 -0400 Subject: [PATCH 3/3] object-lock: test changes between retention modes Signed-off-by: Casey Bodley --- s3tests_boto3/functional/test_s3.py | 60 +++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index 4482635..4479b6d 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -12789,6 +12789,66 @@ def test_object_lock_uploading_obj(): client.put_object_legal_hold(Bucket=bucket_name, Key=key, LegalHold={'Status':'OFF'}) client.delete_object(Bucket=bucket_name, Key=key, VersionId=response['VersionId'], BypassGovernanceRetention=True) +@attr(resource='object') +@attr(method='put') +@attr(operation='Test changing object retention mode from GOVERNANCE to COMPLIANCE with bypass') +@attr(assertion='succeeds') +@attr('object-lock') +def test_object_lock_changing_mode_from_governance_with_bypass(): + bucket_name = get_new_bucket_name() + key = 'file1' + client = get_client() + client.create_bucket(Bucket=bucket_name, ObjectLockEnabledForBucket=True) + # upload object with mode=GOVERNANCE + retain_until = datetime.datetime.now(pytz.utc) + datetime.timedelta(seconds=10) + client.put_object(Bucket=bucket_name, Body='abc', Key=key, ObjectLockMode='GOVERNANCE', + ObjectLockRetainUntilDate=retain_until) + # change mode to COMPLIANCE + retention = {'Mode':'COMPLIANCE', 'RetainUntilDate':retain_until} + client.put_object_retention(Bucket=bucket_name, Key=key, Retention=retention, BypassGovernanceRetention=True) + +@attr(resource='object') +@attr(method='put') +@attr(operation='Test changing object retention mode from GOVERNANCE to COMPLIANCE without bypass') +@attr(assertion='fails') +@attr('object-lock') +def test_object_lock_changing_mode_from_governance_without_bypass(): + bucket_name = get_new_bucket_name() + key = 'file1' + client = get_client() + client.create_bucket(Bucket=bucket_name, ObjectLockEnabledForBucket=True) + # upload object with mode=GOVERNANCE + retain_until = datetime.datetime.now(pytz.utc) + datetime.timedelta(seconds=10) + client.put_object(Bucket=bucket_name, Body='abc', Key=key, ObjectLockMode='GOVERNANCE', + ObjectLockRetainUntilDate=retain_until) + # try to change mode to COMPLIANCE + retention = {'Mode':'COMPLIANCE', 'RetainUntilDate':retain_until} + e = assert_raises(ClientError, client.put_object_retention, Bucket=bucket_name, Key=key, Retention=retention) + status, error_code = _get_status_and_error_code(e.response) + eq(status, 403) + eq(error_code, 'AccessDenied') + +@attr(resource='object') +@attr(method='put') +@attr(operation='Test changing object retention mode from COMPLIANCE to GOVERNANCE') +@attr(assertion='fails') +@attr('object-lock') +def test_object_lock_changing_mode_from_compliance(): + bucket_name = get_new_bucket_name() + key = 'file1' + client = get_client() + client.create_bucket(Bucket=bucket_name, ObjectLockEnabledForBucket=True) + # upload object with mode=COMPLIANCE + retain_until = datetime.datetime.now(pytz.utc) + datetime.timedelta(seconds=10) + client.put_object(Bucket=bucket_name, Body='abc', Key=key, ObjectLockMode='COMPLIANCE', + ObjectLockRetainUntilDate=retain_until) + # try to change mode to GOVERNANCE + retention = {'Mode':'GOVERNANCE', 'RetainUntilDate':retain_until} + e = assert_raises(ClientError, client.put_object_retention, Bucket=bucket_name, Key=key, Retention=retention) + status, error_code = _get_status_and_error_code(e.response) + eq(status, 403) + eq(error_code, 'AccessDenied') + @attr(resource='object') @attr(method='copy') @attr(operation='copy w/ x-amz-copy-source-if-match: the latest ETag')