Merge branch 'next'

Conflicts:
	s3tests/functional/test_s3.py

Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
This commit is contained in:
Yehuda Sadeh 2013-08-16 10:55:54 -07:00
commit a06f5784df
3 changed files with 384 additions and 95 deletions

View file

@ -9,10 +9,17 @@ import string
s3 = bunch.Bunch()
config = bunch.Bunch()
targets = bunch.Bunch()
# this will be assigned by setup()
prefix = None
calling_formats = dict(
ordinary=boto.s3.connection.OrdinaryCallingFormat(),
subdomain=boto.s3.connection.SubdomainCallingFormat(),
vhost=boto.s3.connection.VHostCallingFormat(),
)
def get_prefix():
assert prefix is not None
return prefix
@ -71,6 +78,120 @@ def nuke_prefixed_buckets(prefix):
print 'Done with cleanup of test buckets.'
class TargetConfig:
def __init__(self, cfg, section):
self.port = None
self.api_name = ''
self.is_master = False
self.is_secure = False
self.sync_agent_addr = None
self.sync_agent_port = 0
self.sync_meta_wait = 0
try:
self.api_name = cfg.get(section, 'api_name')
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
pass
try:
self.port = cfg.getint(section, 'port')
except ConfigParser.NoOptionError:
pass
try:
self.host=cfg.get(section, 'host')
except ConfigParser.NoOptionError:
raise RuntimeError(
'host not specified for section {s}'.format(s=section)
)
try:
self.is_master=cfg.getboolean(section, 'is_master')
except ConfigParser.NoOptionError:
pass
try:
self.is_secure=cfg.getboolean(section, 'is_secure')
except ConfigParser.NoOptionError:
pass
try:
raw_calling_format = cfg.get(section, 'calling_format')
except ConfigParser.NoOptionError:
raw_calling_format = 'ordinary'
try:
self.sync_agent_addr = cfg.get(section, 'sync_agent_addr')
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
pass
try:
self.sync_agent_port = cfg.getint(section, 'sync_agent_port')
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
pass
try:
self.sync_meta_wait = cfg.getint(section, 'sync_meta_wait')
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
pass
try:
self.calling_format = calling_formats[raw_calling_format]
except KeyError:
raise RuntimeError(
'calling_format unknown: %r' % raw_calling_format
)
class TargetConnection:
def __init__(self, conf, conn):
self.conf = conf
self.connection = conn
class RegionsInfo:
def __init__(self):
self.m = bunch.Bunch()
self.master = None
self.secondaries = []
def add(self, name, region_config):
self.m[name] = region_config
if (region_config.is_master):
if not self.master is None:
raise RuntimeError(
'multiple regions defined as master'
)
self.master = region_config
else:
self.secondaries.append(region_config)
def get(self, name):
return self.m[name]
def get(self):
return self.m
def iteritems(self):
return self.m.iteritems()
regions = RegionsInfo()
class RegionsConn:
def __init__(self):
self.m = bunch.Bunch()
self.default = None
self.master = None
self.secondaries = []
def iteritems(self):
return self.m.iteritems()
def add(self, name, conn):
self.m[name] = conn
if not self.default:
self.default = conn
if (conn.conf.is_master):
self.master = conn
else:
self.secondaries.append(conn)
# nosetests --processes=N with N>1 is safe
_multiprocess_can_split_ = True
@ -88,6 +209,8 @@ def setup():
cfg.readfp(f)
global prefix
global targets
try:
template = cfg.get('fixtures', 'bucket prefix')
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
@ -96,11 +219,16 @@ def setup():
s3.clear()
config.clear()
calling_formats = dict(
ordinary=boto.s3.connection.OrdinaryCallingFormat(),
subdomain=boto.s3.connection.SubdomainCallingFormat(),
vhost=boto.s3.connection.VHostCallingFormat(),
)
for section in cfg.sections():
try:
(type_, name) = section.split(None, 1)
except ValueError:
continue
if type_ != 'region':
continue
regions.add(name, TargetConfig(cfg, section))
for section in cfg.sections():
try:
(type_, name) = section.split(None, 1)
@ -108,22 +236,9 @@ def setup():
continue
if type_ != 's3':
continue
try:
port = cfg.getint(section, 'port')
except ConfigParser.NoOptionError:
port = None
try:
raw_calling_format = cfg.get(section, 'calling_format')
except ConfigParser.NoOptionError:
raw_calling_format = 'ordinary'
try:
calling_format = calling_formats[raw_calling_format]
except KeyError:
raise RuntimeError(
'calling_format unknown: %r' % raw_calling_format
)
if len(regions.get()) == 0:
regions.add("default", TargetConfig(cfg, section))
config[name] = bunch.Bunch()
for var in [
@ -135,16 +250,21 @@ def setup():
config[name][var] = cfg.get(section, var)
except ConfigParser.NoOptionError:
pass
conn = boto.s3.connection.S3Connection(
aws_access_key_id=cfg.get(section, 'access_key'),
aws_secret_access_key=cfg.get(section, 'secret_key'),
is_secure=cfg.getboolean(section, 'is_secure'),
port=port,
host=cfg.get(section, 'host'),
# TODO test vhost calling format
calling_format=calling_format,
)
s3[name] = conn
targets[name] = RegionsConn()
for (k, conf) in regions.iteritems():
conn = boto.s3.connection.S3Connection(
aws_access_key_id=cfg.get(section, 'access_key'),
aws_secret_access_key=cfg.get(section, 'secret_key'),
is_secure=conf.is_secure,
port=conf.port,
host=conf.host,
# TODO test vhost calling format
calling_format=conf.calling_format,
)
targets[name].add(k, TargetConnection(conf, conn))
s3[name] = targets[name].default.connection
# WARNING! we actively delete all buckets we see with the prefix
# we've chosen! Choose your prefix with care, and don't reuse
@ -179,18 +299,20 @@ def get_new_bucket_name():
return name
def get_new_bucket(connection=None):
def get_new_bucket(target=None, name=None, headers=None):
"""
Get a bucket that exists and is empty.
Always recreates a bucket from scratch. This is useful to also
reset ACLs and such.
"""
if connection is None:
connection = s3.main
name = get_new_bucket_name()
if target is None:
target = targets.main.default
connection = target.connection
if name is None:
name = get_new_bucket_name()
# the only way for this to fail with a pre-existing bucket is if
# someone raced us between setup nuke_prefixed_buckets and here;
# ignore that as astronomically unlikely
bucket = connection.create_bucket(name)
bucket = connection.create_bucket(name, location=target.conf.api_name, headers=headers)
return bucket

View file

@ -4,6 +4,7 @@ import boto.s3.connection
import boto.s3.acl
import bunch
import datetime
import time
import email.utils
import isodate
import nose
@ -27,6 +28,7 @@ from urlparse import urlparse
from nose.tools import eq_ as eq
from nose.plugins.attrib import attr
from nose.plugins.skip import SkipTest
from .utils import assert_raises
import AnonymousAuth
@ -41,6 +43,7 @@ from . import (
get_new_bucket,
get_new_bucket_name,
s3,
targets,
config,
get_prefix,
)
@ -48,6 +51,8 @@ from . import (
NONEXISTENT_EMAIL = 'doesnotexist@dreamhost.com.invalid'
def not_eq(a, b):
assert a != b, "%r == %r" % (a, b)
def check_access_denied(fn, *args, **kwargs):
e = assert_raises(boto.exception.S3ResponseError, fn, *args, **kwargs)
@ -753,7 +758,7 @@ def test_object_write_to_nonexist_bucket():
def test_bucket_create_delete():
name = '{prefix}foo'.format(prefix=get_prefix())
print 'Trying bucket {name!r}'.format(name=name)
bucket = s3.main.create_bucket(name)
bucket = get_new_bucket(targets.main.default, name)
# make sure it's actually there
s3.main.get_bucket(bucket.name)
bucket.delete()
@ -2312,7 +2317,7 @@ def check_bad_bucket_name(name):
Attempt to create a bucket with a specified name, and confirm
that the request fails because of an invalid bucket name.
"""
e = assert_raises(boto.exception.S3ResponseError, s3.main.create_bucket, name)
e = assert_raises(boto.exception.S3ResponseError, get_new_bucket, targets.main.default, name)
eq(e.status, 400)
eq(e.reason, 'Bad Request')
eq(e.error_code, 'InvalidBucketName')
@ -2338,7 +2343,7 @@ def test_bucket_create_naming_bad_starts_nonalpha():
def test_bucket_create_naming_bad_short_empty():
# bucket creates where name is empty look like PUTs to the parent
# resource (with slash), hence their error response is different
e = assert_raises(boto.exception.S3ResponseError, s3.main.create_bucket, '')
e = assert_raises(boto.exception.S3ResponseError, get_new_bucket, targets.main.default, '')
eq(e.status, 405)
eq(e.reason, 'Method Not Allowed')
eq(e.error_code, 'MethodNotAllowed')
@ -2385,7 +2390,7 @@ def check_good_bucket_name(name, _prefix=None):
# should be very rare
if _prefix is None:
_prefix = get_prefix()
s3.main.create_bucket('{prefix}{name}'.format(
get_new_bucket(targets.main.default, '{prefix}{name}'.format(
prefix=_prefix,
name=name,
))
@ -2399,7 +2404,7 @@ def _test_bucket_create_naming_good_long(length):
prefix = get_prefix()
assert len(prefix) < 255
num = length - len(prefix)
s3.main.create_bucket('{prefix}{name}'.format(
get_new_bucket(targets.main.default, '{prefix}{name}'.format(
prefix=prefix,
name=num*'a',
))
@ -2474,7 +2479,7 @@ def test_bucket_list_long_name():
prefix = get_prefix()
length = 251
num = length - len(prefix)
bucket = s3.main.create_bucket('{prefix}{name}'.format(
bucket = get_new_bucket(targets.main.default, '{prefix}{name}'.format(
prefix=prefix,
name=num*'a',
))
@ -2572,9 +2577,9 @@ def test_bucket_create_naming_dns_dash_dot():
@attr(operation='re-create')
@attr(assertion='idempotent success')
def test_bucket_create_exists():
bucket = get_new_bucket()
bucket = get_new_bucket(targets.main.default)
# REST idempotency means this should be a nop
s3.main.create_bucket(bucket.name)
get_new_bucket(targets.main.default, bucket.name)
@attr(resource='bucket')
@ -2585,7 +2590,7 @@ def test_bucket_create_exists_nonowner():
# Names are shared across a global namespace. As such, no two
# users can create a bucket with that same name.
bucket = get_new_bucket()
e = assert_raises(boto.exception.S3CreateError, s3.alt.create_bucket, bucket.name)
e = assert_raises(boto.exception.S3CreateError, get_new_bucket, targets.alt.default, bucket.name)
eq(e.status, 409)
eq(e.reason, 'Conflict')
eq(e.error_code, 'BucketAlreadyExists')
@ -2908,7 +2913,7 @@ def test_object_acl_canned_authenticatedread():
@attr(operation='acl bucket-owner-read')
@attr(assertion='read back expected values')
def test_object_acl_canned_bucketownerread():
bucket = get_new_bucket(s3.main)
bucket = get_new_bucket(targets.main.default)
bucket.set_acl('public-read-write')
key = s3.alt.get_bucket(bucket.name).new_key('foo')
@ -2952,7 +2957,7 @@ def test_object_acl_canned_bucketownerread():
@attr(operation='acl bucket-owner-read')
@attr(assertion='read back expected values')
def test_object_acl_canned_bucketownerfullcontrol():
bucket = get_new_bucket(s3.main)
bucket = get_new_bucket(targets.main.default)
bucket.set_acl('public-read-write')
key = s3.alt.get_bucket(bucket.name).new_key('foo')
@ -3461,7 +3466,7 @@ def test_object_header_acl_grants():
@attr('fails_on_dho')
def test_bucket_header_acl_grants():
headers = _get_acl_header()
bucket = s3.main.create_bucket(get_prefix(), headers=headers)
bucket = get_new_bucket(targets.main.default, get_prefix(), headers)
policy = bucket.get_acl()
check_grants(
@ -3596,7 +3601,7 @@ def test_bucket_acl_revoke_all():
@attr('fails_on_rgw')
def test_logging_toggle():
bucket = get_new_bucket()
log_bucket = s3.main.create_bucket(bucket.name + '-log')
log_bucket = get_new_bucket(targets.main.default, bucket.name + '-log')
log_bucket.set_as_logging_target()
bucket.enable_logging(target_bucket=log_bucket, target_prefix=bucket.name)
bucket.disable_logging()
@ -3908,7 +3913,7 @@ def test_bucket_recreate_not_overriding():
names = [e.name for e in list(li)]
eq(names, key_names)
bucket2 = s3.main.create_bucket(bucket.name)
bucket2 = get_new_bucket(targets.main.default, bucket.name)
li = bucket.list()
@ -4015,7 +4020,7 @@ def test_object_copy_diff_bucket():
@attr(operation='copy from an inaccessible bucket')
@attr(assertion='fails w/AttributeError')
def test_object_copy_not_owned_bucket():
buckets = [get_new_bucket(), get_new_bucket(s3.alt)]
buckets = [get_new_bucket(), get_new_bucket(targets.alt.default)]
print repr(buckets[1])
key = buckets[0].new_key('foo123bar')
key.set_contents_from_string('foo')
@ -4269,7 +4274,8 @@ def test_stress_bucket_acls_changes():
def test_set_cors():
bucket = get_new_bucket()
cfg = CORSConfiguration()
cfg.add_rule('GET', '*')
cfg.add_rule('GET', '*.get')
cfg.add_rule('PUT', '*.put')
e = assert_raises(boto.exception.S3ResponseError, bucket.get_cors)
eq(e.status, 404)
@ -4277,6 +4283,25 @@ def test_set_cors():
bucket.set_cors(cfg)
new_cfg = bucket.get_cors()
eq(len(new_cfg), 2)
result = bunch.Bunch()
for c in new_cfg:
eq(len(c.allowed_method), 1)
eq(len(c.allowed_origin), 1)
result[c.allowed_method[0]] = c.allowed_origin[0]
eq(result['GET'], '*.get')
eq(result['PUT'], '*.put')
bucket.delete_cors()
e = assert_raises(boto.exception.S3ResponseError, bucket.get_cors)
eq(e.status, 404)
class FakeFile(object):
"""
file that simulates seek, tell, and current character
@ -4559,3 +4584,112 @@ def test_ranged_request_response_code():
eq(fetched_content, content[4:8])
eq(status, 206)
def check_can_test_multiregion():
if not targets.main.master or len(targets.main.secondaries) == 0:
raise SkipTest
@attr(resource='bucket')
@attr(method='get')
@attr(operation='create on one region, access in another')
@attr(assertion='can\'t access in other region')
@attr('multiregion')
def test_region_bucket_create_secondary_access_remove_master():
check_can_test_multiregion()
master_conn = targets.main.master.connection
for r in targets.main.secondaries:
conn = r.connection
bucket = get_new_bucket(r)
e = assert_raises(boto.exception.S3ResponseError, master_conn.get_bucket, bucket.name)
eq(e.status, 301)
e = assert_raises(boto.exception.S3ResponseError, master_conn.delete_bucket, bucket.name)
eq(e.status, 301)
conn.delete_bucket(bucket)
# syncs all the regions except for the one passed in
def region_sync_meta(targets, region):
for (k, r) in targets.iteritems():
if r == region:
continue
conf = r.conf
if conf.sync_agent_addr:
ret = requests.post('http://{addr}:{port}/metadata/incremental'.format(addr = conf.sync_agent_addr, port = conf.sync_agent_port))
eq(ret.status_code, 200)
if conf.sync_meta_wait:
time.sleep(conf.sync_meta_wait)
@attr(resource='bucket')
@attr(method='get')
@attr(operation='create on one region, access in another')
@attr(assertion='can\'t access in other region')
@attr('multiregion')
def test_region_bucket_create_master_access_remove_secondary():
check_can_test_multiregion()
master = targets.main.master
master_conn = master.connection
for r in targets.main.secondaries:
conn = r.connection
bucket = get_new_bucket(master)
region_sync_meta(targets.main, master)
e = assert_raises(boto.exception.S3ResponseError, conn.get_bucket, bucket.name)
eq(e.status, 301)
e = assert_raises(boto.exception.S3ResponseError, conn.delete_bucket, bucket.name)
eq(e.status, 301)
master_conn.delete_bucket(bucket)
@attr(resource='object')
@attr(method='copy')
@attr(operation='copy object between regions, verify')
@attr(assertion='can read object')
@attr('multiregion')
def test_region_copy_object():
check_can_test_multiregion()
for (k, dest) in targets.main.iteritems():
dest_conn = dest.connection
dest_bucket = get_new_bucket(dest)
print 'created new dest bucket ', dest_bucket.name
region_sync_meta(targets.main, dest)
for file_size in (1024, 1024 * 1024, 10 * 1024 * 1024,
100 * 1024 * 1024):
for (k2, r) in targets.main.iteritems():
if r == dest_conn:
continue
conn = r.connection
bucket = get_new_bucket(r)
print 'created bucket', bucket.name
region_sync_meta(targets.main, r)
content = 'testcontent'
key = bucket.new_key('testobj')
fp_a = FakeWriteFile(file_size, 'A')
key.set_contents_from_file(fp_a)
dest_key = dest_bucket.copy_key('testobj-dest', bucket.name, key.name)
# verify dest
_verify_atomic_key_data(dest_key, file_size, 'A')
bucket.delete_key(key.name)
print 'removing bucket', bucket.name
conn.delete_bucket(bucket)
dest_bucket.delete_key(dest_key.name)
dest_conn.delete_bucket(dest_bucket)

View file

@ -55,13 +55,13 @@ def reader(bucket, worker_id, file_names, queue, rand):
msg='md5sum check failed',
),
)
elapsed = end - start
result.update(
start=start,
duration=int(round(elapsed * NANOSECOND)),
chunks=fp.chunks,
)
else:
elapsed = end - start
result.update(
start=start,
duration=int(round(elapsed * NANOSECOND)),
chunks=fp.chunks,
)
queue.put(result)
def writer(bucket, worker_id, file_names, files, queue, rand):
@ -97,12 +97,13 @@ def writer(bucket, worker_id, file_names, files, queue, rand):
else:
end = time.time()
elapsed = end - start
result.update(
start=start,
duration=int(round(elapsed * NANOSECOND)),
chunks=fp.last_chunks,
)
elapsed = end - start
result.update(
start=start,
duration=int(round(elapsed * NANOSECOND)),
chunks=fp.last_chunks,
)
queue.put(result)
def parse_options():
@ -161,13 +162,23 @@ def main():
bucket_name = common.choose_bucket_prefix(config.readwrite.bucket, max_len=30)
bucket = conn.create_bucket(bucket_name)
print "Created bucket: {name}".format(name=bucket.name)
file_names = realistic.names(
mean=15,
stddev=4,
seed=seeds['names'],
)
file_names = itertools.islice(file_names, config.readwrite.files.num)
file_names = list(file_names)
# check flag for deterministic file name creation
if not config.readwrite.get('deterministic_file_names'):
print 'Creating random file names'
file_names = realistic.names(
mean=15,
stddev=4,
seed=seeds['names'],
)
file_names = itertools.islice(file_names, config.readwrite.files.num)
file_names = list(file_names)
else:
print 'Creating file names that are deterministic'
file_names = []
for x in xrange(config.readwrite.files.num):
file_names.append('test_file_{num}'.format(num=x))
files = realistic.files2(
mean=1024 * config.readwrite.files.size,
stddev=1024 * config.readwrite.files.stddev,
@ -175,18 +186,20 @@ def main():
)
q = gevent.queue.Queue()
# warmup - get initial set of files uploaded
print "Uploading initial set of {num} files".format(num=config.readwrite.files.num)
warmup_pool = gevent.pool.Pool(size=100)
for file_name in file_names:
fp = next(files)
warmup_pool.spawn_link_exception(
write_file,
bucket=bucket,
file_name=file_name,
fp=fp,
)
warmup_pool.join()
# warmup - get initial set of files uploaded if there are any writers specified
if config.readwrite.writers > 0:
print "Uploading initial set of {num} files".format(num=config.readwrite.files.num)
warmup_pool = gevent.pool.Pool(size=100)
for file_name in file_names:
fp = next(files)
warmup_pool.spawn_link_exception(
write_file,
bucket=bucket,
file_name=file_name,
fp=fp,
)
warmup_pool.join()
# main work
print "Starting main worker loop."
@ -194,17 +207,25 @@ def main():
print "Spawning {w} writers and {r} readers...".format(w=config.readwrite.writers, r=config.readwrite.readers)
group = gevent.pool.Group()
rand_writer = random.Random(seeds['writer'])
for x in xrange(config.readwrite.writers):
this_rand = random.Random(rand_writer.randrange(2**32))
group.spawn_link_exception(
writer,
bucket=bucket,
worker_id=x,
file_names=file_names,
files=files,
queue=q,
rand=this_rand,
)
# Don't create random files if deterministic_files_names is set and true
if not config.readwrite.get('deterministic_file_names'):
for x in xrange(config.readwrite.writers):
this_rand = random.Random(rand_writer.randrange(2**32))
group.spawn_link_exception(
writer,
bucket=bucket,
worker_id=x,
file_names=file_names,
files=files,
queue=q,
rand=this_rand,
)
# Since the loop generating readers already uses config.readwrite.readers
# and the file names are already generated (randomly or deterministically),
# this loop needs no additional qualifiers. If zero readers are specified,
# it will behave as expected (no data is read)
rand_reader = random.Random(seeds['reader'])
for x in xrange(config.readwrite.readers):
this_rand = random.Random(rand_reader.randrange(2**32))
@ -221,7 +242,19 @@ def main():
q.put(StopIteration)
gevent.spawn_later(config.readwrite.duration, stop)
yaml.safe_dump_all(q, stream=real_stdout)
# wait for all the tests to finish
group.join()
print 'post-join, queue size {size}'.format(size=q.qsize())
if q.qsize() > 0:
for temp_dict in q:
if 'error' in temp_dict:
raise Exception('exception:\n\t{msg}\n\t{trace}'.format(
msg=temp_dict['error']['msg'],
trace=temp_dict['error']['traceback'])
)
else:
yaml.safe_dump(temp_dict, stream=real_stdout)
finally:
# cleanup