2011-07-08 20:00:09 +00:00
|
|
|
import gevent
|
2011-07-20 16:42:39 +00:00
|
|
|
import gevent.pool
|
2011-07-08 20:00:09 +00:00
|
|
|
import gevent.queue
|
|
|
|
import gevent.monkey; gevent.monkey.patch_all()
|
2011-07-26 16:33:40 +00:00
|
|
|
import itertools
|
2011-07-08 20:00:09 +00:00
|
|
|
import optparse
|
2011-07-26 21:13:11 +00:00
|
|
|
import os
|
2011-07-20 17:30:16 +00:00
|
|
|
import sys
|
2011-07-08 20:00:09 +00:00
|
|
|
import time
|
2011-07-20 18:32:52 +00:00
|
|
|
import traceback
|
2011-07-08 20:00:09 +00:00
|
|
|
import random
|
2011-07-20 17:30:16 +00:00
|
|
|
import yaml
|
2011-07-08 20:00:09 +00:00
|
|
|
|
|
|
|
import realistic
|
|
|
|
import common
|
|
|
|
|
2011-07-20 17:30:16 +00:00
|
|
|
# Number of nanoseconds in one second; multiplies durations measured in
# seconds (time.time() differences) to express them in nanoseconds.
NANOSECOND = 10 ** 9
|
2011-07-08 20:00:09 +00:00
|
|
|
|
2011-07-26 22:17:35 +00:00
|
|
|
def reader(bucket, worker_id, file_names, queue, rand):
    """
    Worker loop that downloads randomly chosen objects and reports
    one result record per attempt.

    Every pass picks a name from file_names with rand, streams the
    object into a fresh realistic.FileVerifier and puts a dict on
    queue. The dict always holds type ('r'), bucket, key and worker.
    A download whose verifier reports valid() additionally gets start
    (time.time() value), duration (nanoseconds) and chunks; any other
    outcome gets an 'error' entry instead.

    The loop never returns by itself. gevent.GreenletExit is re-raised
    untouched; every other exception is recorded in the result.
    """
    while True:
        key = bucket.new_key(rand.choice(file_names))
        verifier = realistic.FileVerifier()
        result = {
            'type': 'r',
            'bucket': bucket.name,
            'key': key.name,
            'worker': worker_id,
        }

        began = time.time()
        try:
            key.get_contents_to_file(verifier)
        except gevent.GreenletExit:
            raise
        except Exception as exc:
            # stop timer ASAP, even on errors
            finished = time.time()
            result['error'] = {
                'msg': str(exc),
                'traceback': traceback.format_exc(),
            }
            # certain kinds of programmer errors make this a busy
            # loop; let parent greenlet get some time too
            time.sleep(0)
        else:
            finished = time.time()
            if verifier.valid():
                # timing is only reported for downloads that verified
                result['start'] = began
                result['duration'] = int(round((finished - began) * NANOSECOND))
                result['chunks'] = verifier.chunks
            else:
                result['error'] = {'msg': 'md5sum check failed'}

        queue.put(result)
|
|
|
|
|
2011-07-26 22:17:35 +00:00
|
|
|
def writer(bucket, worker_id, file_names, files, queue, rand):
    """
    Worker loop that uploads generated files under randomly chosen
    names and reports one result record per attempt.

    Every pass takes the next file object from the files iterator,
    picks a name from file_names with rand, uploads the file to that
    key and puts a dict on queue. The dict always holds type ('w'),
    bucket, key and worker. A successful upload additionally gets
    start (time.time() value), duration (nanoseconds) and chunks
    (taken from the file object's last_chunks); a failed one gets an
    'error' entry with the message and traceback.

    The loop never returns by itself. gevent.GreenletExit is re-raised
    untouched; every other exception is recorded in the result.
    """
    while True:
        payload = next(files)
        key = bucket.new_key(rand.choice(file_names))

        result = {
            'type': 'w',
            'bucket': bucket.name,
            'key': key.name,
            'worker': worker_id,
        }

        began = time.time()
        try:
            key.set_contents_from_file(payload)
        except gevent.GreenletExit:
            raise
        except Exception as exc:
            # stop timer ASAP, even on errors
            finished = time.time()
            result['error'] = {
                'msg': str(exc),
                'traceback': traceback.format_exc(),
            }
            # certain kinds of programmer errors make this a busy
            # loop; let parent greenlet get some time too
            time.sleep(0)
        else:
            finished = time.time()
            # timing is only reported for uploads that succeeded
            result['start'] = began
            result['duration'] = int(round((finished - began) * NANOSECOND))
            result['chunks'] = payload.last_chunks

        queue.put(result)
|
2011-07-08 20:00:09 +00:00
|
|
|
|
|
|
|
def parse_options():
    """
    Parse the command line (sys.argv) and return optparse's
    (options, args) pair.

    The only option is --no-cleanup, which sets options.cleanup to
    False; it defaults to True. The usage text shows the config file
    as a shell redirect because the configuration is read from stdin.
    """
    parser = optparse.OptionParser(usage='%prog [OPTS] <CONFIG_YAML')
    parser.add_option(
        "--no-cleanup",
        action="store_false",
        dest="cleanup",
        default=True,
        help="skip cleaning up all created buckets",
    )
    return parser.parse_args()
|
|
|
|
|
2011-07-26 16:35:17 +00:00
|
|
|
def write_file(bucket, file_name, fp):
    """
    Upload the contents of fp to a new key named file_name in bucket.

    Called once per file name during the warmup phase so that every
    object exists before the main worker loop starts.
    """
    bucket.new_key(file_name).set_contents_from_file(fp)
|
2011-07-22 18:54:19 +00:00
|
|
|
|
2011-07-08 20:00:09 +00:00
|
|
|
def main():
    """
    Run the read/write benchmark described by the YAML config on stdin.

    Steps:
      * parse the command line and read the configuration from stdin
        (a terminal on stdin is rejected with RuntimeError);
      * check that the 'readwrite' section and its required items are
        present, raising RuntimeError for the first missing one;
      * derive the random seeds, create the bucket and the file names;
      * if any writers are configured, upload an initial copy of every
        file;
      * spawn the writer and reader greenlets, kill them after
        readwrite.duration and then drain the result queue: successful
        results are dumped as YAML to the original stdout, the first
        error result is raised as an Exception;
      * finally remove the bucket unless --no-cleanup was given.

    While the benchmark runs sys.stdout is pointed at sys.stderr so that
    progress messages do not mix with the YAML results; the original
    stream is put back before returning.
    """
    # parse options
    (options, args) = parse_options()

    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None

    # remembered outside the try block so the finally clause can always
    # restore it
    real_stdout = sys.stdout
    try:
        # setup
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'readwrite' not in config:
            raise RuntimeError('readwrite section not found in config')
        for item in ['readers', 'writers', 'duration', 'files', 'bucket']:
            if item not in config.readwrite:
                raise RuntimeError("Missing readwrite config item: {item}".format(item=item))
        for item in ['num', 'size', 'stddev']:
            if item not in config.readwrite.files:
                raise RuntimeError("Missing readwrite config item: files.{item}".format(item=item))

        # seeds given in the config win; the rest are derived from the
        # 'main' seed so that one number reproduces a whole run
        seeds = dict(config.readwrite.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))

        rand = random.Random(seeds['main'])

        for name in ['names', 'contents', 'writer', 'reader']:
            seeds.setdefault(name, rand.randrange(2**32))

        print('Using random seeds: {seeds}'.format(seeds=seeds))

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(config.readwrite.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print("Created bucket: {name}".format(name=bucket.name))

        # check flag for deterministic file name creation
        if not config.readwrite.get('deterministic_file_names'):
            print('Creating random file names')
            file_names = realistic.names(
                mean=15,
                stddev=4,
                seed=seeds['names'],
                )
            file_names = itertools.islice(file_names, config.readwrite.files.num)
            file_names = list(file_names)
        else:
            print('Creating file names that are deterministic')
            file_names = [
                'test_file_{num}'.format(num=x)
                for x in xrange(config.readwrite.files.num)
                ]

        files = realistic.files2(
            mean=1024 * config.readwrite.files.size,
            stddev=1024 * config.readwrite.files.stddev,
            seed=seeds['contents'],
            )
        q = gevent.queue.Queue()

        # warmup - get initial set of files uploaded if there are any writers specified
        if config.readwrite.writers > 0:
            print("Uploading initial set of {num} files".format(num=config.readwrite.files.num))
            warmup_pool = gevent.pool.Pool(size=100)
            for file_name in file_names:
                fp = next(files)
                warmup_pool.spawn_link_exception(
                    write_file,
                    bucket=bucket,
                    file_name=file_name,
                    fp=fp,
                    )
            warmup_pool.join()

        # main work
        print("Starting main worker loop.")
        print("Using file size: {size} +- {stddev}".format(size=config.readwrite.files.size, stddev=config.readwrite.files.stddev))
        print("Spawning {w} writers and {r} readers...".format(w=config.readwrite.writers, r=config.readwrite.readers))
        group = gevent.pool.Group()
        rand_writer = random.Random(seeds['writer'])

        # Don't create random files if deterministic_files_names is set and true
        if not config.readwrite.get('deterministic_file_names'):
            for x in xrange(config.readwrite.writers):
                this_rand = random.Random(rand_writer.randrange(2**32))
                group.spawn_link_exception(
                    writer,
                    bucket=bucket,
                    worker_id=x,
                    file_names=file_names,
                    files=files,
                    queue=q,
                    rand=this_rand,
                    )

        # Since the loop generating readers already uses config.readwrite.readers
        # and the file names are already generated (randomly or deterministically),
        # this loop needs no additional qualifiers. If zero readers are specified,
        # it will behave as expected (no data is read)
        rand_reader = random.Random(seeds['reader'])
        for x in xrange(config.readwrite.readers):
            this_rand = random.Random(rand_reader.randrange(2**32))
            group.spawn_link_exception(
                reader,
                bucket=bucket,
                worker_id=x,
                file_names=file_names,
                queue=q,
                rand=this_rand,
                )

        def stop():
            # kill every worker, then mark the end of the results so
            # that iterating over the queue terminates
            group.kill(block=True)
            q.put(StopIteration)
        gevent.spawn_later(config.readwrite.duration, stop)

        # wait for all the tests to finish
        group.join()
        print('post-join, queue size {size}'.format(size=q.qsize()))

        if q.qsize() > 0:
            for temp_dict in q:
                if 'error' in temp_dict:
                    error = temp_dict['error']
                    # not every error record has a traceback (the
                    # reader's checksum failure only carries 'msg'), so
                    # do not let a missing key hide the real error
                    raise Exception('exception:\n\t{msg}\n\t{trace}'.format(
                        msg=error['msg'],
                        trace=error.get('traceback', '(no traceback recorded)'),
                        ))
                else:
                    yaml.safe_dump(temp_dict, stream=real_stdout)

    finally:
        try:
            # cleanup
            if options.cleanup and bucket is not None:
                common.nuke_bucket(bucket)
        finally:
            # undo the stdout redirection even if cleanup failed
            sys.stdout = real_stdout
|