2011-07-08 20:00:09 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
|
|
import gevent
|
2011-07-20 16:42:39 +00:00
|
|
|
import gevent.pool
|
2011-07-08 20:00:09 +00:00
|
|
|
import gevent.queue
|
|
|
|
import gevent.monkey; gevent.monkey.patch_all()
|
|
|
|
import optparse
|
2011-07-20 17:30:16 +00:00
|
|
|
import sys
|
2011-07-08 20:00:09 +00:00
|
|
|
import time
|
2011-07-20 18:32:52 +00:00
|
|
|
import traceback
|
2011-07-08 20:00:09 +00:00
|
|
|
import random
|
2011-07-20 17:30:16 +00:00
|
|
|
import yaml
|
2011-07-08 20:00:09 +00:00
|
|
|
|
|
|
|
import generate_objects
|
|
|
|
import realistic
|
|
|
|
import common
|
|
|
|
|
2011-07-20 17:30:16 +00:00
|
|
|
NANOSECOND = int(1e9)
|
2011-07-08 20:00:09 +00:00
|
|
|
|
2011-07-22 18:54:19 +00:00
|
|
|
def reader(bucket, worker_id, file_names, queue):
    """
    Download randomly chosen objects in an endless loop, reporting each try.

    Each pass picks one name from ``file_names``, creates a key for it on
    ``bucket`` and downloads the contents into a ``realistic.FileVerifier``.
    A result dict is then put on ``queue`` with these entries:

    - ``type``: always ``'r'``
    - ``bucket`` / ``key``: names of the bucket and key that were read
    - ``worker``: the ``worker_id`` passed in
    - ``start``: ``time.time()`` taken just before the download
    - ``duration``: elapsed time scaled by ``NANOSECOND``, as an int
    - ``error``: only present when the download raised or the verifier
      reports the data as invalid

    ``gevent.GreenletExit`` is re-raised untouched; any other ``Exception``
    from the download is recorded in the result and the loop continues.
    The function never returns on its own.
    """
    while True:
        chosen_name = random.choice(file_names)
        key = bucket.new_key(chosen_name)

        verifier = realistic.FileVerifier()
        result = {
            'type': 'r',
            'bucket': bucket.name,
            'key': key.name,
            #TODO chunks
            'worker': worker_id,
        }

        started_at = time.time()
        try:
            key.get_contents_to_file(verifier)
        except gevent.GreenletExit:
            # being killed by the parent; do not report, just propagate
            raise
        except Exception as exc:
            # take the end timestamp first so error bookkeeping is not timed
            finished_at = time.time()
            result['error'] = {
                'msg': str(exc),
                'traceback': traceback.format_exc(),
            }
            # a failure that repeats instantly would turn this into a busy
            # loop; yield so the parent greenlet gets to run as well
            time.sleep(0)
        else:
            finished_at = time.time()

            # only a completed download can be checked for integrity
            if not verifier.valid():
                result['error'] = {'msg': 'md5sum check failed'}

        result['start'] = started_at
        result['duration'] = int(round((finished_at - started_at) * NANOSECOND))
        queue.put(result)
|
|
|
|
|
|
|
|
def writer(bucket, worker_id, file_names, files, queue):
    """
    Upload generated files in an endless loop, reporting each try.

    Each pass takes the next object from the ``files`` iterator, picks one
    name from ``file_names``, creates a key for it on ``bucket`` and uploads
    the file object to that key. A result dict is then put on ``queue``
    with these entries:

    - ``type``: always ``'w'``
    - ``bucket`` / ``key``: names of the bucket and key that were written
    - ``worker``: the ``worker_id`` passed in
    - ``start``: ``time.time()`` taken just before the upload
    - ``duration``: elapsed time scaled by ``NANOSECOND``, as an int
    - ``error``: only present when the upload raised

    ``gevent.GreenletExit`` is re-raised untouched; any other ``Exception``
    from the upload is recorded in the result and the loop continues.
    The function never returns on its own.
    """
    while True:
        payload = next(files)
        chosen_name = random.choice(file_names)
        key = bucket.new_key(chosen_name)

        result = {
            'type': 'w',
            'bucket': bucket.name,
            'key': key.name,
            #TODO chunks
            'worker': worker_id,
        }

        started_at = time.time()
        try:
            key.set_contents_from_file(payload)
        except gevent.GreenletExit:
            # being killed by the parent; do not report, just propagate
            raise
        except Exception as exc:
            # take the end timestamp first so error bookkeeping is not timed
            finished_at = time.time()
            result['error'] = {
                'msg': str(exc),
                'traceback': traceback.format_exc(),
            }
            # a failure that repeats instantly would turn this into a busy
            # loop; yield so the parent greenlet gets to run as well
            time.sleep(0)
        else:
            finished_at = time.time()

        result['start'] = started_at
        result['duration'] = int(round((finished_at - started_at) * NANOSECOND))
        queue.put(result)
|
2011-07-08 20:00:09 +00:00
|
|
|
|
|
|
|
def parse_options():
    """
    Parse the command line of the read/write load test.

    Builds an ``optparse.OptionParser`` from the table below and parses
    ``sys.argv`` (optparse's default). Returns the ``(options, args)`` pair
    produced by ``parse_args()``; ``options`` carries ``duration``,
    ``num_readers``, ``num_writers``, ``file_size``, ``stddev``,
    ``num_files``, ``seed`` and ``cleanup``.
    """
    # (flags, keyword arguments for add_option), in registration order
    option_specs = (
        (("-t", "--time"), dict(
            dest="duration", type="float",
            help="duration to run tests (seconds)", default=5, metavar="SECS")),
        (("-r", "--read"), dict(
            dest="num_readers", type="int",
            help="number of reader threads", default=0, metavar="NUM")),
        (("-w", "--write"), dict(
            dest="num_writers", type="int",
            help="number of writer threads", default=2, metavar="NUM")),
        (("-s", "--size"), dict(
            dest="file_size", type="float",
            help="file size to use, in kb", default=1024, metavar="KB")),
        (("-d", "--stddev"), dict(
            dest="stddev", type="float",
            help="stddev of file size", default=0, metavar="KB")),
        (("-n", "--numfiles"), dict(
            dest="num_files", type="int",
            help="total number of files to write", default=1, metavar="NUM")),
        # no default given: the seed stays None unless passed explicitly
        (("--seed",), dict(
            dest="seed", type="int",
            help="seed to use for random number generator", metavar="NUM")),
        (("--no-cleanup",), dict(
            dest="cleanup", action="store_false",
            help="skip cleaning up all created buckets", default=True)),
    )

    parser = optparse.OptionParser()
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    return parser.parse_args()
|
|
|
|
|
2011-07-22 18:54:19 +00:00
|
|
|
def write_file(bucket, file_name, file):
    """
    Upload one file object to ``bucket`` under the key ``file_name``.

    Used by the warmup phase to create the initial set of objects.
    Nothing is returned.
    """
    bucket.new_key(file_name).set_contents_from_file(file)
|
|
|
|
|
2011-07-08 20:00:09 +00:00
|
|
|
def main():
    """
    Run the read/write load test and emit per-operation results as YAML.

    Steps, in order:

    1. parse the command line;
    2. set up the environment through ``common`` and create a fresh bucket;
    3. generate the object names and the stream of file objects;
    4. warm up by uploading one file per name, using a pool of size 100;
    5. spawn the requested writer and reader greenlets, all feeding one
       result queue;
    6. schedule a stop after ``options.duration`` seconds;
    7. dump everything read from the queue to stdout as YAML documents.

    ``common.teardown()`` runs in a ``finally`` clause unless
    ``--no-cleanup`` was given.
    """
    # parse options
    options, _args = parse_options()

    try:
        # setup
        common.setup()
        bucket = common.get_new_bucket()
        # single-argument parenthesised print: same output as the statement form
        print("Created bucket: {name}".format(name=bucket.name))

        name_generator = realistic.names(
            mean=15,
            stddev=4,
            seed=options.seed,
            max_amount=options.num_files
            )
        file_names = list(name_generator)
        files = realistic.files(
            mean=options.file_size,
            stddev=options.stddev,
            seed=options.seed,
            )
        results = gevent.queue.Queue()

        # warmup - make sure every name has an object behind it
        print("Uploading initial set of {num} files".format(num=options.num_files))
        warmup_pool = gevent.pool.Pool(size=100)
        for file_name in file_names:
            content = next(files)
            warmup_pool.spawn_link_exception(
                write_file,
                bucket=bucket,
                file_name=file_name,
                file=content,
                )
        warmup_pool.join()

        # main work
        print("Starting main worker loop.")
        print("Using file size: {size} +- {stddev}".format(size=options.file_size, stddev=options.stddev))
        print("Spawning {w} writers and {r} readers...".format(r=options.num_readers, w=options.num_writers))

        workers = gevent.pool.Group()
        # arguments common to both kinds of worker
        shared_kwargs = dict(bucket=bucket, file_names=file_names, queue=results)

        for worker_id in xrange(options.num_writers):
            workers.spawn_link_exception(
                writer,
                worker_id=worker_id,
                files=files,
                **shared_kwargs
                )
        for worker_id in xrange(options.num_readers):
            workers.spawn_link_exception(
                reader,
                worker_id=worker_id,
                **shared_kwargs
                )

        def stop_workers():
            # kill every worker first, then mark the end of the results;
            # presumably the StopIteration sentinel is what ends iteration
            # over the queue below (gevent queue convention) - confirm
            workers.kill(block=True)
            results.put(StopIteration)

        gevent.spawn_later(options.duration, stop_workers)

        # results are dumped as they are read from the queue
        yaml.safe_dump_all(results, stream=sys.stdout, default_flow_style=False)

    finally:
        # cleanup
        if options.cleanup:
            common.teardown()
|