dho-qa: disentangle file generation from uploading

Static load test script now provides separate functions for generating a
list of random-file pointers and uploading those files to an S3 store.  When
run as a script it still does both, but you can call each function
individually from a different script after loading the module.
This commit is contained in:
Kyle Marsh 2011-07-08 14:38:12 -07:00
parent a1e5c50dc6
commit e5f9783053

View file

@ -17,11 +17,7 @@ import sys
def parse_opts(): def parse_opts():
parser = OptionParser(); parser = OptionParser();
parser.add_option('-O' , '--outfile', help='write output to FILE. Defaults to STDOUT', metavar='FILE') parser.add_option('-O' , '--outfile', help='write output to FILE. Defaults to STDOUT', metavar='FILE')
parser.add_option('-a' , '--access-key', dest='access_key', help='use S3 access key KEY', metavar='KEY')
parser.add_option('-s' , '--secret-key', dest='secret_key', help='use S3 secret key KEY', metavar='KEY')
parser.add_option('-b' , '--bucket', dest='bucket', help='push objects to BUCKET', metavar='BUCKET') parser.add_option('-b' , '--bucket', dest='bucket', help='push objects to BUCKET', metavar='BUCKET')
parser.add_option('--checksum', dest='checksum', action='store_true', help='include the md5 checksum with the object urls')
parser.add_option('--host', dest='host', help='use S3 gateway at HOST', metavar='HOST')
parser.add_option('--seed', dest='seed', help='optional seed for the random number generator') parser.add_option('--seed', dest='seed', help='optional seed for the random number generator')
return parser.parse_args() return parser.parse_args()
@ -38,63 +34,67 @@ def connect_s3(host, access_key, secret_key):
return conn return conn
def generate_objects(bucket, quantity, mean, stddev, seed, checksum=False, name_seed=None): def get_random_files(quantity, mean, stddev, seed):
"""Generate random objects with sizes across a normal distribution """Create file-like objects with pseudorandom contents.
specified by mean and standard deviation and write them to bucket.
IN: IN:
boto S3 bucket object number of files to create
Number of files
mean file size in bytes mean file size in bytes
standard deviation from mean file size standard deviation from mean file size
seed for RNG seed for PRNG
flag to tell the method to append md5 checksums to the output
seed to use for the file names. defaults to use the other seed
OUT: OUT:
list of urls (strings) to objects valid for 1 hour. list of file handles
If "checksum" is true, each output string consists of the url
followed by the md5 checksum.
""" """
if name_seed == None:
name_seed = seed
urls = []
file_generator = realistic.files(mean, stddev, seed) file_generator = realistic.files(mean, stddev, seed)
name_generator = realistic.names(15, 4,seed=name_seed) return [file_generator.next() for _ in xrange(quantity)]
for _ in xrange(quantity):
fp = file_generator.next()
def upload_objects(bucket, files, seed):
"""Upload a bunch of files to an S3 bucket
IN:
boto S3 bucket object
list of file handles to upload
seed for PRNG
OUT:
list of boto S3 key objects
"""
keys = []
name_generator = realistic.names(15, 4,seed=seed)
for fp in files:
print >> sys.stderr, 'sending file with size %dB' % fp.size print >> sys.stderr, 'sending file with size %dB' % fp.size
key = Key(bucket) key = Key(bucket)
key.key = name_generator.next() key.key = name_generator.next()
key.set_contents_from_file(fp) key.set_contents_from_file(fp)
url = key.generate_url(30758400) #valid for 1 year keys.append(key)
if checksum:
url += ' %s' % key.md5
urls.append(url)
return urls return keys
def main(): def main():
'''To run the static content load test, make sure you've bootstrapped your '''To run the static content load test, make sure you've bootstrapped your
test environment and set up your config.yml file, then run the following: test environment and set up your config.yml file, then run the following:
S3TEST_CONF=config.yml virtualenv/bin/python generate_objects.py -a S3_ACCESS_KEY -s S3_SECRET_KEY -O urls.txt --seed 1234 && siege -rc ./siege.conf -r 5 S3TEST_CONF=config.yml virtualenv/bin/python generate_objects.py -O urls.txt --seed 1234
This creates a bucket with your S3 credentials and fills it with This creates a bucket with your S3 credentials (from config.yml) and
garbage objects as described in generate_objects.conf. It writes a fills it with garbage objects as described in generate_objects.conf.
list of URLS to those objects to ./urls.txt. siege then reads the It writes a list of URLS to those objects to ./urls.txt.
./siege.conf config file which tells it to read from ./urls.txt and
log to ./siege.log and hammers each url in urls.txt 5 times (-r flag). Once you have objects in your bucket, run the siege benchmarking program:
siege -rc ./siege.conf -r 5
This tells siege to read the ./siege.conf config file which tells it to
use the urls in ./urls.txt and log to ./siege.log. It hits each url in
urls.txt 5 times (-r flag).
Results are printed to the terminal and written in CSV format to Results are printed to the terminal and written in CSV format to
./siege.log ./siege.log
S3 credentials and output file may also be specified in config.yml
under s3.main and file_generation.url_file
''' '''
(options, args) = parse_opts(); (options, args) = parse_opts();
#SETUP #SETUP
random.seed(options.seed if options.seed else None) random.seed(options.seed if options.seed else None)
conn = common.s3.main
if options.outfile: if options.outfile:
OUTFILE = open(options.outfile, 'w') OUTFILE = open(options.outfile, 'w')
elif common.config.file_generation.url_file: elif common.config.file_generation.url_file:
@ -102,31 +102,23 @@ def main():
else: else:
OUTFILE = sys.stdout OUTFILE = sys.stdout
if options.access_key and options.secret_key:
host = options.host if options.host else common.config.s3.defaults.host
conn = connect_s3(host, options.access_key, options.secret_key)
else:
conn = common.s3.main
if options.bucket: if options.bucket:
bucket = get_bucket(conn, options.bucket) bucket = conn.create_bucket(options.bucket)
else: else:
bucket = common.get_new_bucket() bucket = common.get_new_bucket()
urls = [] keys = []
print >> OUTFILE, 'bucket: %s' % bucket.name print >> OUTFILE, 'bucket: %s' % bucket.name
print >> sys.stderr, 'setup complete, generating files' print >> sys.stderr, 'setup complete, generating files'
for profile in common.config.file_generation.groups: for profile in common.config.file_generation.groups:
seed = random.random() seed = random.random()
urls += generate_objects(bucket, profile[0], profile[1], profile[2], seed, options.checksum) files = get_random_files(profile[0], profile[1], profile[2], seed)
print >> sys.stderr, 'finished sending files. generating urls and sending to S3' keys += upload_objects(bucket, files, seed)
print >> sys.stderr, 'finished sending files. generating urls'
for key in keys:
print >> OUTFILE, key.generate_url(30758400) #valid for 1 year
url_string = '\n'.join(urls)
url_key = Key(bucket)
url_key.key = 'urls'
url_key.set_contents_from_string(url_string)
print >> OUTFILE, url_string
print >> sys.stderr, 'done' print >> sys.stderr, 'done'