From e5f97830534233542ea5aea185191f9f52bd6797 Mon Sep 17 00:00:00 2001 From: Kyle Marsh Date: Fri, 8 Jul 2011 14:38:12 -0700 Subject: [PATCH] dho-qa: disentangle file generation from uploading Static load test script now provides separate functions for generating a list of random-file pointers and uploading those files to an S3 store. When run as a script it still does both, but you can call each function individually from a different script after loading the module. --- generate_objects.py | 100 ++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 54 deletions(-) diff --git a/generate_objects.py b/generate_objects.py index d4b9448..2c2dab0 100755 --- a/generate_objects.py +++ b/generate_objects.py @@ -17,11 +17,7 @@ import sys def parse_opts(): parser = OptionParser(); parser.add_option('-O' , '--outfile', help='write output to FILE. Defaults to STDOUT', metavar='FILE') - parser.add_option('-a' , '--access-key', dest='access_key', help='use S3 access key KEY', metavar='KEY') - parser.add_option('-s' , '--secret-key', dest='secret_key', help='use S3 secret key KEY', metavar='KEY') parser.add_option('-b' , '--bucket', dest='bucket', help='push objects to BUCKET', metavar='BUCKET') - parser.add_option('--checksum', dest='checksum', action='store_true', help='include the md5 checksum with the object urls') - parser.add_option('--host', dest='host', help='use S3 gateway at HOST', metavar='HOST') parser.add_option('--seed', dest='seed', help='optional seed for the random number generator') return parser.parse_args() @@ -38,63 +34,67 @@ def connect_s3(host, access_key, secret_key): return conn -def generate_objects(bucket, quantity, mean, stddev, seed, checksum=False, name_seed=None): - """Generate random objects with sizes across a normal distribution - specified by mean and standard deviation and write them to bucket. +def get_random_files(quantity, mean, stddev, seed): + """Create file-like objects with pseudorandom contents. 
+ IN: + number of files to create + mean file size in bytes + standard deviation from mean file size + seed for PRNG + OUT: + list of file handles + """ + file_generator = realistic.files(mean, stddev, seed) + return [file_generator.next() for _ in xrange(quantity)] + + +def upload_objects(bucket, files, seed): + """Upload a bunch of files to an S3 bucket IN: boto S3 bucket object - Number of files - mean file size in bytes - standard deviation from mean file size - seed for RNG - flag to tell the method to append md5 checksums to the output - seed to use for the file names. defaults to use the other seed + list of file handles to upload + seed for PRNG OUT: - list of urls (strings) to objects valid for 1 hour. - If "checksum" is true, each output string consists of the url - followed by the md5 checksum. + list of boto S3 key objects """ - if name_seed == None: - name_seed = seed + keys = [] + name_generator = realistic.names(15, 4,seed=seed) - urls = [] - file_generator = realistic.files(mean, stddev, seed) - name_generator = realistic.names(15, 4,seed=name_seed) - for _ in xrange(quantity): - fp = file_generator.next() + for fp in files: print >> sys.stderr, 'sending file with size %dB' % fp.size key = Key(bucket) key.key = name_generator.next() key.set_contents_from_file(fp) - url = key.generate_url(30758400) #valid for 1 year - if checksum: - url += ' %s' % key.md5 - urls.append(url) + keys.append(key) - return urls + return keys def main(): '''To run the static content load test, make sure you've bootstrapped your test environment and set up your config.yml file, then run the following: - S3TEST_CONF=config.yml virtualenv/bin/python generate_objects.py -a S3_ACCESS_KEY -s S3_SECRET_KEY -O urls.txt --seed 1234 && siege -rc ./siege.conf -r 5 + S3TEST_CONF=config.yml virtualenv/bin/python generate_objects.py -O urls.txt --seed 1234 + + This creates a bucket with your S3 credentials (from config.yml) and + fills it with garbage objects as described in 
generate_objects.conf. + It writes a list of URLS to those objects to ./urls.txt. + + Once you have objects in your bucket, run the siege benchmarking program: + siege -rc ./siege.conf -r 5 + + This tells siege to read the ./siege.conf config file which tells it to + use the urls in ./urls.txt and log to ./siege.log. It hits each url in + urls.txt 5 times (-r flag). - This creates a bucket with your S3 credentials and fills it with - garbage objects as described in generate_objects.conf. It writes a - list of URLS to those objects to ./urls.txt. siege then reads the - ./siege.conf config file which tells it to read from ./urls.txt and - log to ./siege.log and hammers each url in urls.txt 5 times (-r flag). - Results are printed to the terminal and written in CSV format to ./siege.log - - S3 credentials and output file may also be specified in config.yml - under s3.main and file_generation.url_file ''' (options, args) = parse_opts(); #SETUP random.seed(options.seed if options.seed else None) + conn = common.s3.main + if options.outfile: OUTFILE = open(options.outfile, 'w') elif common.config.file_generation.url_file: @@ -102,31 +102,23 @@ def main(): else: OUTFILE = sys.stdout - if options.access_key and options.secret_key: - host = options.host if options.host else common.config.s3.defaults.host - conn = connect_s3(host, options.access_key, options.secret_key) - else: - conn = common.s3.main - if options.bucket: - bucket = get_bucket(conn, options.bucket) + bucket = conn.create_bucket(options.bucket) else: bucket = common.get_new_bucket() - urls = [] - + keys = [] print >> OUTFILE, 'bucket: %s' % bucket.name print >> sys.stderr, 'setup complete, generating files' for profile in common.config.file_generation.groups: seed = random.random() - urls += generate_objects(bucket, profile[0], profile[1], profile[2], seed, options.checksum) - print >> sys.stderr, 'finished sending files. 
generating urls and sending to S3' + files = get_random_files(profile[0], profile[1], profile[2], seed) + keys += upload_objects(bucket, files, seed) + + print >> sys.stderr, 'finished sending files. generating urls' + for key in keys: + print >> OUTFILE, key.generate_url(30758400) #valid for 1 year - url_string = '\n'.join(urls) - url_key = Key(bucket) - url_key.key = 'urls' - url_key.set_contents_from_string(url_string) - print >> OUTFILE, url_string print >> sys.stderr, 'done'