s3-tests/generate_objects.py
Kyle Marsh 951dc0fcdb dho-qa: Add siege config file and document running siege
Adds siege.conf file for siege configuration options
Adds docstring to main function in generate_objects.py describing how to run
the static content load test.
2011-07-08 11:27:56 -07:00

136 lines
4.7 KiB
Python
Executable file

#! /usr/bin/python
from boto.s3.connection import OrdinaryCallingFormat
from boto.s3.connection import S3Connection
from boto.s3.key import Key
from optparse import OptionParser
from realistic import RandomContentFile
import realistic
import random
import yaml
import boto
import sys
# Default S3 gateway hostname; parse_opts() installs it as the default
# value of the --host option.
DHO_HOST = 'objects.dreamhost.com'
def parse_opts():
    """Parse the command line for the object generator.

    OUT:
        the (options, args) pair produced by OptionParser.parse_args();
        args holds the positional arguments left after option parsing.
        The host option defaults to DHO_HOST.
    """
    # (flags, keyword settings) for every supported option, kept in the
    # order in which they should show up in --help.
    option_table = (
        (('-O', '--outfile'),
         dict(help='write output to FILE. Defaults to STDOUT', metavar='FILE')),
        (('-a', '--access-key'),
         dict(dest='access_key', help='use S3 access key KEY', metavar='KEY')),
        (('-s', '--secret-key'),
         dict(dest='secret_key', help='use S3 secret key KEY', metavar='KEY')),
        (('-b', '--bucket'),
         dict(dest='bucket', help='push objects to BUCKET', metavar='BUCKET')),
        (('--checksum',),
         dict(dest='checksum', action='store_true',
              help='include the md5 checksum with the object urls')),
        (('--host',),
         dict(dest='host', help='use S3 gateway at HOST', metavar='HOST')),
        (('--seed',),
         dict(dest='seed', help='optional seed for the random number generator')),
    )

    opt_parser = OptionParser()
    for flags, settings in option_table:
        opt_parser.add_option(*flags, **settings)
    opt_parser.set_defaults(host=DHO_HOST)

    return opt_parser.parse_args()
def parse_config(config_files):
    """Load the YAML config files and join their contents into one list.

    Every file is expected to hold a YAML list. main() reads items 0, 1
    and 2 of each entry as quantity, mean size and standard deviation.

    IN:
        iterable of paths to YAML configuration files
    OUT:
        one list with the entries of every file, in the order given;
        an empty list when no files are passed
    RAISES:
        TypeError if a file's top-level YAML value is not a list
    """
    configurations = []
    for path in config_files:
        # 'with' guarantees the handle is closed even if parsing raises.
        with open(path, 'r') as config_file:
            # safe_load only builds plain data (lists, numbers, strings).
            # A bare yaml.load() can construct arbitrary Python objects from
            # the file and is rejected by recent PyYAML without a Loader.
            entries = yaml.safe_load(config_file)

        # An empty file parses to None; it simply contributes no profiles.
        if entries is None:
            continue
        if not isinstance(entries, list):
            raise TypeError('%s: expected a YAML list of profiles, got %s'
                            % (path, type(entries).__name__))
        configurations.extend(entries)
    return configurations
def get_bucket(conn, existing_bucket):
    """Return the bucket to upload into.

    IN:
        S3 connection object
        name of an existing bucket, or a false value to create a new one
    OUT:
        the existing bucket looked up by name, or a newly created bucket
        with a random hexadecimal name (64 random bits) whose ACL has been
        set to 'public-read'. An existing bucket's ACL is left untouched.
    """
    if not existing_bucket:
        random_name = '%x' % random.getrandbits(64)
        new_bucket = conn.create_bucket(random_name)
        new_bucket.set_acl('public-read')
        return new_bucket

    return conn.get_bucket(existing_bucket)
def connect_s3(host, access_key, secret_key):
    """Build a boto S3Connection for the given gateway and credentials.

    IN:
        hostname of the S3 gateway
        S3 access key
        S3 secret key
    OUT:
        an S3Connection configured with OrdinaryCallingFormat and
        is_secure=False (presumably plain HTTP -- confirm against boto docs).
    """
    settings = {
        'calling_format': OrdinaryCallingFormat(),
        'is_secure': False,
        'host': host,
        'aws_access_key_id': access_key,
        'aws_secret_access_key': secret_key,
    }
    return S3Connection(**settings)
def generate_objects(bucket, quantity, mean, stddev, seed, checksum=False):
    """Upload randomly generated objects to bucket and return their urls.

    Object contents come from realistic.files(mean, stddev, seed); sizes
    are meant to follow a normal distribution described by mean and
    standard deviation. Object names come from realistic.names().

    IN:
        boto S3 bucket object
        number of files to create
        mean file size in bytes
        standard deviation from mean file size
        seed for RNG (shared by the content and the name generator)
        flag to tell the method to append md5 checksums to the output
    OUT:
        list of urls (strings) to the objects, valid for 1 hour.
        If "checksum" is true, each output string consists of the url
        followed by a space and the md5 checksum.
    """
    contents = realistic.files(mean, stddev, seed)
    names = realistic.names(15, 4, seed=seed)
    url_lifetime = 3600  # seconds, i.e. valid for 1 hour

    def upload_next():
        # Pull the next random file, push it under the next random name
        # and hand back the line describing it.
        payload = next(contents)
        sys.stderr.write('sending file with size %dB' % payload.size + '\n')
        obj = Key(bucket)
        obj.key = next(names)
        obj.set_contents_from_file(payload)
        link = obj.generate_url(url_lifetime)
        if checksum:
            return link + ' %s' % obj.md5
        return link

    return [upload_next() for _ in xrange(quantity)]
def main():
    """Fill an S3 bucket with random objects and print urls to them.

    To run the static content load test:
        ./generate_objects.py -a S3_ACCESS_KEY -s S3_SECRET_KEY -O urls.txt --seed 1234 generate_objects.conf && siege -rc ./siege.conf -r 5

    This creates a bucket with your S3 credentials and fills it with
    garbage objects as described in generate_objects.conf. It writes a
    list of URLs to those objects to ./urls.txt. siege then reads the
    ./siege.conf config file which tells it to read from ./urls.txt and
    log to ./siege.log and hammers each url in urls.txt 5 times (-r flag).

    Results are printed to the terminal and written in CSV format to
    ./siege.log

    The output starts with a 'bucket: NAME' line. The url list is also
    stored in the bucket itself under the key 'urls'. Progress messages
    go to stderr.
    """
    (options, args) = parse_opts()

    # SETUP
    # A missing or empty --seed falls back to None, which lets the random
    # module pick its own seed.
    random.seed(options.seed or None)

    if options.outfile:
        outfile = open(options.outfile, 'w')
    else:
        outfile = sys.stdout

    try:
        conn = connect_s3(options.host, options.access_key, options.secret_key)
        bucket = get_bucket(conn, options.bucket)
        outfile.write('bucket: %s\n' % bucket.name)
        sys.stderr.write('setup complete, generating files\n')

        urls = []
        for profile in parse_config(args):
            # Each profile gets its own seed drawn from the global RNG, so
            # a fixed --seed yields the same per-profile seeds on every run.
            seed = random.random()
            urls += generate_objects(bucket, profile[0], profile[1],
                                     profile[2], seed, options.checksum)

        sys.stderr.write('finished sending files. Saving urls to S3\n')
        url_string = '\n'.join(urls)
        url_key = Key(bucket)
        url_key.key = 'urls'
        url_key.set_contents_from_string(url_string)
        outfile.write(url_string + '\n')
        sys.stderr.write('done\n')
    finally:
        # Close (and thereby flush) the file we opened, even on failure;
        # never close the process-wide stdout.
        if outfile is not sys.stdout:
            outfile.close()
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()