diff --git a/request_decision_graph.yml b/request_decision_graph.yml deleted file mode 100644 index 9c1b12c..0000000 --- a/request_decision_graph.yml +++ /dev/null @@ -1,569 +0,0 @@ -# -# FUZZ testing uses a probabalistic grammar to generate -# pseudo-random requests which will be sent to a server -# over long periods of time, with the goal of turning up -# garbage-input and buffer-overflow sensitivities. -# -# Each state ... -# generates/chooses contents for variables -# chooses a next state (from a weighted set of options) -# -# A terminal state is one from which there are no successors, -# at which point a message is generated (from the variables) -# and sent to the server. -# -# The test program doesn't actually know (or care) what -# response should be returned ... since the goal is to -# crash the server. -# -start: - set: - garbage: - - '{random 10-3000 printable}' - - '{random 10-1000 binary}' - garbage_no_whitespace: - - '{random 10-3000 printable_no_whitespace}' - - '{random 10-1000 binary_no_whitespace}' - acl_header: - - 'private' - - 'public-read' - - 'public-read-write' - - 'authenticated-read' - - 'bucket-owner-read' - - 'bucket-owner-full-control' - - '{random 3000 letters}' - - '{random 100-1000 binary_no_whitespace}' - choices: - - bucket - - object - -bucket: - set: - urlpath: '/{bucket}' - choices: - - 13 bucket_get - - 8 bucket_put - - 5 bucket_delete - - bucket_garbage_method - -bucket_garbage_method: - set: - method: - - '{random 1-100 printable}' - - '{random 10-100 binary}' - bucket: - - '{bucket_readable}' - - '{bucket_not_readable}' - - '{bucket_writable}' - - '{bucket_not_writable}' - - '2 {garbage_no_whitespace}' - choices: - - bucket_get_simple - - bucket_get_filtered - - bucket_get_uploads - - bucket_put_create - - bucket_put_versioning - - bucket_put_simple - -bucket_delete: - set: - method: DELETE - bucket: - - '{bucket_writable}' - - '{bucket_not_writable}' - - '2 {garbage_no_whitespace}' - query: - - null - - policy - - website - - '2 {garbage_no_whitespace}' - choices: [] - -bucket_get: - set: - method: GET - bucket: - - '{bucket_readable}' - - '{bucket_not_readable}' - - '2 {garbage_no_whitespace}' - choices: - - 11 bucket_get_simple - - bucket_get_filtered - - bucket_get_uploads - -bucket_get_simple: - set: - query: - - acl - - policy - - location - - logging - - notification - - versions - - requestPayment - - versioning - - website - - '2 {garbage_no_whitespace}' - choices: [] - -bucket_get_uploads: - set: - delimiter: - - null - - '3 delimiter={garbage_no_whitespace}' - prefix: - - null - - '3 prefix={garbage_no_whitespace}' - key_marker: - - null - - 'key-marker={object_readable}' - - 'key-marker={object_not_readable}' - - 'key-marker={invalid_key}' - - 'key-marker={random 100-1000 printable_no_whitespace}' - max_uploads: - - null - - 'max-uploads={random 1-5 binary_no_whitespace}' - - 'max-uploads={random 1-1000 digits}' - upload_id_marker: - - null - - '3 upload-id-marker={random 0-1000 printable_no_whitespace}' - query: - - 'uploads' - - 'uploads&{delimiter}&{prefix}' - - 'uploads&{max_uploads}&{key_marker}&{upload_id_marker}' - - '2 {garbage_no_whitespace}' - choices: [] - -bucket_get_filtered: - set: - delimiter: - - 'delimiter={garbage_no_whitespace}' - prefix: - - 'prefix={garbage_no_whitespace}' - marker: - - 'marker={object_readable}' - - 'marker={object_not_readable}' - - 'marker={invalid_key}' - - 'marker={random 100-1000 printable_no_whitespace}' - max_keys: - - 'max-keys={random 1-5 binary_no_whitespace}' - - 'max-keys={random 1-1000 
digits}' - query: - - null - - '{delimiter}&{prefix}' - - '{max-keys}&{marker}' - - '2 {garbage_no_whitespace}' - choices: [] - -bucket_put: - set: - bucket: - - '{bucket_writable}' - - '{bucket_not_writable}' - - '2 {garbage_no_whitespace}' - method: PUT - choices: - - bucket_put_simple - - bucket_put_create - - bucket_put_versioning - -bucket_put_create: - set: - body: - - '2 {garbage}' - - '{random 2-10 binary}' - headers: - - ['0-5', 'x-amz-acl', '{acl_header}'] - choices: [] - -bucket_put_versioning: - set: - body: - - '{garbage}' - - '4 {versioning_status}{mfa_delete_body}' - mfa_delete_body: - - null - - '{random 2-10 binary}' - - '{random 2000-3000 printable}' - versioning_status: - - null - - '{random 2-10 binary}' - - '{random 2000-3000 printable}' - mfa_header: - - '{random 10-1000 printable_no_whitespace} {random 10-1000 printable_no_whitespace}' - headers: - - ['0-1', 'x-amz-mfa', '{mfa_header}'] - choices: [] - -bucket_put_simple: - set: - body: - - '{acl_body}' - - '{policy_body}' - - '{logging_body}' - - '{notification_body}' - - '{request_payment_body}' - - '{website_body}' - acl_body: - - null - - '{owner}{acl}' - owner: - - null - - '7 {id}{display_name}' - id: - - null - - '{random 10-200 binary}' - - '{random 1000-3000 printable}' - display_name: - - null - - '2 {random 10-200 binary}' - - '2 {random 1000-3000 printable}' - - '2 {random 10-300 letters}@{random 10-300 letters}.{random 2-4 letters}' - acl: - - null - - '10 {grantee}{permission}' - grantee: - - null - - '7 {id}{display_name}' - permission: - - null - - '7 {permission_value}' - permission_value: - - '2 {garbage}' - - FULL_CONTROL - - WRITE - - WRITE_ACP - - READ - - READ_ACP - policy_body: - - null - - '2 {garbage}' - logging_body: - - null - - '' - - '{bucket}{target_prefix}{target_grants}' - target_prefix: - - null - - '{random 10-1000 printable}' - - '{random 10-1000 binary}' - target_grants: - - null - - '10 {grantee}{permission}' - notification_body: - - null - - '' - - '2 {topic}{event}' - topic: - - null - - '2 {garbage}' - event: - - null - - 's3:ReducedRedundancyLostObject' - - '2 {garbage}' - request_payment_body: - - null - - '{payer}' - payer: - - Requester - - BucketOwner - - '2 {garbage}' - website_body: - - null - - '{index_doc}{error_doc}{routing_rules}' - - '{index_doc}{error_doc}{routing_rules}' - index_doc: - - null - - '{filename}' - - '{filename}' - filename: - - null - - '2 {garbage}' - - '{random 2-10 printable}.html' - - '{random 100-1000 printable}.html' - - '{random 100-1000 printable_no_whitespace}.html' - error_doc: - - null - - '{filename}' - - '{filename}' - routing_rules: - - null - - ['0-10', '{routing_rules_content}'] - routing_rules_content: - - null - - ['0-1000', '{routing_rule}'] - routing_rule: - - null - - ['0-2', '{routing_rule_condition}{routing_rule_redirect}'] - routing_rule_condition: - - null - - ['0-10', '{KeyPrefixEquals}{HttpErrorCodeReturnedEquals}'] - KeyPrefixEquals: - - null - - ['0-2', '{filename}'] - HttpErrorCodeReturnedEquals: - - null - - ['0-2', '{HttpErrorCode}'] - HttpErrorCode: - - null - - '2 {garbage}' - - '{random 1-10 digits}' - - '{random 1-100 printable}' - routing_rule_redirect: - - null - - '{protocol}{hostname}{ReplaceKeyPrefixWith}{ReplaceKeyWith}{HttpRedirectCode}' - protocol: - - null - - 'http' - - 'https' - - ['1-5', '{garbage}'] - - ['1-5', '{filename}'] - hostname: - - null - - ['1-5', '{hostname_val}'] - - ['1-5', '{garbage}'] - hostname_val: - - null - - '{random 1-255 printable_no_whitespace}' - - '{random 1-255 printable}' - - 
'{random 1-255 punctuation}' - - '{random 1-255 whitespace}' - - '{garbage}' - ReplaceKeyPrefixWith: - - null - - ['1-5', '{filename}'] - HttpRedirectCode: - - null - - ['1-5', '{random 1-10 digits}'] - - ['1-5', '{random 1-100 printable}'] - - ['1-5', '{filename}'] - - choices: [] - -object: - set: - urlpath: '/{bucket}/{object}' - - range_header: - - null - - 'bytes={random 1-2 digits}-{random 1-4 digits}' - - 'bytes={random 1-1000 binary_no_whitespace}' - if_modified_since_header: - - null - - '2 {garbage_no_whitespace}' - if_match_header: - - null - - '2 {garbage_no_whitespace}' - if_none_match_header: - - null - - '2 {garbage_no_whitespace}' - choices: - - object_delete - - object_get - - object_put - - object_head - - object_garbage_method - -object_garbage_method: - set: - method: - - '{random 1-100 printable}' - - '{random 10-100 binary}' - bucket: - - '{bucket_readable}' - - '{bucket_not_readable}' - - '{bucket_writable}' - - '{bucket_not_writable}' - - '2 {garbage_no_whitespace}' - object: - - '{object_readable}' - - '{object_not_readable}' - - '{object_writable}' - - '{object_not_writable}' - - '2 {garbage_no_whitespace}' - choices: - - object_get_query - - object_get_head_simple - -object_delete: - set: - method: DELETE - bucket: - - '5 {bucket_writable}' - - '{bucket_not_writable}' - - '{garbage_no_whitespace}' - object: - - '{object_writable}' - - '{object_not_writable}' - - '2 {garbage_no_whitespace}' - choices: [] - -object_get: - set: - method: GET - bucket: - - '5 {bucket_readable}' - - '{bucket_not_readable}' - - '{garbage_no_whitespace}' - object: - - '{object_readable}' - - '{object_not_readable}' - - '{garbage_no_whitespace}' - choices: - - 5 object_get_head_simple - - 2 object_get_query - -object_get_query: - set: - query: - - 'torrent' - - 'acl' - choices: [] - -object_get_head_simple: - set: {} - headers: - - ['0-1', 'range', '{range_header}'] - - ['0-1', 'if-modified-since', '{if_modified_since_header}'] - - ['0-1', 'if-unmodified-since', '{if_modified_since_header}'] - - ['0-1', 'if-match', '{if_match_header}'] - - ['0-1', 'if-none-match', '{if_none_match_header}'] - choices: [] - -object_head: - set: - method: HEAD - bucket: - - '5 {bucket_readable}' - - '{bucket_not_readable}' - - '{garbage_no_whitespace}' - object: - - '{object_readable}' - - '{object_not_readable}' - - '{garbage_no_whitespace}' - choices: - - object_get_head_simple - -object_put: - set: - method: PUT - bucket: - - '5 {bucket_writable}' - - '{bucket_not_writable}' - - '{garbage_no_whitespace}' - object: - - '{object_writable}' - - '{object_not_writable}' - - '{garbage_no_whitespace}' - cache_control: - - null - - '{garbage_no_whitespace}' - - 'no-cache' - content_disposition: - - null - - '{garbage_no_whitespace}' - content_encoding: - - null - - '{garbage_no_whitespace}' - content_length: - - '{random 1-20 digits}' - - '{garbage_no_whitespace}' - content_md5: - - null - - '{garbage_no_whitespace}' - content_type: - - null - - 'binary/octet-stream' - - '{garbage_no_whitespace}' - expect: - - null - - '100-continue' - - '{garbage_no_whitespace}' - expires: - - null - - '{random 1-10000000 digits}' - - '{garbage_no_whitespace}' - meta_key: - - null - - 'foo' - - '{garbage_no_whitespace}' - meta_value: - - null - - '{garbage_no_whitespace}' - choices: - - object_put_simple - - object_put_acl - - object_put_copy - -object_put_simple: - set: {} - headers: - - ['0-1', 'cache-control', '{cache_control}'] - - ['0-1', 'content-disposition', '{content_disposition}'] - - ['0-1', 'content-encoding', 
'{content_encoding}'] - - ['0-1', 'content-length', '{content_length}'] - - ['0-1', 'content-md5', '{content_md5}'] - - ['0-1', 'content-type', '{content_type}'] - - ['0-1', 'expect', '{expect}'] - - ['0-1', 'expires', '{expires}'] - - ['0-1', 'x-amz-acl', '{acl_header}'] - - ['0-6', 'x-amz-meta-{meta_key}', '{meta_value}'] - choices: [] - -object_put_acl: - set: - query: 'acl' - body: - - null - - '2 {garbage}' - - '{owner}{acl}' - owner: - - null - - '7 {id}{display_name}' - id: - - null - - '{random 10-200 binary}' - - '{random 1000-3000 printable}' - display_name: - - null - - '2 {random 10-200 binary}' - - '2 {random 1000-3000 printable}' - - '2 {random 10-300 letters}@{random 10-300 letters}.{random 2-4 letters}' - acl: - - null - - '10 {grantee}{permission}' - grantee: - - null - - '7 {id}{display_name}' - permission: - - null - - '7 {permission_value}' - permission_value: - - '2 {garbage}' - - FULL_CONTROL - - WRITE - - WRITE_ACP - - READ - - READ_ACP - headers: - - ['0-1', 'cache-control', '{cache_control}'] - - ['0-1', 'content-disposition', '{content_disposition}'] - - ['0-1', 'content-encoding', '{content_encoding}'] - - ['0-1', 'content-length', '{content_length}'] - - ['0-1', 'content-md5', '{content_md5}'] - - ['0-1', 'content-type', '{content_type}'] - - ['0-1', 'expect', '{expect}'] - - ['0-1', 'expires', '{expires}'] - - ['0-1', 'x-amz-acl', '{acl_header}'] - choices: [] - -object_put_copy: - set: {} - headers: - - ['1-1', 'x-amz-copy-source', '{source_object}'] - - ['0-1', 'x-amz-acl', '{acl_header}'] - - ['0-1', 'x-amz-metadata-directive', '{metadata_directive}'] - - ['0-1', 'x-amz-copy-source-if-match', '{if_match_header}'] - - ['0-1', 'x-amz-copy-source-if-none-match', '{if_none_match_header}'] - - ['0-1', 'x-amz-copy-source-if-modified-since', '{if_modified_since_header}'] - - ['0-1', 'x-amz-copy-source-if-unmodified-since', '{if_modified_since_header}'] - choices: [] diff --git a/s3tests/analysis/__init__.py b/s3tests/analysis/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/s3tests/analysis/rwstats.py b/s3tests/analysis/rwstats.py deleted file mode 100644 index fb341eb..0000000 --- a/s3tests/analysis/rwstats.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/python -import sys -import os -import yaml -import optparse - -NANOSECONDS = int(1e9) - -# Output stats in a format similar to siege -# see http://www.joedog.org/index/siege-home -OUTPUT_FORMAT = """Stats for type: [{type}] -Transactions: {trans:>11} hits -Availability: {avail:>11.2f} % -Elapsed time: {elapsed:>11.2f} secs -Data transferred: {data:>11.2f} MB -Response time: {resp_time:>11.2f} secs -Transaction rate: {trans_rate:>11.2f} trans/sec -Throughput: {data_rate:>11.2f} MB/sec -Concurrency: {conc:>11.2f} -Successful transactions: {trans_success:>11} -Failed transactions: {trans_fail:>11} -Longest transaction: {trans_long:>11.2f} -Shortest transaction: {trans_short:>11.2f} -""" - -def parse_options(): - usage = "usage: %prog [options]" - parser = optparse.OptionParser(usage=usage) - parser.add_option( - "-f", "--file", dest="input", metavar="FILE", - help="Name of input YAML file. 
Default uses sys.stdin") - parser.add_option( - "-v", "--verbose", dest="verbose", action="store_true", - help="Enable verbose output") - - (options, args) = parser.parse_args() - - if not options.input and os.isatty(sys.stdin.fileno()): - parser.error("option -f required if no data is provided " - "in stdin") - - return (options, args) - -def main(): - (options, args) = parse_options() - - total = {} - durations = {} - min_time = {} - max_time = {} - errors = {} - success = {} - - calculate_stats(options, total, durations, min_time, max_time, errors, - success) - print_results(total, durations, min_time, max_time, errors, success) - -def calculate_stats(options, total, durations, min_time, max_time, errors, - success): - print('Calculating statistics...') - - f = sys.stdin - if options.input: - f = file(options.input, 'r') - - for item in yaml.safe_load_all(f): - type_ = item.get('type') - if type_ not in ('r', 'w'): - continue # ignore any invalid items - - if 'error' in item: - errors[type_] = errors.get(type_, 0) + 1 - continue # skip rest of analysis for this item - else: - success[type_] = success.get(type_, 0) + 1 - - # parse the item - data_size = item['chunks'][-1][0] - duration = item['duration'] - start = item['start'] - end = start + duration / float(NANOSECONDS) - - if options.verbose: - print("[{type}] POSIX time: {start:>18.2f} - {end:<18.2f} " \ - "{data:>11.2f} KB".format( - type=type_, - start=start, - end=end, - data=data_size / 1024.0, # convert to KB - )) - - # update time boundaries - prev = min_time.setdefault(type_, start) - if start < prev: - min_time[type_] = start - prev = max_time.setdefault(type_, end) - if end > prev: - max_time[type_] = end - - # save the duration - if type_ not in durations: - durations[type_] = [] - durations[type_].append(duration) - - # add to running totals - total[type_] = total.get(type_, 0) + data_size - -def print_results(total, durations, min_time, max_time, errors, success): - for type_ in list(total.keys()): - trans_success = success.get(type_, 0) - trans_fail = errors.get(type_, 0) - trans = trans_success + trans_fail - avail = trans_success * 100.0 / trans - elapsed = max_time[type_] - min_time[type_] - data = total[type_] / 1024.0 / 1024.0 # convert to MB - resp_time = sum(durations[type_]) / float(NANOSECONDS) / \ - len(durations[type_]) - trans_rate = trans / elapsed - data_rate = data / elapsed - conc = trans_rate * resp_time - trans_long = max(durations[type_]) / float(NANOSECONDS) - trans_short = min(durations[type_]) / float(NANOSECONDS) - - print(OUTPUT_FORMAT.format( - type=type_, - trans_success=trans_success, - trans_fail=trans_fail, - trans=trans, - avail=avail, - elapsed=elapsed, - data=data, - resp_time=resp_time, - trans_rate=trans_rate, - data_rate=data_rate, - conc=conc, - trans_long=trans_long, - trans_short=trans_short, - )) - -if __name__ == '__main__': - main() - diff --git a/s3tests/functional/AnonymousAuth.py b/s3tests/functional/AnonymousAuth.py deleted file mode 100644 index 7e2ffee..0000000 --- a/s3tests/functional/AnonymousAuth.py +++ /dev/null @@ -1,5 +0,0 @@ -from boto.auth_handler import AuthHandler - -class AnonymousAuthHandler(AuthHandler): - def add_auth(self, http_request, **kwargs): - return # Nothing to do for anonymous access! 
diff --git a/s3tests/functional/test_headers.py b/s3tests/functional/test_headers.py
index 659ed29..ab91025 100644
--- a/s3tests/functional/test_headers.py
+++ b/s3tests/functional/test_headers.py
@@ -23,7 +23,6 @@ from nose.plugins.attrib import attr
 from nose.plugins.skip import SkipTest
 
 from .utils import assert_raises
-from . import AnonymousAuth
 
 from email.header import decode_header
 
diff --git a/s3tests/fuzz/__init__.py b/s3tests/fuzz/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/s3tests/fuzz/headers.py b/s3tests/fuzz/headers.py
deleted file mode 100644
index 9b9db12..0000000
--- a/s3tests/fuzz/headers.py
+++ /dev/null
@@ -1,378 +0,0 @@
-from boto.s3.connection import S3Connection
-from boto.exception import BotoServerError
-from boto.s3.key import Key
-from http.client import BadStatusLine
-from optparse import OptionParser
-from .. import common
-
-import traceback
-import itertools
-import random
-import string
-import struct
-import yaml
-import sys
-import re
-
-
-class DecisionGraphError(Exception):
-    """ Raised when a node in a graph tries to set a header or
-        key that was previously set by another node
-    """
-    def __init__(self, value):
-        self.value = value
-
-    def __str__(self):
-        return repr(self.value)
-
-
-class RecursionError(Exception):
-    """Runaway recursion in string formatting"""
-
-    def __init__(self, msg):
-        self.msg = msg
-
-    def __str__(self):
-        return '{0.__doc__}: {0.msg!r}'.format(self)
-
-
-def assemble_decision(decision_graph, prng):
-    """ Take in a graph describing the possible decision space and a random
-        number generator and traverse the graph to build a decision
-    """
-    return descend_graph(decision_graph, 'start', prng)
-
-
-def descend_graph(decision_graph, node_name, prng):
-    """ Given a graph and a particular node in that graph, set the values in
-        the node's "set" list, pick a choice from the "choice" list, and
-        recurse. Finally, return dictionary of values
-    """
-    node = decision_graph[node_name]
-
-    try:
-        choice = make_choice(node['choices'], prng)
-        if choice == '':
-            decision = {}
-        else:
-            decision = descend_graph(decision_graph, choice, prng)
-    except IndexError:
-        decision = {}
-
-    for key, choices in node['set'].items():
-        if key in decision:
-            raise DecisionGraphError("Node %s tried to set '%s', but that key was already set by a lower node!" %(node_name, key))
-        decision[key] = make_choice(choices, prng)
-
-    if 'headers' in node:
-        decision.setdefault('headers', [])
-
-        for desc in node['headers']:
-            try:
-                (repetition_range, header, value) = desc
-            except ValueError:
-                (header, value) = desc
-                repetition_range = '1'
-
-            try:
-                size_min, size_max = repetition_range.split('-', 1)
-            except ValueError:
-                size_min = size_max = repetition_range
-
-            size_min = int(size_min)
-            size_max = int(size_max)
-
-            num_reps = prng.randint(size_min, size_max)
-            if header in [h for h, v in decision['headers']]:
-                raise DecisionGraphError("Node %s tried to add header '%s', but that header already exists!"
%(node_name, header)) - for _ in range(num_reps): - decision['headers'].append([header, value]) - - return decision - - -def make_choice(choices, prng): - """ Given a list of (possibly weighted) options or just a single option!, - choose one of the options taking weights into account and return the - choice - """ - if isinstance(choices, str): - return choices - weighted_choices = [] - for option in choices: - if option is None: - weighted_choices.append('') - continue - try: - (weight, value) = option.split(None, 1) - weight = int(weight) - except ValueError: - weight = 1 - value = option - - if value == 'null' or value == 'None': - value = '' - - for _ in range(weight): - weighted_choices.append(value) - - return prng.choice(weighted_choices) - - -def expand_headers(decision, prng): - expanded_headers = {} - for header in decision['headers']: - h = expand(decision, header[0], prng) - v = expand(decision, header[1], prng) - expanded_headers[h] = v - return expanded_headers - - -def expand(decision, value, prng): - c = itertools.count() - fmt = RepeatExpandingFormatter(prng) - new = fmt.vformat(value, [], decision) - return new - - -class RepeatExpandingFormatter(string.Formatter): - charsets = { - 'printable_no_whitespace': string.printable.translate( - "".maketrans('', '', string.whitespace)), - 'printable': string.printable, - 'punctuation': string.punctuation, - 'whitespace': string.whitespace, - 'digits': string.digits - } - - def __init__(self, prng, _recursion=0): - super(RepeatExpandingFormatter, self).__init__() - # this class assumes it is always instantiated once per - # formatting; use that to detect runaway recursion - self.prng = prng - self._recursion = _recursion - - def get_value(self, key, args, kwargs): - fields = key.split(None, 1) - fn = getattr(self, 'special_{name}'.format(name=fields[0]), None) - if fn is not None: - if len(fields) == 1: - fields.append('') - return fn(fields[1]) - - val = super(RepeatExpandingFormatter, self).get_value(key, args, kwargs) - if self._recursion > 5: - raise RecursionError(key) - fmt = self.__class__(self.prng, _recursion=self._recursion+1) - - n = fmt.vformat(val, args, kwargs) - return n - - def special_random(self, args): - arg_list = args.split() - try: - size_min, size_max = arg_list[0].split('-', 1) - except ValueError: - size_min = size_max = arg_list[0] - except IndexError: - size_min = '0' - size_max = '1000' - - size_min = int(size_min) - size_max = int(size_max) - length = self.prng.randint(size_min, size_max) - - try: - charset_arg = arg_list[1] - except IndexError: - charset_arg = 'printable' - - if charset_arg == 'binary' or charset_arg == 'binary_no_whitespace': - num_bytes = length + 8 - tmplist = [self.prng.getrandbits(64) for _ in range(num_bytes // 8)] - tmpstring = struct.pack((num_bytes // 8) * 'Q', *tmplist) - if charset_arg == 'binary_no_whitespace': - tmpstring = b''.join([c] for c in tmpstring if c not in bytes( - string.whitespace, 'utf-8')) - return tmpstring[0:length] - else: - charset = self.charsets[charset_arg] - return ''.join([self.prng.choice(charset) for _ in range(length)]) # Won't scale nicely - - -def parse_options(): - parser = OptionParser() - parser.add_option('-O', '--outfile', help='write output to FILE. 
Defaults to STDOUT', metavar='FILE') - parser.add_option('--seed', dest='seed', type='int', help='initial seed for the random number generator') - parser.add_option('--seed-file', dest='seedfile', help='read seeds for specific requests from FILE', metavar='FILE') - parser.add_option('-n', dest='num_requests', type='int', help='issue NUM requests before stopping', metavar='NUM') - parser.add_option('-v', '--verbose', dest='verbose', action="store_true", help='turn on verbose output') - parser.add_option('-d', '--debug', dest='debug', action="store_true", help='turn on debugging (very verbose) output') - parser.add_option('--decision-graph', dest='graph_filename', help='file in which to find the request decision graph') - parser.add_option('--no-cleanup', dest='cleanup', action="store_false", help='turn off teardown so you can peruse the state of buckets after testing') - - parser.set_defaults(num_requests=5) - parser.set_defaults(cleanup=True) - parser.set_defaults(graph_filename='request_decision_graph.yml') - return parser.parse_args() - - -def randomlist(seed=None): - """ Returns an infinite generator of random numbers - """ - rng = random.Random(seed) - while True: - yield rng.randint(0,100000) #100,000 seeds is enough, right? - - -def populate_buckets(conn, alt): - """ Creates buckets and keys for fuzz testing and sets appropriate - permissions. Returns a dictionary of the bucket and key names. - """ - breadable = common.get_new_bucket(alt) - bwritable = common.get_new_bucket(alt) - bnonreadable = common.get_new_bucket(alt) - - oreadable = Key(breadable) - owritable = Key(bwritable) - ononreadable = Key(breadable) - oreadable.set_contents_from_string('oreadable body') - owritable.set_contents_from_string('owritable body') - ononreadable.set_contents_from_string('ononreadable body') - - breadable.set_acl('public-read') - bwritable.set_acl('public-read-write') - bnonreadable.set_acl('private') - oreadable.set_acl('public-read') - owritable.set_acl('public-read-write') - ononreadable.set_acl('private') - - return dict( - bucket_readable=breadable.name, - bucket_writable=bwritable.name, - bucket_not_readable=bnonreadable.name, - bucket_not_writable=breadable.name, - object_readable=oreadable.key, - object_writable=owritable.key, - object_not_readable=ononreadable.key, - object_not_writable=oreadable.key, - ) - - -def _main(): - """ The main script - """ - (options, args) = parse_options() - random.seed(options.seed if options.seed else None) - s3_connection = common.s3.main - alt_connection = common.s3.alt - - if options.outfile: - OUT = open(options.outfile, 'w') - else: - OUT = sys.stderr - - VERBOSE = DEBUG = open('/dev/null', 'w') - if options.verbose: - VERBOSE = OUT - if options.debug: - DEBUG = OUT - VERBOSE = OUT - - request_seeds = None - if options.seedfile: - FH = open(options.seedfile, 'r') - request_seeds = [int(line) for line in FH if line != '\n'] - print('Seedfile: %s' %options.seedfile, file=OUT) - print('Number of requests: %d' %len(request_seeds), file=OUT) - else: - if options.seed: - print('Initial Seed: %d' %options.seed, file=OUT) - print('Number of requests: %d' %options.num_requests, file=OUT) - random_list = randomlist(options.seed) - request_seeds = itertools.islice(random_list, options.num_requests) - - print('Decision Graph: %s' %options.graph_filename, file=OUT) - - graph_file = open(options.graph_filename, 'r') - decision_graph = yaml.safe_load(graph_file) - - constants = populate_buckets(s3_connection, alt_connection) - print("Test Buckets/Objects:", 
file=VERBOSE) - for key, value in constants.items(): - print("\t%s: %s" %(key, value), file=VERBOSE) - - print("Begin Fuzzing...", file=OUT) - print('='*80, file=VERBOSE) - for request_seed in request_seeds: - print('Seed is: %r' %request_seed, file=VERBOSE) - prng = random.Random(request_seed) - decision = assemble_decision(decision_graph, prng) - decision.update(constants) - - method = expand(decision, decision['method'], prng) - path = expand(decision, decision['urlpath'], prng) - - try: - body = expand(decision, decision['body'], prng) - except KeyError: - body = '' - - try: - headers = expand_headers(decision, prng) - except KeyError: - headers = {} - - print("%r %r" %(method[:100], path[:100]), file=VERBOSE) - for h, v in headers.items(): - print("%r: %r" %(h[:50], v[:50]), file=VERBOSE) - print("%r\n" % body[:100], file=VERBOSE) - - print('FULL REQUEST', file=DEBUG) - print('Method: %r' %method, file=DEBUG) - print('Path: %r' %path, file=DEBUG) - print('Headers:', file=DEBUG) - for h, v in headers.items(): - print("\t%r: %r" %(h, v), file=DEBUG) - print('Body: %r\n' %body, file=DEBUG) - - failed = False # Let's be optimistic, shall we? - try: - response = s3_connection.make_request(method, path, data=body, headers=headers, override_num_retries=1) - body = response.read() - except BotoServerError as e: - response = e - body = e.body - failed = True - except BadStatusLine as e: - print('FAILED: failed to parse response (BadStatusLine); probably a NUL byte in your request?', file=OUT) - print('='*80, file=VERBOSE) - continue - - if failed: - print('FAILED:', file=OUT) - OLD_VERBOSE = VERBOSE - OLD_DEBUG = DEBUG - VERBOSE = DEBUG = OUT - print('Seed was: %r' %request_seed, file=VERBOSE) - print('Response status code: %d %s' %(response.status, response.reason), file=VERBOSE) - print('Body:\n%s' %body, file=DEBUG) - print('='*80, file=VERBOSE) - if failed: - VERBOSE = OLD_VERBOSE - DEBUG = OLD_DEBUG - - print('...done fuzzing', file=OUT) - - if options.cleanup: - common.teardown() - - -def main(): - common.setup() - try: - _main() - except Exception as e: - traceback.print_exc() - common.teardown() - diff --git a/s3tests/fuzz/test/__init__.py b/s3tests/fuzz/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/s3tests/fuzz/test/test_fuzzer.py b/s3tests/fuzz/test/test_fuzzer.py deleted file mode 100644 index e2f93ae..0000000 --- a/s3tests/fuzz/test/test_fuzzer.py +++ /dev/null @@ -1,404 +0,0 @@ -""" -Unit-test suite for the S3 fuzzer - -The fuzzer is a grammar-based random S3 operation generator -that produces random operation sequences in an effort to -crash the server. This unit-test suite does not test -S3 servers, but rather the fuzzer infrastructure. - -It works by running the fuzzer off of a simple grammar, -and checking the producted requests to ensure that they -include the expected sorts of operations in the expected -proportions. 
-""" -import sys -import itertools -import nose -import random -import string -import yaml - -from ..headers import * - -from nose.tools import eq_ as eq -from nose.tools import assert_true -from nose.plugins.attrib import attr - -from ...functional.utils import assert_raises -from functools import reduce - -_decision_graph = {} - -def check_access_denied(fn, *args, **kwargs): - e = assert_raises(boto.exception.S3ResponseError, fn, *args, **kwargs) - eq(e.status, 403) - eq(e.reason, 'Forbidden') - eq(e.error_code, 'AccessDenied') - - -def build_graph(): - graph = {} - graph['start'] = { - 'set': {}, - 'choices': ['node2'] - } - graph['leaf'] = { - 'set': { - 'key1': 'value1', - 'key2': 'value2' - }, - 'headers': [ - ['1-2', 'random-header-{random 5-10 printable}', '{random 20-30 punctuation}'] - ], - 'choices': [] - } - graph['node1'] = { - 'set': { - 'key3': 'value3', - 'header_val': [ - '3 h1', - '2 h2', - 'h3' - ] - }, - 'headers': [ - ['1-1', 'my-header', '{header_val}'], - ], - 'choices': ['leaf'] - } - graph['node2'] = { - 'set': { - 'randkey': 'value-{random 10-15 printable}', - 'path': '/{bucket_readable}', - 'indirect_key1': '{key1}' - }, - 'choices': ['leaf'] - } - graph['bad_node'] = { - 'set': { - 'key1': 'value1' - }, - 'choices': ['leaf'] - } - graph['nonexistant_child_node'] = { - 'set': {}, - 'choices': ['leafy_greens'] - } - graph['weighted_node'] = { - 'set': { - 'k1': [ - 'foo', - '2 bar', - '1 baz' - ] - }, - 'choices': [ - 'foo', - '2 bar', - '1 baz' - ] - } - graph['null_choice_node'] = { - 'set': {}, - 'choices': [None] - } - graph['repeated_headers_node'] = { - 'set': {}, - 'headers': [ - ['1-2', 'random-header-{random 5-10 printable}', '{random 20-30 punctuation}'] - ], - 'choices': ['leaf'] - } - graph['weighted_null_choice_node'] = { - 'set': {}, - 'choices': ['3 null'] - } - return graph - - -#def test_foo(): - #graph_file = open('request_decision_graph.yml', 'r') - #graph = yaml.safe_load(graph_file) - #eq(graph['bucket_put_simple']['set']['grantee'], 0) - - -def test_load_graph(): - graph_file = open('request_decision_graph.yml', 'r') - graph = yaml.safe_load(graph_file) - graph['start'] - - -def test_descend_leaf_node(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'leaf', prng) - - eq(decision['key1'], 'value1') - eq(decision['key2'], 'value2') - e = assert_raises(KeyError, lambda x: decision[x], 'key3') - - -def test_descend_node(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'node1', prng) - - eq(decision['key1'], 'value1') - eq(decision['key2'], 'value2') - eq(decision['key3'], 'value3') - - -def test_descend_bad_node(): - graph = build_graph() - prng = random.Random(1) - assert_raises(DecisionGraphError, descend_graph, graph, 'bad_node', prng) - - -def test_descend_nonexistant_child(): - graph = build_graph() - prng = random.Random(1) - assert_raises(KeyError, descend_graph, graph, 'nonexistant_child_node', prng) - - -def test_expand_random_printable(): - prng = random.Random(1) - got = expand({}, '{random 10-15 printable}', prng) - eq(got, '[/pNI$;92@') - - -def test_expand_random_binary(): - prng = random.Random(1) - got = expand({}, '{random 10-15 binary}', prng) - eq(got, '\xdfj\xf1\xd80>a\xcd\xc4\xbb') - - -def test_expand_random_printable_no_whitespace(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random 500 printable_no_whitespace}', prng) - assert_true(reduce(lambda x, y: x and y, [x not in string.whitespace and x in string.printable for x 
in got])) - - -def test_expand_random_binary_no_whitespace(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random 500 binary_no_whitespace}', prng) - assert_true(reduce(lambda x, y: x and y, [x not in string.whitespace for x in got])) - - -def test_expand_random_no_args(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random}', prng) - assert_true(0 <= len(got) <= 1000) - assert_true(reduce(lambda x, y: x and y, [x in string.printable for x in got])) - - -def test_expand_random_no_charset(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random 10-30}', prng) - assert_true(10 <= len(got) <= 30) - assert_true(reduce(lambda x, y: x and y, [x in string.printable for x in got])) - - -def test_expand_random_exact_length(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random 10 digits}', prng) - assert_true(len(got) == 10) - assert_true(reduce(lambda x, y: x and y, [x in string.digits for x in got])) - - -def test_expand_random_bad_charset(): - prng = random.Random(1) - assert_raises(KeyError, expand, {}, '{random 10-30 foo}', prng) - - -def test_expand_random_missing_length(): - prng = random.Random(1) - assert_raises(ValueError, expand, {}, '{random printable}', prng) - - -def test_assemble_decision(): - graph = build_graph() - prng = random.Random(1) - decision = assemble_decision(graph, prng) - - eq(decision['key1'], 'value1') - eq(decision['key2'], 'value2') - eq(decision['randkey'], 'value-{random 10-15 printable}') - eq(decision['indirect_key1'], '{key1}') - eq(decision['path'], '/{bucket_readable}') - assert_raises(KeyError, lambda x: decision[x], 'key3') - - -def test_expand_escape(): - prng = random.Random(1) - decision = dict( - foo='{{bar}}', - ) - got = expand(decision, '{foo}', prng) - eq(got, '{bar}') - - -def test_expand_indirect(): - prng = random.Random(1) - decision = dict( - foo='{bar}', - bar='quux', - ) - got = expand(decision, '{foo}', prng) - eq(got, 'quux') - - -def test_expand_indirect_double(): - prng = random.Random(1) - decision = dict( - foo='{bar}', - bar='{quux}', - quux='thud', - ) - got = expand(decision, '{foo}', prng) - eq(got, 'thud') - - -def test_expand_recursive(): - prng = random.Random(1) - decision = dict( - foo='{foo}', - ) - e = assert_raises(RecursionError, expand, decision, '{foo}', prng) - eq(str(e), "Runaway recursion in string formatting: 'foo'") - - -def test_expand_recursive_mutual(): - prng = random.Random(1) - decision = dict( - foo='{bar}', - bar='{foo}', - ) - e = assert_raises(RecursionError, expand, decision, '{foo}', prng) - eq(str(e), "Runaway recursion in string formatting: 'foo'") - - -def test_expand_recursive_not_too_eager(): - prng = random.Random(1) - decision = dict( - foo='bar', - ) - got = expand(decision, 100*'{foo}', prng) - eq(got, 100*'bar') - - -def test_make_choice_unweighted_with_space(): - prng = random.Random(1) - choice = make_choice(['foo bar'], prng) - eq(choice, 'foo bar') - -def test_weighted_choices(): - graph = build_graph() - prng = random.Random(1) - - choices_made = {} - for _ in range(1000): - choice = make_choice(graph['weighted_node']['choices'], prng) - if choice in choices_made: - choices_made[choice] += 1 - else: - choices_made[choice] = 1 - - foo_percentage = choices_made['foo'] / 1000.0 - bar_percentage = choices_made['bar'] / 1000.0 - baz_percentage = choices_made['baz'] / 1000.0 - nose.tools.assert_almost_equal(foo_percentage, 0.25, 1) - nose.tools.assert_almost_equal(bar_percentage, 0.50, 1) - 
nose.tools.assert_almost_equal(baz_percentage, 0.25, 1) - - -def test_null_choices(): - graph = build_graph() - prng = random.Random(1) - choice = make_choice(graph['null_choice_node']['choices'], prng) - - eq(choice, '') - - -def test_weighted_null_choices(): - graph = build_graph() - prng = random.Random(1) - choice = make_choice(graph['weighted_null_choice_node']['choices'], prng) - - eq(choice, '') - - -def test_null_child(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'null_choice_node', prng) - - eq(decision, {}) - - -def test_weighted_set(): - graph = build_graph() - prng = random.Random(1) - - choices_made = {} - for _ in range(1000): - choice = make_choice(graph['weighted_node']['set']['k1'], prng) - if choice in choices_made: - choices_made[choice] += 1 - else: - choices_made[choice] = 1 - - foo_percentage = choices_made['foo'] / 1000.0 - bar_percentage = choices_made['bar'] / 1000.0 - baz_percentage = choices_made['baz'] / 1000.0 - nose.tools.assert_almost_equal(foo_percentage, 0.25, 1) - nose.tools.assert_almost_equal(bar_percentage, 0.50, 1) - nose.tools.assert_almost_equal(baz_percentage, 0.25, 1) - - -def test_header_presence(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'node1', prng) - - c1 = itertools.count() - c2 = itertools.count() - for header, value in decision['headers']: - if header == 'my-header': - eq(value, '{header_val}') - assert_true(next(c1) < 1) - elif header == 'random-header-{random 5-10 printable}': - eq(value, '{random 20-30 punctuation}') - assert_true(next(c2) < 2) - else: - raise KeyError('unexpected header found: %s' % header) - - assert_true(next(c1)) - assert_true(next(c2)) - - -def test_duplicate_header(): - graph = build_graph() - prng = random.Random(1) - assert_raises(DecisionGraphError, descend_graph, graph, 'repeated_headers_node', prng) - - -def test_expand_headers(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'node1', prng) - expanded_headers = expand_headers(decision, prng) - - for header, value in expanded_headers.items(): - if header == 'my-header': - assert_true(value in ['h1', 'h2', 'h3']) - elif header.startswith('random-header-'): - assert_true(20 <= len(value) <= 30) - assert_true(string.strip(value, RepeatExpandingFormatter.charsets['punctuation']) is '') - else: - raise DecisionGraphError('unexpected header found: "%s"' % header) - diff --git a/s3tests/generate_objects.py b/s3tests/generate_objects.py deleted file mode 100644 index b8d65a7..0000000 --- a/s3tests/generate_objects.py +++ /dev/null @@ -1,117 +0,0 @@ -from boto.s3.key import Key -from optparse import OptionParser -from . import realistic -import traceback -import random -from . import common -import sys - - -def parse_opts(): - parser = OptionParser() - parser.add_option('-O', '--outfile', help='write output to FILE. Defaults to STDOUT', metavar='FILE') - parser.add_option('-b', '--bucket', dest='bucket', help='push objects to BUCKET', metavar='BUCKET') - parser.add_option('--seed', dest='seed', help='optional seed for the random number generator') - - return parser.parse_args() - - -def get_random_files(quantity, mean, stddev, seed): - """Create file-like objects with pseudorandom contents. 
- IN: - number of files to create - mean file size in bytes - standard deviation from mean file size - seed for PRNG - OUT: - list of file handles - """ - file_generator = realistic.files(mean, stddev, seed) - return [next(file_generator) for _ in range(quantity)] - - -def upload_objects(bucket, files, seed): - """Upload a bunch of files to an S3 bucket - IN: - boto S3 bucket object - list of file handles to upload - seed for PRNG - OUT: - list of boto S3 key objects - """ - keys = [] - name_generator = realistic.names(15, 4, seed=seed) - - for fp in files: - print('sending file with size %dB' % fp.size, file=sys.stderr) - key = Key(bucket) - key.key = next(name_generator) - key.set_contents_from_file(fp, rewind=True) - key.set_acl('public-read') - keys.append(key) - - return keys - - -def _main(): - '''To run the static content load test, make sure you've bootstrapped your - test environment and set up your config.yaml file, then run the following: - S3TEST_CONF=config.yaml virtualenv/bin/s3tests-generate-objects.py --seed 1234 - - This creates a bucket with your S3 credentials (from config.yaml) and - fills it with garbage objects as described in the - file_generation.groups section of config.yaml. It writes a list of - URLS to those objects to the file listed in file_generation.url_file - in config.yaml. - - Once you have objcts in your bucket, run the siege benchmarking program: - siege --rc ./siege.conf -r 5 - - This tells siege to read the ./siege.conf config file which tells it to - use the urls in ./urls.txt and log to ./siege.log. It hits each url in - urls.txt 5 times (-r flag). - - Results are printed to the terminal and written in CSV format to - ./siege.log - ''' - (options, args) = parse_opts() - - #SETUP - random.seed(options.seed if options.seed else None) - conn = common.s3.main - - if options.outfile: - OUTFILE = open(options.outfile, 'w') - elif common.config.file_generation.url_file: - OUTFILE = open(common.config.file_generation.url_file, 'w') - else: - OUTFILE = sys.stdout - - if options.bucket: - bucket = conn.create_bucket(options.bucket) - else: - bucket = common.get_new_bucket() - - bucket.set_acl('public-read') - keys = [] - print('bucket: %s' % bucket.name, file=OUTFILE) - print('setup complete, generating files', file=sys.stderr) - for profile in common.config.file_generation.groups: - seed = random.random() - files = get_random_files(profile[0], profile[1], profile[2], seed) - keys += upload_objects(bucket, files, seed) - - print('finished sending files. generating urls', file=sys.stderr) - for key in keys: - print(key.generate_url(0, query_auth=False), file=OUTFILE) - - print('done', file=sys.stderr) - - -def main(): - common.setup() - try: - _main() - except Exception as e: - traceback.print_exc() - common.teardown() diff --git a/s3tests/readwrite.py b/s3tests/readwrite.py deleted file mode 100644 index 1afb3f1..0000000 --- a/s3tests/readwrite.py +++ /dev/null @@ -1,265 +0,0 @@ -import gevent -import gevent.pool -import gevent.queue -import gevent.monkey; gevent.monkey.patch_all() -import itertools -import optparse -import os -import sys -import time -import traceback -import random -import yaml - -from . import realistic -from . 
import common - -NANOSECOND = int(1e9) - -def reader(bucket, worker_id, file_names, queue, rand): - while True: - objname = rand.choice(file_names) - key = bucket.new_key(objname) - - fp = realistic.FileValidator() - result = dict( - type='r', - bucket=bucket.name, - key=key.name, - worker=worker_id, - ) - - start = time.time() - try: - key.get_contents_to_file(fp._file) - except gevent.GreenletExit: - raise - except Exception as e: - # stop timer ASAP, even on errors - end = time.time() - result.update( - error=dict( - msg=str(e), - traceback=traceback.format_exc(), - ), - ) - # certain kinds of programmer errors make this a busy - # loop; let parent greenlet get some time too - time.sleep(0) - else: - end = time.time() - - if not fp.valid(): - m='md5sum check failed start={s} ({se}) end={e} size={sz} obj={o}'.format(s=time.ctime(start), se=start, e=end, sz=fp._file.tell(), o=objname) - result.update( - error=dict( - msg=m, - traceback=traceback.format_exc(), - ), - ) - print("ERROR:", m) - else: - elapsed = end - start - result.update( - start=start, - duration=int(round(elapsed * NANOSECOND)), - ) - queue.put(result) - -def writer(bucket, worker_id, file_names, files, queue, rand): - while True: - fp = next(files) - fp.seek(0) - objname = rand.choice(file_names) - key = bucket.new_key(objname) - - result = dict( - type='w', - bucket=bucket.name, - key=key.name, - worker=worker_id, - ) - - start = time.time() - try: - key.set_contents_from_file(fp) - except gevent.GreenletExit: - raise - except Exception as e: - # stop timer ASAP, even on errors - end = time.time() - result.update( - error=dict( - msg=str(e), - traceback=traceback.format_exc(), - ), - ) - # certain kinds of programmer errors make this a busy - # loop; let parent greenlet get some time too - time.sleep(0) - else: - end = time.time() - - elapsed = end - start - result.update( - start=start, - duration=int(round(elapsed * NANOSECOND)), - ) - - queue.put(result) - -def parse_options(): - parser = optparse.OptionParser( - usage='%prog [OPTS] 0: - print("Uploading initial set of {num} files".format(num=config.readwrite.files.num)) - warmup_pool = gevent.pool.Pool(size=100) - for file_name in file_names: - fp = next(files) - warmup_pool.spawn( - write_file, - bucket=bucket, - file_name=file_name, - fp=fp, - ) - warmup_pool.join() - - # main work - print("Starting main worker loop.") - print("Using file size: {size} +- {stddev}".format(size=config.readwrite.files.size, stddev=config.readwrite.files.stddev)) - print("Spawning {w} writers and {r} readers...".format(w=config.readwrite.writers, r=config.readwrite.readers)) - group = gevent.pool.Group() - rand_writer = random.Random(seeds['writer']) - - # Don't create random files if deterministic_files_names is set and true - if not config.readwrite.get('deterministic_file_names'): - for x in range(config.readwrite.writers): - this_rand = random.Random(rand_writer.randrange(2**32)) - group.spawn( - writer, - bucket=bucket, - worker_id=x, - file_names=file_names, - files=files, - queue=q, - rand=this_rand, - ) - - # Since the loop generating readers already uses config.readwrite.readers - # and the file names are already generated (randomly or deterministically), - # this loop needs no additional qualifiers. 
If zero readers are specified, - # it will behave as expected (no data is read) - rand_reader = random.Random(seeds['reader']) - for x in range(config.readwrite.readers): - this_rand = random.Random(rand_reader.randrange(2**32)) - group.spawn( - reader, - bucket=bucket, - worker_id=x, - file_names=file_names, - queue=q, - rand=this_rand, - ) - def stop(): - group.kill(block=True) - q.put(StopIteration) - gevent.spawn_later(config.readwrite.duration, stop) - - # wait for all the tests to finish - group.join() - print('post-join, queue size {size}'.format(size=q.qsize())) - - if q.qsize() > 0: - for temp_dict in q: - if 'error' in temp_dict: - raise Exception('exception:\n\t{msg}\n\t{trace}'.format( - msg=temp_dict['error']['msg'], - trace=temp_dict['error']['traceback']) - ) - else: - yaml.safe_dump(temp_dict, stream=real_stdout) - - finally: - # cleanup - if options.cleanup: - if bucket is not None: - common.nuke_bucket(bucket) diff --git a/s3tests/realistic.py b/s3tests/realistic.py deleted file mode 100644 index c4b6920..0000000 --- a/s3tests/realistic.py +++ /dev/null @@ -1,281 +0,0 @@ -import hashlib -import random -import string -import struct -import time -import math -import tempfile -import shutil -import os - - -NANOSECOND = int(1e9) - - -def generate_file_contents(size): - """ - A helper function to generate binary contents for a given size, and - calculates the md5 hash of the contents appending itself at the end of the - blob. - It uses sha1's hexdigest which is 40 chars long. So any binary generated - should remove the last 40 chars from the blob to retrieve the original hash - and binary so that validity can be proved. - """ - size = int(size) - contents = os.urandom(size) - content_hash = hashlib.sha1(contents).hexdigest() - return contents + content_hash - - -class FileValidator(object): - - def __init__(self, f=None): - self._file = tempfile.SpooledTemporaryFile() - self.original_hash = None - self.new_hash = None - if f: - f.seek(0) - shutil.copyfileobj(f, self._file) - - def valid(self): - """ - Returns True if this file looks valid. The file is valid if the end - of the file has the md5 digest for the first part of the file. - """ - self._file.seek(0) - contents = self._file.read() - self.original_hash, binary = contents[-40:], contents[:-40] - self.new_hash = hashlib.sha1(binary).hexdigest() - if not self.new_hash == self.original_hash: - print('original hash: ', self.original_hash) - print('new hash: ', self.new_hash) - print('size: ', self._file.tell()) - return False - return True - - # XXX not sure if we need all of these - def seek(self, offset, whence=os.SEEK_SET): - self._file.seek(offset, whence) - - def tell(self): - return self._file.tell() - - def read(self, size=-1): - return self._file.read(size) - - def write(self, data): - self._file.write(data) - self._file.seek(0) - - -class RandomContentFile(object): - def __init__(self, size, seed): - self.size = size - self.seed = seed - self.random = random.Random(self.seed) - - # Boto likes to seek once more after it's done reading, so we need to save the last chunks/seek value. 
- self.last_chunks = self.chunks = None - self.last_seek = None - - # Let seek initialize the rest of it, rather than dup code - self.seek(0) - - def _mark_chunk(self): - self.chunks.append([self.offset, int(round((time.time() - self.last_seek) * NANOSECOND))]) - - def seek(self, offset, whence=os.SEEK_SET): - if whence == os.SEEK_SET: - self.offset = offset - elif whence == os.SEEK_END: - self.offset = self.size + offset; - elif whence == os.SEEK_CUR: - self.offset += offset - - assert self.offset == 0 - - self.random.seed(self.seed) - self.buffer = '' - - self.hash = hashlib.md5() - self.digest_size = self.hash.digest_size - self.digest = None - - # Save the last seek time as our start time, and the last chunks - self.last_chunks = self.chunks - # Before emptying. - self.last_seek = time.time() - self.chunks = [] - - def tell(self): - return self.offset - - def _generate(self): - # generate and return a chunk of pseudorandom data - size = min(self.size, 1*1024*1024) # generate at most 1 MB at a time - chunks = int(math.ceil(size/8.0)) # number of 8-byte chunks to create - - l = [self.random.getrandbits(64) for _ in range(chunks)] - s = struct.pack(chunks*'Q', *l) - return s - - def read(self, size=-1): - if size < 0: - size = self.size - self.offset - - r = [] - - random_count = min(size, self.size - self.offset - self.digest_size) - if random_count > 0: - while len(self.buffer) < random_count: - self.buffer += self._generate() - self.offset += random_count - size -= random_count - data, self.buffer = self.buffer[:random_count], self.buffer[random_count:] - if self.hash is not None: - self.hash.update(data) - r.append(data) - - digest_count = min(size, self.size - self.offset) - if digest_count > 0: - if self.digest is None: - self.digest = self.hash.digest() - self.hash = None - self.offset += digest_count - size -= digest_count - data = self.digest[:digest_count] - r.append(data) - - self._mark_chunk() - - return ''.join(r) - - -class PrecomputedContentFile(object): - def __init__(self, f): - self._file = tempfile.SpooledTemporaryFile() - f.seek(0) - shutil.copyfileobj(f, self._file) - - self.last_chunks = self.chunks = None - self.seek(0) - - def seek(self, offset, whence=os.SEEK_SET): - self._file.seek(offset, whence) - - if self.tell() == 0: - # only reset the chunks when seeking to the beginning - self.last_chunks = self.chunks - self.last_seek = time.time() - self.chunks = [] - - def tell(self): - return self._file.tell() - - def read(self, size=-1): - data = self._file.read(size) - self._mark_chunk() - return data - - def _mark_chunk(self): - elapsed = time.time() - self.last_seek - elapsed_nsec = int(round(elapsed * NANOSECOND)) - self.chunks.append([self.tell(), elapsed_nsec]) - -class FileVerifier(object): - def __init__(self): - self.size = 0 - self.hash = hashlib.md5() - self.buf = '' - self.created_at = time.time() - self.chunks = [] - - def _mark_chunk(self): - self.chunks.append([self.size, int(round((time.time() - self.created_at) * NANOSECOND))]) - - def write(self, data): - self.size += len(data) - self.buf += data - digsz = -1*self.hash.digest_size - new_data, self.buf = self.buf[0:digsz], self.buf[digsz:] - self.hash.update(new_data) - self._mark_chunk() - - def valid(self): - """ - Returns True if this file looks valid. The file is valid if the end - of the file has the md5 digest for the first part of the file. 
- """ - if self.size < self.hash.digest_size: - return self.hash.digest().startswith(self.buf) - - return self.buf == self.hash.digest() - - -def files(mean, stddev, seed=None): - """ - Yields file-like objects with effectively random contents, where - the size of each file follows the normal distribution with `mean` - and `stddev`. - - Beware, the file-likeness is very shallow. You can use boto's - `key.set_contents_from_file` to send these to S3, but they are not - full file objects. - - The last 128 bits are the MD5 digest of the previous bytes, for - verifying round-trip data integrity. For example, if you - re-download the object and place the contents into a file called - ``foo``, the following should print two identical lines: - - python -c 'import sys, hashlib; data=sys.stdin.read(); print hashlib.md5(data[:-16]).hexdigest(); print "".join("%02x" % ord(c) for c in data[-16:])' = 0: - break - yield RandomContentFile(size=size, seed=rand.getrandbits(32)) - - -def files2(mean, stddev, seed=None, numfiles=10): - """ - Yields file objects with effectively random contents, where the - size of each file follows the normal distribution with `mean` and - `stddev`. - - Rather than continuously generating new files, this pre-computes and - stores `numfiles` files and yields them in a loop. - """ - # pre-compute all the files (and save with TemporaryFiles) - fs = [] - for _ in range(numfiles): - t = tempfile.SpooledTemporaryFile() - t.write(generate_file_contents(random.normalvariate(mean, stddev))) - t.seek(0) - fs.append(t) - - while True: - for f in fs: - yield f - - -def names(mean, stddev, charset=None, seed=None): - """ - Yields strings that are somewhat plausible as file names, where - the lenght of each filename follows the normal distribution with - `mean` and `stddev`. - """ - if charset is None: - charset = string.ascii_lowercase - rand = random.Random(seed) - while True: - while True: - length = int(rand.normalvariate(mean, stddev)) - if length > 0: - break - name = ''.join(rand.choice(charset) for _ in range(length)) - yield name diff --git a/s3tests/roundtrip.py b/s3tests/roundtrip.py deleted file mode 100644 index cbc9379..0000000 --- a/s3tests/roundtrip.py +++ /dev/null @@ -1,219 +0,0 @@ -import gevent -import gevent.pool -import gevent.queue -import gevent.monkey; gevent.monkey.patch_all() -import itertools -import optparse -import os -import sys -import time -import traceback -import random -import yaml - -from . import realistic -from . 
import common - -NANOSECOND = int(1e9) - -def writer(bucket, objname, fp, queue): - key = bucket.new_key(objname) - - result = dict( - type='w', - bucket=bucket.name, - key=key.name, - ) - - start = time.time() - try: - key.set_contents_from_file(fp, rewind=True) - except gevent.GreenletExit: - raise - except Exception as e: - # stop timer ASAP, even on errors - end = time.time() - result.update( - error=dict( - msg=str(e), - traceback=traceback.format_exc(), - ), - ) - # certain kinds of programmer errors make this a busy - # loop; let parent greenlet get some time too - time.sleep(0) - else: - end = time.time() - - elapsed = end - start - result.update( - start=start, - duration=int(round(elapsed * NANOSECOND)), - chunks=fp.last_chunks, - ) - queue.put(result) - - -def reader(bucket, objname, queue): - key = bucket.new_key(objname) - - fp = realistic.FileVerifier() - result = dict( - type='r', - bucket=bucket.name, - key=key.name, - ) - - start = time.time() - try: - key.get_contents_to_file(fp) - except gevent.GreenletExit: - raise - except Exception as e: - # stop timer ASAP, even on errors - end = time.time() - result.update( - error=dict( - msg=str(e), - traceback=traceback.format_exc(), - ), - ) - # certain kinds of programmer errors make this a busy - # loop; let parent greenlet get some time too - time.sleep(0) - else: - end = time.time() - - if not fp.valid(): - result.update( - error=dict( - msg='md5sum check failed', - ), - ) - - elapsed = end - start - result.update( - start=start, - duration=int(round(elapsed * NANOSECOND)), - chunks=fp.chunks, - ) - queue.put(result) - -def parse_options(): - parser = optparse.OptionParser( - usage='%prog [OPTS] 11} hits -Availability: {avail:>11.2f} % -Elapsed time: {elapsed:>11.2f} secs -Data transferred: {data:>11.2f} MB -Response time: {resp_time:>11.2f} secs -Transaction rate: {trans_rate:>11.2f} trans/sec -Throughput: {data_rate:>11.2f} MB/sec -Concurrency: {conc:>11.2f} -Successful transactions: {trans_success:>11} -Failed transactions: {trans_fail:>11} -Longest transaction: {trans_long:>11.2f} -Shortest transaction: {trans_short:>11.2f} -""" - -def parse_options(): - usage = "usage: %prog [options]" - parser = optparse.OptionParser(usage=usage) - parser.add_option( - "-f", "--file", dest="input", metavar="FILE", - help="Name of input YAML file. 
Default uses sys.stdin") - parser.add_option( - "-v", "--verbose", dest="verbose", action="store_true", - help="Enable verbose output") - - (options, args) = parser.parse_args() - - if not options.input and os.isatty(sys.stdin.fileno()): - parser.error("option -f required if no data is provided " - "in stdin") - - return (options, args) - -def main(): - (options, args) = parse_options() - - total = {} - durations = {} - min_time = {} - max_time = {} - errors = {} - success = {} - - calculate_stats(options, total, durations, min_time, max_time, errors, - success) - print_results(total, durations, min_time, max_time, errors, success) - -def calculate_stats(options, total, durations, min_time, max_time, errors, - success): - print('Calculating statistics...') - - f = sys.stdin - if options.input: - f = file(options.input, 'r') - - for item in yaml.safe_load_all(f): - type_ = item.get('type') - if type_ not in ('r', 'w'): - continue # ignore any invalid items - - if 'error' in item: - errors[type_] = errors.get(type_, 0) + 1 - continue # skip rest of analysis for this item - else: - success[type_] = success.get(type_, 0) + 1 - - # parse the item - data_size = item['chunks'][-1][0] - duration = item['duration'] - start = item['start'] - end = start + duration / float(NANOSECONDS) - - if options.verbose: - print("[{type}] POSIX time: {start:>18.2f} - {end:<18.2f} " \ - "{data:>11.2f} KB".format( - type=type_, - start=start, - end=end, - data=data_size / 1024.0, # convert to KB - )) - - # update time boundaries - prev = min_time.setdefault(type_, start) - if start < prev: - min_time[type_] = start - prev = max_time.setdefault(type_, end) - if end > prev: - max_time[type_] = end - - # save the duration - if type_ not in durations: - durations[type_] = [] - durations[type_].append(duration) - - # add to running totals - total[type_] = total.get(type_, 0) + data_size - -def print_results(total, durations, min_time, max_time, errors, success): - for type_ in list(total.keys()): - trans_success = success.get(type_, 0) - trans_fail = errors.get(type_, 0) - trans = trans_success + trans_fail - avail = trans_success * 100.0 / trans - elapsed = max_time[type_] - min_time[type_] - data = total[type_] / 1024.0 / 1024.0 # convert to MB - resp_time = sum(durations[type_]) / float(NANOSECONDS) / \ - len(durations[type_]) - trans_rate = trans / elapsed - data_rate = data / elapsed - conc = trans_rate * resp_time - trans_long = max(durations[type_]) / float(NANOSECONDS) - trans_short = min(durations[type_]) / float(NANOSECONDS) - - print(OUTPUT_FORMAT.format( - type=type_, - trans_success=trans_success, - trans_fail=trans_fail, - trans=trans, - avail=avail, - elapsed=elapsed, - data=data, - resp_time=resp_time, - trans_rate=trans_rate, - data_rate=data_rate, - conc=conc, - trans_long=trans_long, - trans_short=trans_short, - )) - -if __name__ == '__main__': - main() - diff --git a/s3tests_boto3/fuzz/__init__.py b/s3tests_boto3/fuzz/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/s3tests_boto3/fuzz/headers.py b/s3tests_boto3/fuzz/headers.py deleted file mode 100644 index fd04372..0000000 --- a/s3tests_boto3/fuzz/headers.py +++ /dev/null @@ -1,378 +0,0 @@ -from boto.s3.connection import S3Connection -from boto.exception import BotoServerError -from boto.s3.key import Key -from http.client import BadStatusLine -from optparse import OptionParser -from .. 
import common - -import traceback -import itertools -import random -import string -import struct -import yaml -import sys -import re - - -class DecisionGraphError(Exception): - """ Raised when a node in a graph tries to set a header or - key that was previously set by another node - """ - def __init__(self, value): - self.value = value - - def __str__(self): - return repr(self.value) - - -class RecursionError(Exception): - """Runaway recursion in string formatting""" - - def __init__(self, msg): - self.msg = msg - - def __str__(self): - return '{0.__doc__}: {0.msg!r}'.format(self) - - -def assemble_decision(decision_graph, prng): - """ Take in a graph describing the possible decision space and a random - number generator and traverse the graph to build a decision - """ - return descend_graph(decision_graph, 'start', prng) - - -def descend_graph(decision_graph, node_name, prng): - """ Given a graph and a particular node in that graph, set the values in - the node's "set" list, pick a choice from the "choice" list, and - recurse. Finally, return dictionary of values - """ - node = decision_graph[node_name] - - try: - choice = make_choice(node['choices'], prng) - if choice == '': - decision = {} - else: - decision = descend_graph(decision_graph, choice, prng) - except IndexError: - decision = {} - - for key, choices in node['set'].items(): - if key in decision: - raise DecisionGraphError("Node %s tried to set '%s', but that key was already set by a lower node!" %(node_name, key)) - decision[key] = make_choice(choices, prng) - - if 'headers' in node: - decision.setdefault('headers', []) - - for desc in node['headers']: - try: - (repetition_range, header, value) = desc - except ValueError: - (header, value) = desc - repetition_range = '1' - - try: - size_min, size_max = repetition_range.split('-', 1) - except ValueError: - size_min = size_max = repetition_range - - size_min = int(size_min) - size_max = int(size_max) - - num_reps = prng.randint(size_min, size_max) - if header in [h for h, v in decision['headers']]: - raise DecisionGraphError("Node %s tried to add header '%s', but that header already exists!" 
%(node_name, header)) - for _ in range(num_reps): - decision['headers'].append([header, value]) - - return decision - - -def make_choice(choices, prng): - """ Given a list of (possibly weighted) options or just a single option!, - choose one of the options taking weights into account and return the - choice - """ - if isinstance(choices, str): - return choices - weighted_choices = [] - for option in choices: - if option is None: - weighted_choices.append('') - continue - try: - (weight, value) = option.split(None, 1) - weight = int(weight) - except ValueError: - weight = 1 - value = option - - if value == 'null' or value == 'None': - value = '' - - for _ in range(weight): - weighted_choices.append(value) - - return prng.choice(weighted_choices) - - -def expand_headers(decision, prng): - expanded_headers = {} - for header in decision['headers']: - h = expand(decision, header[0], prng) - v = expand(decision, header[1], prng) - expanded_headers[h] = v - return expanded_headers - - -def expand(decision, value, prng): - c = itertools.count() - fmt = RepeatExpandingFormatter(prng) - new = fmt.vformat(value, [], decision) - return new - - -class RepeatExpandingFormatter(string.Formatter): - charsets = { - 'printable_no_whitespace': string.printable.translate( - "".maketrans('', '', string.whitespace)), - 'printable': string.printable, - 'punctuation': string.punctuation, - 'whitespace': string.whitespace, - 'digits': string.digits - } - - def __init__(self, prng, _recursion=0): - super(RepeatExpandingFormatter, self).__init__() - # this class assumes it is always instantiated once per - # formatting; use that to detect runaway recursion - self.prng = prng - self._recursion = _recursion - - def get_value(self, key, args, kwargs): - fields = key.split(None, 1) - fn = getattr(self, 'special_{name}'.format(name=fields[0]), None) - if fn is not None: - if len(fields) == 1: - fields.append('') - return fn(fields[1]) - - val = super(RepeatExpandingFormatter, self).get_value(key, args, kwargs) - if self._recursion > 5: - raise RecursionError(key) - fmt = self.__class__(self.prng, _recursion=self._recursion+1) - - n = fmt.vformat(val, args, kwargs) - return n - - def special_random(self, args): - arg_list = args.split() - try: - size_min, size_max = arg_list[0].split('-', 1) - except ValueError: - size_min = size_max = arg_list[0] - except IndexError: - size_min = '0' - size_max = '1000' - - size_min = int(size_min) - size_max = int(size_max) - length = self.prng.randint(size_min, size_max) - - try: - charset_arg = arg_list[1] - except IndexError: - charset_arg = 'printable' - - if charset_arg == 'binary' or charset_arg == 'binary_no_whitespace': - num_bytes = length + 8 - tmplist = [self.prng.getrandbits(64) for _ in range(num_bytes // 8)] - tmpstring = struct.pack((num_bytes // 8) * 'Q', *tmplist) - if charset_arg == 'binary_no_whitespace': - tmpstring = ''.join([c] for c in tmpstring if c not in bytes( - string.whitespace, 'utf-8')) - return tmpstring[0:length] - else: - charset = self.charsets[charset_arg] - return ''.join([self.prng.choice(charset) for _ in range(length)]) # Won't scale nicely - - -def parse_options(): - parser = OptionParser() - parser.add_option('-O', '--outfile', help='write output to FILE. 
Defaults to STDOUT', metavar='FILE') - parser.add_option('--seed', dest='seed', type='int', help='initial seed for the random number generator') - parser.add_option('--seed-file', dest='seedfile', help='read seeds for specific requests from FILE', metavar='FILE') - parser.add_option('-n', dest='num_requests', type='int', help='issue NUM requests before stopping', metavar='NUM') - parser.add_option('-v', '--verbose', dest='verbose', action="store_true", help='turn on verbose output') - parser.add_option('-d', '--debug', dest='debug', action="store_true", help='turn on debugging (very verbose) output') - parser.add_option('--decision-graph', dest='graph_filename', help='file in which to find the request decision graph') - parser.add_option('--no-cleanup', dest='cleanup', action="store_false", help='turn off teardown so you can peruse the state of buckets after testing') - - parser.set_defaults(num_requests=5) - parser.set_defaults(cleanup=True) - parser.set_defaults(graph_filename='request_decision_graph.yml') - return parser.parse_args() - - -def randomlist(seed=None): - """ Returns an infinite generator of random numbers - """ - rng = random.Random(seed) - while True: - yield rng.randint(0,100000) #100,000 seeds is enough, right? - - -def populate_buckets(conn, alt): - """ Creates buckets and keys for fuzz testing and sets appropriate - permissions. Returns a dictionary of the bucket and key names. - """ - breadable = common.get_new_bucket(alt) - bwritable = common.get_new_bucket(alt) - bnonreadable = common.get_new_bucket(alt) - - oreadable = Key(breadable) - owritable = Key(bwritable) - ononreadable = Key(breadable) - oreadable.set_contents_from_string('oreadable body') - owritable.set_contents_from_string('owritable body') - ononreadable.set_contents_from_string('ononreadable body') - - breadable.set_acl('public-read') - bwritable.set_acl('public-read-write') - bnonreadable.set_acl('private') - oreadable.set_acl('public-read') - owritable.set_acl('public-read-write') - ononreadable.set_acl('private') - - return dict( - bucket_readable=breadable.name, - bucket_writable=bwritable.name, - bucket_not_readable=bnonreadable.name, - bucket_not_writable=breadable.name, - object_readable=oreadable.key, - object_writable=owritable.key, - object_not_readable=ononreadable.key, - object_not_writable=oreadable.key, - ) - - -def _main(): - """ The main script - """ - (options, args) = parse_options() - random.seed(options.seed if options.seed else None) - s3_connection = common.s3.main - alt_connection = common.s3.alt - - if options.outfile: - OUT = open(options.outfile, 'w') - else: - OUT = sys.stderr - - VERBOSE = DEBUG = open('/dev/null', 'w') - if options.verbose: - VERBOSE = OUT - if options.debug: - DEBUG = OUT - VERBOSE = OUT - - request_seeds = None - if options.seedfile: - FH = open(options.seedfile, 'r') - request_seeds = [int(line) for line in FH if line != '\n'] - print('Seedfile: %s' %options.seedfile, file=OUT) - print('Number of requests: %d' %len(request_seeds), file=OUT) - else: - if options.seed: - print('Initial Seed: %d' %options.seed, file=OUT) - print('Number of requests: %d' %options.num_requests, file=OUT) - random_list = randomlist(options.seed) - request_seeds = itertools.islice(random_list, options.num_requests) - - print('Decision Graph: %s' %options.graph_filename, file=OUT) - - graph_file = open(options.graph_filename, 'r') - decision_graph = yaml.safe_load(graph_file) - - constants = populate_buckets(s3_connection, alt_connection) - print("Test Buckets/Objects:", 
file=VERBOSE) - for key, value in constants.items(): - print("\t%s: %s" %(key, value), file=VERBOSE) - - print("Begin Fuzzing...", file=OUT) - print('='*80, file=VERBOSE) - for request_seed in request_seeds: - print('Seed is: %r' %request_seed, file=VERBOSE) - prng = random.Random(request_seed) - decision = assemble_decision(decision_graph, prng) - decision.update(constants) - - method = expand(decision, decision['method'], prng) - path = expand(decision, decision['urlpath'], prng) - - try: - body = expand(decision, decision['body'], prng) - except KeyError: - body = '' - - try: - headers = expand_headers(decision, prng) - except KeyError: - headers = {} - - print("%r %r" %(method[:100], path[:100]), file=VERBOSE) - for h, v in headers.items(): - print("%r: %r" %(h[:50], v[:50]), file=VERBOSE) - print("%r\n" % body[:100], file=VERBOSE) - - print('FULL REQUEST', file=DEBUG) - print('Method: %r' %method, file=DEBUG) - print('Path: %r' %path, file=DEBUG) - print('Headers:', file=DEBUG) - for h, v in headers.items(): - print("\t%r: %r" %(h, v), file=DEBUG) - print('Body: %r\n' %body, file=DEBUG) - - failed = False # Let's be optimistic, shall we? - try: - response = s3_connection.make_request(method, path, data=body, headers=headers, override_num_retries=1) - body = response.read() - except BotoServerError as e: - response = e - body = e.body - failed = True - except BadStatusLine as e: - print('FAILED: failed to parse response (BadStatusLine); probably a NUL byte in your request?', file=OUT) - print('='*80, file=VERBOSE) - continue - - if failed: - print('FAILED:', file=OUT) - OLD_VERBOSE = VERBOSE - OLD_DEBUG = DEBUG - VERBOSE = DEBUG = OUT - print('Seed was: %r' %request_seed, file=VERBOSE) - print('Response status code: %d %s' %(response.status, response.reason), file=VERBOSE) - print('Body:\n%s' %body, file=DEBUG) - print('='*80, file=VERBOSE) - if failed: - VERBOSE = OLD_VERBOSE - DEBUG = OLD_DEBUG - - print('...done fuzzing', file=OUT) - - if options.cleanup: - common.teardown() - - -def main(): - common.setup() - try: - _main() - except Exception as e: - traceback.print_exc() - common.teardown() - diff --git a/s3tests_boto3/fuzz/test/__init__.py b/s3tests_boto3/fuzz/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/s3tests_boto3/fuzz/test/test_fuzzer.py b/s3tests_boto3/fuzz/test/test_fuzzer.py deleted file mode 100644 index e2f93ae..0000000 --- a/s3tests_boto3/fuzz/test/test_fuzzer.py +++ /dev/null @@ -1,404 +0,0 @@ -""" -Unit-test suite for the S3 fuzzer - -The fuzzer is a grammar-based random S3 operation generator -that produces random operation sequences in an effort to -crash the server. This unit-test suite does not test -S3 servers, but rather the fuzzer infrastructure. - -It works by running the fuzzer off of a simple grammar, -and checking the producted requests to ensure that they -include the expected sorts of operations in the expected -proportions. 
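For orientation, the flow these tests exercise can be sketched in a few lines. The two-node graph literal below is only an illustration (the real grammars live in request_decision_graph.yml and in build_graph() further down); assemble_decision() and expand() are the fuzzer functions under test:

    import random
    from s3tests_boto3.fuzz.headers import assemble_decision, expand

    # Hypothetical two-node grammar in the same shape as the YAML decision graph:
    # 'start' sets a method and a templated path, then always descends to a leaf.
    graph = {
        'start': {'set': {'method': 'GET', 'urlpath': '/{bucket_readable}'},
                  'choices': ['leaf']},
        'leaf':  {'set': {'bucket_readable': 'fuzz-test-bucket'},
                  'choices': []},
    }

    prng = random.Random(1)
    decision = assemble_decision(graph, prng)
    # decision == {'bucket_readable': 'fuzz-test-bucket',
    #              'method': 'GET', 'urlpath': '/{bucket_readable}'}
    print(expand(decision, decision['urlpath'], prng))   # -> /fuzz-test-bucket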
-""" -import sys -import itertools -import nose -import random -import string -import yaml - -from ..headers import * - -from nose.tools import eq_ as eq -from nose.tools import assert_true -from nose.plugins.attrib import attr - -from ...functional.utils import assert_raises -from functools import reduce - -_decision_graph = {} - -def check_access_denied(fn, *args, **kwargs): - e = assert_raises(boto.exception.S3ResponseError, fn, *args, **kwargs) - eq(e.status, 403) - eq(e.reason, 'Forbidden') - eq(e.error_code, 'AccessDenied') - - -def build_graph(): - graph = {} - graph['start'] = { - 'set': {}, - 'choices': ['node2'] - } - graph['leaf'] = { - 'set': { - 'key1': 'value1', - 'key2': 'value2' - }, - 'headers': [ - ['1-2', 'random-header-{random 5-10 printable}', '{random 20-30 punctuation}'] - ], - 'choices': [] - } - graph['node1'] = { - 'set': { - 'key3': 'value3', - 'header_val': [ - '3 h1', - '2 h2', - 'h3' - ] - }, - 'headers': [ - ['1-1', 'my-header', '{header_val}'], - ], - 'choices': ['leaf'] - } - graph['node2'] = { - 'set': { - 'randkey': 'value-{random 10-15 printable}', - 'path': '/{bucket_readable}', - 'indirect_key1': '{key1}' - }, - 'choices': ['leaf'] - } - graph['bad_node'] = { - 'set': { - 'key1': 'value1' - }, - 'choices': ['leaf'] - } - graph['nonexistant_child_node'] = { - 'set': {}, - 'choices': ['leafy_greens'] - } - graph['weighted_node'] = { - 'set': { - 'k1': [ - 'foo', - '2 bar', - '1 baz' - ] - }, - 'choices': [ - 'foo', - '2 bar', - '1 baz' - ] - } - graph['null_choice_node'] = { - 'set': {}, - 'choices': [None] - } - graph['repeated_headers_node'] = { - 'set': {}, - 'headers': [ - ['1-2', 'random-header-{random 5-10 printable}', '{random 20-30 punctuation}'] - ], - 'choices': ['leaf'] - } - graph['weighted_null_choice_node'] = { - 'set': {}, - 'choices': ['3 null'] - } - return graph - - -#def test_foo(): - #graph_file = open('request_decision_graph.yml', 'r') - #graph = yaml.safe_load(graph_file) - #eq(graph['bucket_put_simple']['set']['grantee'], 0) - - -def test_load_graph(): - graph_file = open('request_decision_graph.yml', 'r') - graph = yaml.safe_load(graph_file) - graph['start'] - - -def test_descend_leaf_node(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'leaf', prng) - - eq(decision['key1'], 'value1') - eq(decision['key2'], 'value2') - e = assert_raises(KeyError, lambda x: decision[x], 'key3') - - -def test_descend_node(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'node1', prng) - - eq(decision['key1'], 'value1') - eq(decision['key2'], 'value2') - eq(decision['key3'], 'value3') - - -def test_descend_bad_node(): - graph = build_graph() - prng = random.Random(1) - assert_raises(DecisionGraphError, descend_graph, graph, 'bad_node', prng) - - -def test_descend_nonexistant_child(): - graph = build_graph() - prng = random.Random(1) - assert_raises(KeyError, descend_graph, graph, 'nonexistant_child_node', prng) - - -def test_expand_random_printable(): - prng = random.Random(1) - got = expand({}, '{random 10-15 printable}', prng) - eq(got, '[/pNI$;92@') - - -def test_expand_random_binary(): - prng = random.Random(1) - got = expand({}, '{random 10-15 binary}', prng) - eq(got, '\xdfj\xf1\xd80>a\xcd\xc4\xbb') - - -def test_expand_random_printable_no_whitespace(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random 500 printable_no_whitespace}', prng) - assert_true(reduce(lambda x, y: x and y, [x not in string.whitespace and x in string.printable for x 
in got])) - - -def test_expand_random_binary_no_whitespace(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random 500 binary_no_whitespace}', prng) - assert_true(reduce(lambda x, y: x and y, [x not in string.whitespace for x in got])) - - -def test_expand_random_no_args(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random}', prng) - assert_true(0 <= len(got) <= 1000) - assert_true(reduce(lambda x, y: x and y, [x in string.printable for x in got])) - - -def test_expand_random_no_charset(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random 10-30}', prng) - assert_true(10 <= len(got) <= 30) - assert_true(reduce(lambda x, y: x and y, [x in string.printable for x in got])) - - -def test_expand_random_exact_length(): - prng = random.Random(1) - for _ in range(1000): - got = expand({}, '{random 10 digits}', prng) - assert_true(len(got) == 10) - assert_true(reduce(lambda x, y: x and y, [x in string.digits for x in got])) - - -def test_expand_random_bad_charset(): - prng = random.Random(1) - assert_raises(KeyError, expand, {}, '{random 10-30 foo}', prng) - - -def test_expand_random_missing_length(): - prng = random.Random(1) - assert_raises(ValueError, expand, {}, '{random printable}', prng) - - -def test_assemble_decision(): - graph = build_graph() - prng = random.Random(1) - decision = assemble_decision(graph, prng) - - eq(decision['key1'], 'value1') - eq(decision['key2'], 'value2') - eq(decision['randkey'], 'value-{random 10-15 printable}') - eq(decision['indirect_key1'], '{key1}') - eq(decision['path'], '/{bucket_readable}') - assert_raises(KeyError, lambda x: decision[x], 'key3') - - -def test_expand_escape(): - prng = random.Random(1) - decision = dict( - foo='{{bar}}', - ) - got = expand(decision, '{foo}', prng) - eq(got, '{bar}') - - -def test_expand_indirect(): - prng = random.Random(1) - decision = dict( - foo='{bar}', - bar='quux', - ) - got = expand(decision, '{foo}', prng) - eq(got, 'quux') - - -def test_expand_indirect_double(): - prng = random.Random(1) - decision = dict( - foo='{bar}', - bar='{quux}', - quux='thud', - ) - got = expand(decision, '{foo}', prng) - eq(got, 'thud') - - -def test_expand_recursive(): - prng = random.Random(1) - decision = dict( - foo='{foo}', - ) - e = assert_raises(RecursionError, expand, decision, '{foo}', prng) - eq(str(e), "Runaway recursion in string formatting: 'foo'") - - -def test_expand_recursive_mutual(): - prng = random.Random(1) - decision = dict( - foo='{bar}', - bar='{foo}', - ) - e = assert_raises(RecursionError, expand, decision, '{foo}', prng) - eq(str(e), "Runaway recursion in string formatting: 'foo'") - - -def test_expand_recursive_not_too_eager(): - prng = random.Random(1) - decision = dict( - foo='bar', - ) - got = expand(decision, 100*'{foo}', prng) - eq(got, 100*'bar') - - -def test_make_choice_unweighted_with_space(): - prng = random.Random(1) - choice = make_choice(['foo bar'], prng) - eq(choice, 'foo bar') - -def test_weighted_choices(): - graph = build_graph() - prng = random.Random(1) - - choices_made = {} - for _ in range(1000): - choice = make_choice(graph['weighted_node']['choices'], prng) - if choice in choices_made: - choices_made[choice] += 1 - else: - choices_made[choice] = 1 - - foo_percentage = choices_made['foo'] / 1000.0 - bar_percentage = choices_made['bar'] / 1000.0 - baz_percentage = choices_made['baz'] / 1000.0 - nose.tools.assert_almost_equal(foo_percentage, 0.25, 1) - nose.tools.assert_almost_equal(bar_percentage, 0.50, 1) - 
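    # make_choice() expands each weighted option into `weight` copies before
    # sampling uniformly, so ['foo', '2 bar', '1 baz'] becomes
    # ['foo', 'bar', 'bar', 'baz'] and foo/bar/baz should come back roughly
    # 25%/50%/25% of the time -- which is what these assertions check.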
nose.tools.assert_almost_equal(baz_percentage, 0.25, 1) - - -def test_null_choices(): - graph = build_graph() - prng = random.Random(1) - choice = make_choice(graph['null_choice_node']['choices'], prng) - - eq(choice, '') - - -def test_weighted_null_choices(): - graph = build_graph() - prng = random.Random(1) - choice = make_choice(graph['weighted_null_choice_node']['choices'], prng) - - eq(choice, '') - - -def test_null_child(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'null_choice_node', prng) - - eq(decision, {}) - - -def test_weighted_set(): - graph = build_graph() - prng = random.Random(1) - - choices_made = {} - for _ in range(1000): - choice = make_choice(graph['weighted_node']['set']['k1'], prng) - if choice in choices_made: - choices_made[choice] += 1 - else: - choices_made[choice] = 1 - - foo_percentage = choices_made['foo'] / 1000.0 - bar_percentage = choices_made['bar'] / 1000.0 - baz_percentage = choices_made['baz'] / 1000.0 - nose.tools.assert_almost_equal(foo_percentage, 0.25, 1) - nose.tools.assert_almost_equal(bar_percentage, 0.50, 1) - nose.tools.assert_almost_equal(baz_percentage, 0.25, 1) - - -def test_header_presence(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'node1', prng) - - c1 = itertools.count() - c2 = itertools.count() - for header, value in decision['headers']: - if header == 'my-header': - eq(value, '{header_val}') - assert_true(next(c1) < 1) - elif header == 'random-header-{random 5-10 printable}': - eq(value, '{random 20-30 punctuation}') - assert_true(next(c2) < 2) - else: - raise KeyError('unexpected header found: %s' % header) - - assert_true(next(c1)) - assert_true(next(c2)) - - -def test_duplicate_header(): - graph = build_graph() - prng = random.Random(1) - assert_raises(DecisionGraphError, descend_graph, graph, 'repeated_headers_node', prng) - - -def test_expand_headers(): - graph = build_graph() - prng = random.Random(1) - decision = descend_graph(graph, 'node1', prng) - expanded_headers = expand_headers(decision, prng) - - for header, value in expanded_headers.items(): - if header == 'my-header': - assert_true(value in ['h1', 'h2', 'h3']) - elif header.startswith('random-header-'): - assert_true(20 <= len(value) <= 30) - assert_true(string.strip(value, RepeatExpandingFormatter.charsets['punctuation']) is '') - else: - raise DecisionGraphError('unexpected header found: "%s"' % header) - diff --git a/s3tests_boto3/generate_objects.py b/s3tests_boto3/generate_objects.py deleted file mode 100644 index b8d65a7..0000000 --- a/s3tests_boto3/generate_objects.py +++ /dev/null @@ -1,117 +0,0 @@ -from boto.s3.key import Key -from optparse import OptionParser -from . import realistic -import traceback -import random -from . import common -import sys - - -def parse_opts(): - parser = OptionParser() - parser.add_option('-O', '--outfile', help='write output to FILE. Defaults to STDOUT', metavar='FILE') - parser.add_option('-b', '--bucket', dest='bucket', help='push objects to BUCKET', metavar='BUCKET') - parser.add_option('--seed', dest='seed', help='optional seed for the random number generator') - - return parser.parse_args() - - -def get_random_files(quantity, mean, stddev, seed): - """Create file-like objects with pseudorandom contents. 
- IN: - number of files to create - mean file size in bytes - standard deviation from mean file size - seed for PRNG - OUT: - list of file handles - """ - file_generator = realistic.files(mean, stddev, seed) - return [next(file_generator) for _ in range(quantity)] - - -def upload_objects(bucket, files, seed): - """Upload a bunch of files to an S3 bucket - IN: - boto S3 bucket object - list of file handles to upload - seed for PRNG - OUT: - list of boto S3 key objects - """ - keys = [] - name_generator = realistic.names(15, 4, seed=seed) - - for fp in files: - print('sending file with size %dB' % fp.size, file=sys.stderr) - key = Key(bucket) - key.key = next(name_generator) - key.set_contents_from_file(fp, rewind=True) - key.set_acl('public-read') - keys.append(key) - - return keys - - -def _main(): - '''To run the static content load test, make sure you've bootstrapped your - test environment and set up your config.yaml file, then run the following: - S3TEST_CONF=config.yaml virtualenv/bin/s3tests-generate-objects.py --seed 1234 - - This creates a bucket with your S3 credentials (from config.yaml) and - fills it with garbage objects as described in the - file_generation.groups section of config.yaml. It writes a list of - URLS to those objects to the file listed in file_generation.url_file - in config.yaml. - - Once you have objcts in your bucket, run the siege benchmarking program: - siege --rc ./siege.conf -r 5 - - This tells siege to read the ./siege.conf config file which tells it to - use the urls in ./urls.txt and log to ./siege.log. It hits each url in - urls.txt 5 times (-r flag). - - Results are printed to the terminal and written in CSV format to - ./siege.log - ''' - (options, args) = parse_opts() - - #SETUP - random.seed(options.seed if options.seed else None) - conn = common.s3.main - - if options.outfile: - OUTFILE = open(options.outfile, 'w') - elif common.config.file_generation.url_file: - OUTFILE = open(common.config.file_generation.url_file, 'w') - else: - OUTFILE = sys.stdout - - if options.bucket: - bucket = conn.create_bucket(options.bucket) - else: - bucket = common.get_new_bucket() - - bucket.set_acl('public-read') - keys = [] - print('bucket: %s' % bucket.name, file=OUTFILE) - print('setup complete, generating files', file=sys.stderr) - for profile in common.config.file_generation.groups: - seed = random.random() - files = get_random_files(profile[0], profile[1], profile[2], seed) - keys += upload_objects(bucket, files, seed) - - print('finished sending files. generating urls', file=sys.stderr) - for key in keys: - print(key.generate_url(0, query_auth=False), file=OUTFILE) - - print('done', file=sys.stderr) - - -def main(): - common.setup() - try: - _main() - except Exception as e: - traceback.print_exc() - common.teardown() diff --git a/s3tests_boto3/readwrite.py b/s3tests_boto3/readwrite.py deleted file mode 100644 index 1afb3f1..0000000 --- a/s3tests_boto3/readwrite.py +++ /dev/null @@ -1,265 +0,0 @@ -import gevent -import gevent.pool -import gevent.queue -import gevent.monkey; gevent.monkey.patch_all() -import itertools -import optparse -import os -import sys -import time -import traceback -import random -import yaml - -from . import realistic -from . 
import common - -NANOSECOND = int(1e9) - -def reader(bucket, worker_id, file_names, queue, rand): - while True: - objname = rand.choice(file_names) - key = bucket.new_key(objname) - - fp = realistic.FileValidator() - result = dict( - type='r', - bucket=bucket.name, - key=key.name, - worker=worker_id, - ) - - start = time.time() - try: - key.get_contents_to_file(fp._file) - except gevent.GreenletExit: - raise - except Exception as e: - # stop timer ASAP, even on errors - end = time.time() - result.update( - error=dict( - msg=str(e), - traceback=traceback.format_exc(), - ), - ) - # certain kinds of programmer errors make this a busy - # loop; let parent greenlet get some time too - time.sleep(0) - else: - end = time.time() - - if not fp.valid(): - m='md5sum check failed start={s} ({se}) end={e} size={sz} obj={o}'.format(s=time.ctime(start), se=start, e=end, sz=fp._file.tell(), o=objname) - result.update( - error=dict( - msg=m, - traceback=traceback.format_exc(), - ), - ) - print("ERROR:", m) - else: - elapsed = end - start - result.update( - start=start, - duration=int(round(elapsed * NANOSECOND)), - ) - queue.put(result) - -def writer(bucket, worker_id, file_names, files, queue, rand): - while True: - fp = next(files) - fp.seek(0) - objname = rand.choice(file_names) - key = bucket.new_key(objname) - - result = dict( - type='w', - bucket=bucket.name, - key=key.name, - worker=worker_id, - ) - - start = time.time() - try: - key.set_contents_from_file(fp) - except gevent.GreenletExit: - raise - except Exception as e: - # stop timer ASAP, even on errors - end = time.time() - result.update( - error=dict( - msg=str(e), - traceback=traceback.format_exc(), - ), - ) - # certain kinds of programmer errors make this a busy - # loop; let parent greenlet get some time too - time.sleep(0) - else: - end = time.time() - - elapsed = end - start - result.update( - start=start, - duration=int(round(elapsed * NANOSECOND)), - ) - - queue.put(result) - -def parse_options(): - parser = optparse.OptionParser( - usage='%prog [OPTS] 0: - print("Uploading initial set of {num} files".format(num=config.readwrite.files.num)) - warmup_pool = gevent.pool.Pool(size=100) - for file_name in file_names: - fp = next(files) - warmup_pool.spawn( - write_file, - bucket=bucket, - file_name=file_name, - fp=fp, - ) - warmup_pool.join() - - # main work - print("Starting main worker loop.") - print("Using file size: {size} +- {stddev}".format(size=config.readwrite.files.size, stddev=config.readwrite.files.stddev)) - print("Spawning {w} writers and {r} readers...".format(w=config.readwrite.writers, r=config.readwrite.readers)) - group = gevent.pool.Group() - rand_writer = random.Random(seeds['writer']) - - # Don't create random files if deterministic_files_names is set and true - if not config.readwrite.get('deterministic_file_names'): - for x in range(config.readwrite.writers): - this_rand = random.Random(rand_writer.randrange(2**32)) - group.spawn( - writer, - bucket=bucket, - worker_id=x, - file_names=file_names, - files=files, - queue=q, - rand=this_rand, - ) - - # Since the loop generating readers already uses config.readwrite.readers - # and the file names are already generated (randomly or deterministically), - # this loop needs no additional qualifiers. 
If zero readers are specified, - # it will behave as expected (no data is read) - rand_reader = random.Random(seeds['reader']) - for x in range(config.readwrite.readers): - this_rand = random.Random(rand_reader.randrange(2**32)) - group.spawn( - reader, - bucket=bucket, - worker_id=x, - file_names=file_names, - queue=q, - rand=this_rand, - ) - def stop(): - group.kill(block=True) - q.put(StopIteration) - gevent.spawn_later(config.readwrite.duration, stop) - - # wait for all the tests to finish - group.join() - print('post-join, queue size {size}'.format(size=q.qsize())) - - if q.qsize() > 0: - for temp_dict in q: - if 'error' in temp_dict: - raise Exception('exception:\n\t{msg}\n\t{trace}'.format( - msg=temp_dict['error']['msg'], - trace=temp_dict['error']['traceback']) - ) - else: - yaml.safe_dump(temp_dict, stream=real_stdout) - - finally: - # cleanup - if options.cleanup: - if bucket is not None: - common.nuke_bucket(bucket) diff --git a/s3tests_boto3/realistic.py b/s3tests_boto3/realistic.py deleted file mode 100644 index c4b6920..0000000 --- a/s3tests_boto3/realistic.py +++ /dev/null @@ -1,281 +0,0 @@ -import hashlib -import random -import string -import struct -import time -import math -import tempfile -import shutil -import os - - -NANOSECOND = int(1e9) - - -def generate_file_contents(size): - """ - A helper function to generate binary contents for a given size, and - calculates the md5 hash of the contents appending itself at the end of the - blob. - It uses sha1's hexdigest which is 40 chars long. So any binary generated - should remove the last 40 chars from the blob to retrieve the original hash - and binary so that validity can be proved. - """ - size = int(size) - contents = os.urandom(size) - content_hash = hashlib.sha1(contents).hexdigest() - return contents + content_hash - - -class FileValidator(object): - - def __init__(self, f=None): - self._file = tempfile.SpooledTemporaryFile() - self.original_hash = None - self.new_hash = None - if f: - f.seek(0) - shutil.copyfileobj(f, self._file) - - def valid(self): - """ - Returns True if this file looks valid. The file is valid if the end - of the file has the md5 digest for the first part of the file. - """ - self._file.seek(0) - contents = self._file.read() - self.original_hash, binary = contents[-40:], contents[:-40] - self.new_hash = hashlib.sha1(binary).hexdigest() - if not self.new_hash == self.original_hash: - print('original hash: ', self.original_hash) - print('new hash: ', self.new_hash) - print('size: ', self._file.tell()) - return False - return True - - # XXX not sure if we need all of these - def seek(self, offset, whence=os.SEEK_SET): - self._file.seek(offset, whence) - - def tell(self): - return self._file.tell() - - def read(self, size=-1): - return self._file.read(size) - - def write(self, data): - self._file.write(data) - self._file.seek(0) - - -class RandomContentFile(object): - def __init__(self, size, seed): - self.size = size - self.seed = seed - self.random = random.Random(self.seed) - - # Boto likes to seek once more after it's done reading, so we need to save the last chunks/seek value. 
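    # (Each read() appends an [offset, elapsed_ns] pair to self.chunks, and
    # seek(0) rolls the finished list over into self.last_chunks, so callers
    # such as roundtrip.writer() can still report per-chunk timings after
    # boto's final rewind.)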
- self.last_chunks = self.chunks = None - self.last_seek = None - - # Let seek initialize the rest of it, rather than dup code - self.seek(0) - - def _mark_chunk(self): - self.chunks.append([self.offset, int(round((time.time() - self.last_seek) * NANOSECOND))]) - - def seek(self, offset, whence=os.SEEK_SET): - if whence == os.SEEK_SET: - self.offset = offset - elif whence == os.SEEK_END: - self.offset = self.size + offset; - elif whence == os.SEEK_CUR: - self.offset += offset - - assert self.offset == 0 - - self.random.seed(self.seed) - self.buffer = '' - - self.hash = hashlib.md5() - self.digest_size = self.hash.digest_size - self.digest = None - - # Save the last seek time as our start time, and the last chunks - self.last_chunks = self.chunks - # Before emptying. - self.last_seek = time.time() - self.chunks = [] - - def tell(self): - return self.offset - - def _generate(self): - # generate and return a chunk of pseudorandom data - size = min(self.size, 1*1024*1024) # generate at most 1 MB at a time - chunks = int(math.ceil(size/8.0)) # number of 8-byte chunks to create - - l = [self.random.getrandbits(64) for _ in range(chunks)] - s = struct.pack(chunks*'Q', *l) - return s - - def read(self, size=-1): - if size < 0: - size = self.size - self.offset - - r = [] - - random_count = min(size, self.size - self.offset - self.digest_size) - if random_count > 0: - while len(self.buffer) < random_count: - self.buffer += self._generate() - self.offset += random_count - size -= random_count - data, self.buffer = self.buffer[:random_count], self.buffer[random_count:] - if self.hash is not None: - self.hash.update(data) - r.append(data) - - digest_count = min(size, self.size - self.offset) - if digest_count > 0: - if self.digest is None: - self.digest = self.hash.digest() - self.hash = None - self.offset += digest_count - size -= digest_count - data = self.digest[:digest_count] - r.append(data) - - self._mark_chunk() - - return ''.join(r) - - -class PrecomputedContentFile(object): - def __init__(self, f): - self._file = tempfile.SpooledTemporaryFile() - f.seek(0) - shutil.copyfileobj(f, self._file) - - self.last_chunks = self.chunks = None - self.seek(0) - - def seek(self, offset, whence=os.SEEK_SET): - self._file.seek(offset, whence) - - if self.tell() == 0: - # only reset the chunks when seeking to the beginning - self.last_chunks = self.chunks - self.last_seek = time.time() - self.chunks = [] - - def tell(self): - return self._file.tell() - - def read(self, size=-1): - data = self._file.read(size) - self._mark_chunk() - return data - - def _mark_chunk(self): - elapsed = time.time() - self.last_seek - elapsed_nsec = int(round(elapsed * NANOSECOND)) - self.chunks.append([self.tell(), elapsed_nsec]) - -class FileVerifier(object): - def __init__(self): - self.size = 0 - self.hash = hashlib.md5() - self.buf = '' - self.created_at = time.time() - self.chunks = [] - - def _mark_chunk(self): - self.chunks.append([self.size, int(round((time.time() - self.created_at) * NANOSECOND))]) - - def write(self, data): - self.size += len(data) - self.buf += data - digsz = -1*self.hash.digest_size - new_data, self.buf = self.buf[0:digsz], self.buf[digsz:] - self.hash.update(new_data) - self._mark_chunk() - - def valid(self): - """ - Returns True if this file looks valid. The file is valid if the end - of the file has the md5 digest for the first part of the file. 
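        For example, piping a RandomContentFile straight into a verifier
        (sizes here are arbitrary; the roundtrip tool does the same thing
        with an S3 upload and download in between):

            src = RandomContentFile(size=4096, seed=42)
            dst = FileVerifier()
            dst.write(src.read())
            assert dst.valid()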
- """ - if self.size < self.hash.digest_size: - return self.hash.digest().startswith(self.buf) - - return self.buf == self.hash.digest() - - -def files(mean, stddev, seed=None): - """ - Yields file-like objects with effectively random contents, where - the size of each file follows the normal distribution with `mean` - and `stddev`. - - Beware, the file-likeness is very shallow. You can use boto's - `key.set_contents_from_file` to send these to S3, but they are not - full file objects. - - The last 128 bits are the MD5 digest of the previous bytes, for - verifying round-trip data integrity. For example, if you - re-download the object and place the contents into a file called - ``foo``, the following should print two identical lines: - - python -c 'import sys, hashlib; data=sys.stdin.read(); print hashlib.md5(data[:-16]).hexdigest(); print "".join("%02x" % ord(c) for c in data[-16:])' = 0: - break - yield RandomContentFile(size=size, seed=rand.getrandbits(32)) - - -def files2(mean, stddev, seed=None, numfiles=10): - """ - Yields file objects with effectively random contents, where the - size of each file follows the normal distribution with `mean` and - `stddev`. - - Rather than continuously generating new files, this pre-computes and - stores `numfiles` files and yields them in a loop. - """ - # pre-compute all the files (and save with TemporaryFiles) - fs = [] - for _ in range(numfiles): - t = tempfile.SpooledTemporaryFile() - t.write(generate_file_contents(random.normalvariate(mean, stddev))) - t.seek(0) - fs.append(t) - - while True: - for f in fs: - yield f - - -def names(mean, stddev, charset=None, seed=None): - """ - Yields strings that are somewhat plausible as file names, where - the lenght of each filename follows the normal distribution with - `mean` and `stddev`. - """ - if charset is None: - charset = string.ascii_lowercase - rand = random.Random(seed) - while True: - while True: - length = int(rand.normalvariate(mean, stddev)) - if length > 0: - break - name = ''.join(rand.choice(charset) for _ in range(length)) - yield name diff --git a/s3tests_boto3/roundtrip.py b/s3tests_boto3/roundtrip.py deleted file mode 100644 index cbc9379..0000000 --- a/s3tests_boto3/roundtrip.py +++ /dev/null @@ -1,219 +0,0 @@ -import gevent -import gevent.pool -import gevent.queue -import gevent.monkey; gevent.monkey.patch_all() -import itertools -import optparse -import os -import sys -import time -import traceback -import random -import yaml - -from . import realistic -from . 
import common - -NANOSECOND = int(1e9) - -def writer(bucket, objname, fp, queue): - key = bucket.new_key(objname) - - result = dict( - type='w', - bucket=bucket.name, - key=key.name, - ) - - start = time.time() - try: - key.set_contents_from_file(fp, rewind=True) - except gevent.GreenletExit: - raise - except Exception as e: - # stop timer ASAP, even on errors - end = time.time() - result.update( - error=dict( - msg=str(e), - traceback=traceback.format_exc(), - ), - ) - # certain kinds of programmer errors make this a busy - # loop; let parent greenlet get some time too - time.sleep(0) - else: - end = time.time() - - elapsed = end - start - result.update( - start=start, - duration=int(round(elapsed * NANOSECOND)), - chunks=fp.last_chunks, - ) - queue.put(result) - - -def reader(bucket, objname, queue): - key = bucket.new_key(objname) - - fp = realistic.FileVerifier() - result = dict( - type='r', - bucket=bucket.name, - key=key.name, - ) - - start = time.time() - try: - key.get_contents_to_file(fp) - except gevent.GreenletExit: - raise - except Exception as e: - # stop timer ASAP, even on errors - end = time.time() - result.update( - error=dict( - msg=str(e), - traceback=traceback.format_exc(), - ), - ) - # certain kinds of programmer errors make this a busy - # loop; let parent greenlet get some time too - time.sleep(0) - else: - end = time.time() - - if not fp.valid(): - result.update( - error=dict( - msg='md5sum check failed', - ), - ) - - elapsed = end - start - result.update( - start=start, - duration=int(round(elapsed * NANOSECOND)), - chunks=fp.chunks, - ) - queue.put(result) - -def parse_options(): - parser = optparse.OptionParser( - usage='%prog [OPTS] =1.0', 'isodate >=0.4.4', ], - - entry_points={ - 'console_scripts': [ - 's3tests-generate-objects = s3tests.generate_objects:main', - 's3tests-test-readwrite = s3tests.readwrite:main', - 's3tests-test-roundtrip = s3tests.roundtrip:main', - 's3tests-fuzz-headers = s3tests.fuzz.headers:main', - 's3tests-analysis-rwstats = s3tests.analysis.rwstats:main', - ], - }, - ) diff --git a/siege.conf b/siege.conf deleted file mode 100644 index c40b334..0000000 --- a/siege.conf +++ /dev/null @@ -1,382 +0,0 @@ -# Updated by Siege 2.69, May-24-2010 -# Copyright 2000-2007 by Jeffrey Fulmer, et al. -# -# Siege configuration file -- edit as necessary -# For more information about configuring and running -# this program, visit: http://www.joedog.org/ - -# -# Variable declarations. You can set variables here -# for use in the directives below. Example: -# PROXY = proxy.joedog.org -# Reference variables inside ${} or $(), example: -# proxy-host = ${PROXY} -# You can also reference ENVIRONMENT variables without -# actually declaring them, example: -# logfile = $(HOME)/var/siege.log - -# -# Signify verbose mode, true turns on verbose output -# ex: verbose = true|false -# -verbose = true - -# -# CSV Verbose format: with this option, you can choose -# to format verbose output in traditional siege format -# or comma separated format. The latter will allow you -# to redirect output to a file for import into a spread -# sheet, i.e., siege > file.csv -# ex: csv = true|false (default false) -# -csv = true - -# -# Full URL verbose format: By default siege displays -# the URL path and not the full URL. With this option, -# you # can instruct siege to show the complete URL. 
-# ex: fullurl = true|false (default false) -# -# fullurl = true - -# -# Display id: in verbose mode, display the siege user -# id associated with the HTTP transaction information -# ex: display-id = true|false -# -# display-id = - -# -# Show logfile location. By default, siege displays the -# logfile location at the end of every run when logging -# You can turn this message off with this directive. -# ex: show-logfile = false -# -show-logfile = true - -# -# Default logging status, true turns logging on. -# ex: logging = true|false -# -logging = true - -# -# Logfile, the default siege logfile is $PREFIX/var/siege.log -# This directive allows you to choose an alternative log file. -# Environment variables may be used as shown in the examples: -# ex: logfile = /home/jeff/var/log/siege.log -# logfile = ${HOME}/var/log/siege.log -# logfile = ${LOGFILE} -# -logfile = ./siege.log - -# -# HTTP protocol. Options HTTP/1.1 and HTTP/1.0. -# Some webservers have broken implementation of the -# 1.1 protocol which skews throughput evaluations. -# If you notice some siege clients hanging for -# extended periods of time, change this to HTTP/1.0 -# ex: protocol = HTTP/1.1 -# protocol = HTTP/1.0 -# -protocol = HTTP/1.1 - -# -# Chunked encoding is required by HTTP/1.1 protocol -# but siege allows you to turn it off as desired. -# -# ex: chunked = true -# -chunked = true - -# -# Cache revalidation. -# Siege supports cache revalidation for both ETag and -# Last-modified headers. If a copy is still fresh, the -# server responds with 304. -# HTTP/1.1 200 0.00 secs: 2326 bytes ==> /apache_pb.gif -# HTTP/1.1 304 0.00 secs: 0 bytes ==> /apache_pb.gif -# HTTP/1.1 304 0.00 secs: 0 bytes ==> /apache_pb.gif -# -# ex: cache = true -# -cache = false - -# -# Connection directive. Options "close" and "keep-alive" -# Starting with release 2.57b3, siege implements persistent -# connections in accordance to RFC 2068 using both chunked -# encoding and content-length directives to determine the -# page size. To run siege with persistent connections set -# the connection directive to keep-alive. (Default close) -# CAUTION: use the keep-alive directive with care. -# DOUBLE CAUTION: this directive does not work well on HPUX -# TRIPLE CAUTION: don't use keep-alives until further notice -# ex: connection = close -# connection = keep-alive -# -connection = close - -# -# Default number of simulated concurrent users -# ex: concurrent = 25 -# -concurrent = 15 - -# -# Default duration of the siege. The right hand argument has -# a modifier which specifies the time units, H=hours, M=minutes, -# and S=seconds. If a modifier is not specified, then minutes -# are assumed. -# ex: time = 50M -# -# time = - -# -# Repetitions. The length of siege may be specified in client -# reps rather then a time duration. Instead of specifying a time -# span, you can tell each siege instance to hit the server X number -# of times. So if you chose 'reps = 20' and you've selected 10 -# concurrent users, then siege will hit the server 200 times. -# ex: reps = 20 -# -# reps = - -# -# Default URLs file, set at configuration time, the default -# file is PREFIX/etc/urls.txt. So if you configured siege -# with --prefix=/usr/local then the urls.txt file is installed -# int /usr/local/etc/urls.txt. Use the "file = " directive to -# configure an alternative URLs file. 
You may use environment -# variables as shown in the examples below: -# ex: file = /export/home/jdfulmer/MYURLS.txt -# file = $HOME/etc/urls.txt -# file = $URLSFILE -# -file = ./urls.txt - -# -# Default URL, this is a single URL that you want to test. This -# is usually set at the command line with the -u option. When -# used, this option overrides the urls.txt (-f FILE/--file=FILE) -# option. You will HAVE to comment this out for in order to use -# the urls.txt file option. -# ex: url = https://shemp.whoohoo.com/docs/index.jsp -# -# url = - -# -# Default delay value, see the siege(1) man page. -# This value is used for load testing, it is not used -# for benchmarking. -# ex: delay = 3 -# -delay = 1 - -# -# Connection timeout value. Set the value in seconds for -# socket connection timeouts. The default value is 30 seconds. -# ex: timeout = 30 -# -# timeout = - -# -# Session expiration: This directive allows you to delete all -# cookies after you pass through the URLs. This means siege will -# grab a new session with each run through its URLs. The default -# value is false. -# ex: expire-session = true -# -# expire-session = - -# -# Failures: This is the number of total connection failures allowed -# before siege aborts. Connection failures (timeouts, socket failures, -# etc.) are combined with 400 and 500 level errors in the final stats, -# but those errors do not count against the abort total. If you set -# this total to 10, then siege will abort after ten socket timeouts, -# but it will NOT abort after ten 404s. This is designed to prevent -# a run-away mess on an unattended siege. The default value is 1024 -# ex: failures = 50 -# -# failures = - -# -# Internet simulation. If true, siege clients will hit -# the URLs in the urls.txt file randomly, thereby simulating -# internet usage. If false, siege will run through the -# urls.txt file in order from first to last and back again. -# ex: internet = true -# -internet = false - -# -# Default benchmarking value, If true, there is NO delay -# between server requests, siege runs as fast as the web -# server and the network will let it. Set this to false -# for load testing. -# ex: benchmark = true -# -benchmark = false - -# -# Set the siege User-Agent to identify yourself at the -# host, the default is: JoeDog/1.00 [en] (X11; I; Siege #.##) -# But that wreaks of corporate techno speak. Feel free -# to make it more interesting :-) Since Limey is recovering -# from minor surgery as I write this, I'll dedicate the -# example to him... -# ex: user-agent = Limey The Bulldog -# -# user-agent = - -# -# Accept-encoding. This option allows you to specify -# acceptable encodings returned by the server. Use this -# directive to turn on compression. By default we accept -# gzip compression. -# -# ex: accept-encoding = * -# accept-encoding = gzip -# accept-encoding = compress;q=0.5;gzip;q=1 -accept-encoding = gzip - -# -# TURN OFF THAT ANNOYING SPINNER! -# Siege spawns a thread and runs a spinner to entertain you -# as it collects and computes its stats. If you don't like -# this feature, you may turn it off here. -# ex: spinner = false -# -spinner = true - -# -# WWW-Authenticate login. When siege hits a webpage -# that requires basic authentication, it will search its -# logins for authentication which matches the specific realm -# requested by the server. If it finds a match, it will send -# that login information. If it fails to match the realm, it -# will send the default login information. (Default is "all"). 
-# You may configure siege with several logins as long as no -# two realms match. The format for logins is: -# username:password[:realm] where "realm" is optional. -# If you do not supply a realm, then it will default to "all" -# ex: login = jdfulmer:topsecret:Admin -# login = jeff:supersecret -# -# login = - -# -# WWW-Authenticate username and password. When siege -# hits a webpage that requires authentication, it will -# send this user name and password to the server. Note -# this is NOT form based authentication. You will have -# to construct URLs for that. -# ex: username = jdfulmer -# password = whoohoo -# -# username = -# password = - -# -# ssl-cert -# This optional feature allows you to specify a path to a client -# certificate. It is not neccessary to specify a certificate in -# order to use https. If you don't know why you would want one, -# then you probably don't need this feature. Use openssl to -# generate a certificate and key with the following command: -# $ openssl req -nodes -new -days 365 -newkey rsa:1024 \ -# -keyout key.pem -out cert.pem -# Specify a path to cert.pem as follows: -# ex: ssl-cert = /home/jeff/.certs/cert.pem -# -# ssl-cert = - -# -# ssl-key -# Use this option to specify the key you generated with the command -# above. ex: ssl-key = /home/jeff/.certs/key.pem -# You may actually skip this option and combine both your cert and -# your key in a single file: -# $ cat key.pem > client.pem -# $ cat cert.pem >> client.pem -# Now set the path for ssl-cert: -# ex: ssl-cert = /home/jeff/.certs/client.pem -# (in this scenario, you comment out ssl-key) -# -# ssl-key = - -# -# ssl-timeout -# This option sets a connection timeout for the ssl library -# ex: ssl-timeout = 30 -# -# ssl-timeout = - -# -# ssl-ciphers -# You can use this feature to select a specific ssl cipher -# for HTTPs. To view the ones available with your library run -# the following command: openssl ciphers -# ex: ssl-ciphers = EXP-RC4-MD5 -# -# ssl-ciphers = - -# -# Login URL. This is the first URL to be hit by every siege -# client. This feature was designed to allow you to login to -# a server and establish a session. It will only be hit once -# so if you need to hit this URL more then once, make sure it -# also appears in your urls.txt file. -# -# ex: login-url = http://eos.haha.com/login.jsp POST name=jeff&pass=foo -# -# login-url = - -# -# Proxy protocol. This option allows you to select a proxy -# server stress testing. The proxy will request the URL(s) -# specified by -u"my.url.org" OR from the urls.txt file. -# -# ex: proxy-host = proxy.whoohoo.org -# proxy-port = 8080 -# -# proxy-host = -# proxy-port = - -# -# Proxy-Authenticate. When scout hits a proxy server which -# requires username and password authentication, it will this -# username and password to the server. The format is username, -# password and optional realm each separated by a colon. You -# may enter more than one proxy-login as long as each one has -# a different realm. If you do not enter a realm, then scout -# will send that login information to all proxy challenges. If -# you have more than one proxy-login, then scout will attempt -# to match the login to the realm. -# ex: proxy-login: jeff:secret:corporate -# proxy-login: jeff:whoohoo -# -# proxy-login = - -# -# Redirection support. This option allows to to control -# whether a Location: hint will be followed. Most users -# will want to follow redirection information, but sometimes -# it's desired to just get the Location information. 
-# -# ex: follow-location = false -# -# follow-location = - -# Zero-length data. siege can be configured to disregard -# results in which zero bytes are read after the headers. -# Alternatively, such results can be counted in the final -# tally of outcomes. -# -# ex: zero-data-ok = false -# -# zero-data-ok = - -# -# end of siegerc