diff --git a/s3tests/readwrite.py b/s3tests/readwrite.py index c7b0907..8e59f7e 100644 --- a/s3tests/readwrite.py +++ b/s3tests/readwrite.py @@ -21,7 +21,7 @@ def reader(bucket, worker_id, file_names, queue, rand): objname = rand.choice(file_names) key = bucket.new_key(objname) - fp = realistic.FileVerifier() + fp = realistic.FileValidator() result = dict( type='r', bucket=bucket.name, @@ -31,7 +31,7 @@ def reader(bucket, worker_id, file_names, queue, rand): start = time.time() try: - key.get_contents_to_file(fp) + key.get_contents_to_file(fp._file) except gevent.GreenletExit: raise except Exception as e: @@ -50,7 +50,7 @@ def reader(bucket, worker_id, file_names, queue, rand): end = time.time() if not fp.valid(): - m='md5sum check failed start={s} ({se}) end={e} size={sz} obj={o}'.format(s=time.ctime(start), se=start, e=end, sz=fp.size, o=objname) + m='md5sum check failed start={s} ({se}) end={e} size={sz} obj={o}'.format(s=time.ctime(start), se=start, e=end, sz=fp._file.tell(), o=objname) result.update( error=dict( msg=m, @@ -63,13 +63,13 @@ def reader(bucket, worker_id, file_names, queue, rand): result.update( start=start, duration=int(round(elapsed * NANOSECOND)), - chunks=fp.chunks, ) queue.put(result) def writer(bucket, worker_id, file_names, files, queue, rand): while True: fp = next(files) + fp.seek(0) objname = rand.choice(file_names) key = bucket.new_key(objname) @@ -104,7 +104,6 @@ def writer(bucket, worker_id, file_names, files, queue, rand): result.update( start=start, duration=int(round(elapsed * NANOSECOND)), - chunks=fp.last_chunks, ) queue.put(result) @@ -189,7 +188,7 @@ def main(): ) q = gevent.queue.Queue() - + # warmup - get initial set of files uploaded if there are any writers specified if config.readwrite.writers > 0: print "Uploading initial set of {num} files".format(num=config.readwrite.files.num) diff --git a/s3tests/realistic.py b/s3tests/realistic.py index 5e063e8..f86ba4c 100644 --- a/s3tests/realistic.py +++ b/s3tests/realistic.py @@ -21,6 +21,7 @@ def generate_file_contents(size): should remove the last 40 chars from the blob to retrieve the original hash and binary so that validity can be proved. """ + size = int(size) contents = os.urandom(size) content_hash = hashlib.sha1(contents).hexdigest() return contents + content_hash @@ -28,22 +29,29 @@ def generate_file_contents(size): class FileValidator(object): - def __init__(self, f): + def __init__(self, f=None): self._file = tempfile.SpooledTemporaryFile() - f.seek(0) - shutil.copyfileobj(f, self._file) - self.seek(0) + self.original_hash = None + self.new_hash = None + if f: + f.seek(0) + shutil.copyfileobj(f, self._file) def valid(self): """ Returns True if this file looks valid. The file is valid if the end of the file has the md5 digest for the first part of the file. """ - contents = self._file.read() self._file.seek(0) - original_hash, binary = contents[-40:], contents[:-40] - new_hash = hashlib.sha1(binary).hexdigest() - return new_hash == original_hash + contents = self._file.read() + self.original_hash, binary = contents[-40:], contents[:-40] + self.new_hash = hashlib.sha1(binary).hexdigest() + if not self.new_hash == self.original_hash: + print 'original hash: ', self.original_hash + print 'new hash: ', self.new_hash + print 'size: ', self._file.tell() + return False + return True # XXX not sure if we need all of these def seek(self, offset, whence=os.SEEK_SET): @@ -55,6 +63,10 @@ class FileValidator(object): def read(self, size=-1): return self._file.read(size) + def write(self, data): + self._file.write(data) + self._file.seek(0) + class RandomContentFile(object): def __init__(self, size, seed): @@ -228,6 +240,7 @@ def files(mean, stddev, seed=None): break yield RandomContentFile(size=size, seed=rand.getrandbits(32)) + def files2(mean, stddev, seed=None, numfiles=10): """ Yields file objects with effectively random contents, where the @@ -238,17 +251,17 @@ def files2(mean, stddev, seed=None, numfiles=10): stores `numfiles` files and yields them in a loop. """ # pre-compute all the files (and save with TemporaryFiles) - rand_files = files(mean, stddev, seed) fs = [] for _ in xrange(numfiles): - f = next(rand_files) t = tempfile.SpooledTemporaryFile() - shutil.copyfileobj(f, t) + t.write(generate_file_contents(random.normalvariate(mean, stddev))) + t.seek(0) fs.append(t) while True: for f in fs: - yield PrecomputedContentFile(f) + yield f + def names(mean, stddev, charset=None, seed=None): """ diff --git a/s3tests/tests/test_realistic.py b/s3tests/tests/test_realistic.py index 9b313c9..db00948 100644 --- a/s3tests/tests/test_realistic.py +++ b/s3tests/tests/test_realistic.py @@ -40,13 +40,11 @@ class TestFiles(object): t = tempfile.SpooledTemporaryFile() shutil.copyfileobj(source, t) precomputed = realistic.PrecomputedContentFile(t) - assert precomputed.valid() - #verifier = realistic.FileVerifier() - #shutil.copyfileobj(precomputed, verifier) + verifier = realistic.FileVerifier() + shutil.copyfileobj(precomputed, verifier) - #assert verifier.valid() - #assert 0 + assert verifier.valid() # new implementation