Merge pull request #26 from ceph/wip-10066

Fixes/improves weird hashing issues on readwrite tests.

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
2014-12-15 23:17:51 -08:00 · 2014-12-15 23:17:51 -08:00 · d50dfe8fa3
commit d50dfe8fa3
parent abc9e67120 a983b6b670
3 changed files with 33 additions and 23 deletions
--- a/s3tests/readwrite.py
+++ b/s3tests/readwrite.py
@ -21,7 +21,7 @@ def reader(bucket, worker_id, file_names, queue, rand):
        objname = rand.choice(file_names)
        key = bucket.new_key(objname)
-        fp = realistic.FileVerifier()
+        fp = realistic.FileValidator()
        result = dict(
                type='r',
                bucket=bucket.name,
@ -31,7 +31,7 @@ def reader(bucket, worker_id, file_names, queue, rand):
        start = time.time()
        try:
-            key.get_contents_to_file(fp)
+            key.get_contents_to_file(fp._file)
        except gevent.GreenletExit:
            raise
        except Exception as e:
@ -50,7 +50,7 @@ def reader(bucket, worker_id, file_names, queue, rand):
            end = time.time()
            if not fp.valid():
-                m='md5sum check failed start={s} ({se}) end={e} size={sz} obj={o}'.format(s=time.ctime(start), se=start, e=end, sz=fp.size, o=objname)
+                m='md5sum check failed start={s} ({se}) end={e} size={sz} obj={o}'.format(s=time.ctime(start), se=start, e=end, sz=fp._file.tell(), o=objname)
                result.update(
                    error=dict(
                        msg=m,
@ -63,13 +63,13 @@ def reader(bucket, worker_id, file_names, queue, rand):
                result.update(
                    start=start,
                    duration=int(round(elapsed * NANOSECOND)),
                    chunks=fp.chunks,
                    )
        queue.put(result)
 def writer(bucket, worker_id, file_names, files, queue, rand):
    while True:
        fp = next(files)
        fp.seek(0)
        objname = rand.choice(file_names)
        key = bucket.new_key(objname)
@ -104,7 +104,6 @@ def writer(bucket, worker_id, file_names, files, queue, rand):
            result.update(
                start=start,
                duration=int(round(elapsed * NANOSECOND)),
                chunks=fp.last_chunks,
                )
        queue.put(result)
@ -189,7 +188,7 @@ def main():
            )
        q = gevent.queue.Queue()
-        
+
        # warmup - get initial set of files uploaded if there are any writers specified
        if config.readwrite.writers > 0:
            print "Uploading initial set of {num} files".format(num=config.readwrite.files.num)
--- a/s3tests/realistic.py
+++ b/s3tests/realistic.py
@ -21,6 +21,7 @@ def generate_file_contents(size):
    should remove the last 40 chars from the blob to retrieve the original hash
    and binary so that validity can be proved.
    """
    size = int(size)
    contents = os.urandom(size)
    content_hash = hashlib.sha1(contents).hexdigest()
    return contents + content_hash
@ -28,22 +29,29 @@ def generate_file_contents(size):
 class FileValidator(object):
-    def __init__(self, f):
+    def __init__(self, f=None):
        self._file = tempfile.SpooledTemporaryFile()
-        f.seek(0)
+        self.original_hash = None
-        shutil.copyfileobj(f, self._file)
+        self.new_hash = None
-        self.seek(0)
+        if f:
            f.seek(0)
            shutil.copyfileobj(f, self._file)
    def valid(self):
        """
        Returns True if this file looks valid. The file is valid if the end
        of the file has the md5 digest for the first part of the file.
        """
        contents = self._file.read()
        self._file.seek(0)
-        original_hash, binary = contents[-40:], contents[:-40]
+        contents = self._file.read()
-        new_hash = hashlib.sha1(binary).hexdigest()
+        self.original_hash, binary = contents[-40:], contents[:-40]
-        return new_hash == original_hash
+        self.new_hash = hashlib.sha1(binary).hexdigest()
        if not self.new_hash == self.original_hash:
            print 'original  hash: ', self.original_hash
            print 'new hash: ', self.new_hash
            print 'size: ', self._file.tell()
            return False
        return True
    # XXX not sure if we need all of these
    def seek(self, offset, whence=os.SEEK_SET):
@ -55,6 +63,10 @@ class FileValidator(object):
    def read(self, size=-1):
        return self._file.read(size)
    def write(self, data):
        self._file.write(data)
        self._file.seek(0)
 class RandomContentFile(object):
    def __init__(self, size, seed):
@ -228,6 +240,7 @@ def files(mean, stddev, seed=None):
                break
        yield RandomContentFile(size=size, seed=rand.getrandbits(32))
 def files2(mean, stddev, seed=None, numfiles=10):
    """
    Yields file objects with effectively random contents, where the
@ -238,17 +251,17 @@ def files2(mean, stddev, seed=None, numfiles=10):
    stores `numfiles` files and yields them in a loop.
    """
    # pre-compute all the files (and save with TemporaryFiles)
    rand_files = files(mean, stddev, seed)
    fs = []
    for _ in xrange(numfiles):
        f = next(rand_files)
        t = tempfile.SpooledTemporaryFile()
-        shutil.copyfileobj(f, t)
+        t.write(generate_file_contents(random.normalvariate(mean, stddev)))
        t.seek(0)
        fs.append(t)
    while True:
        for f in fs:
-            yield PrecomputedContentFile(f)
+            yield f
 def names(mean, stddev, charset=None, seed=None):
    """
--- a/s3tests/tests/test_realistic.py
+++ b/s3tests/tests/test_realistic.py
@ -40,13 +40,11 @@ class TestFiles(object):
        t = tempfile.SpooledTemporaryFile()
        shutil.copyfileobj(source, t)
        precomputed = realistic.PrecomputedContentFile(t)
        assert precomputed.valid()
-        #verifier = realistic.FileVerifier()
+        verifier = realistic.FileVerifier()
-        #shutil.copyfileobj(precomputed, verifier)
+        shutil.copyfileobj(precomputed, verifier)
-        #assert verifier.valid()
+        assert verifier.valid()
        #assert 0
 # new implementation