Merge pull request #26 from ceph/wip-10066

fixes/improves weird hashing issues on readwrite tests

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
This commit is contained in:
Josh Durgin 2014-12-15 23:17:51 -08:00
commit d50dfe8fa3
3 changed files with 33 additions and 23 deletions

View file

@ -21,7 +21,7 @@ def reader(bucket, worker_id, file_names, queue, rand):
objname = rand.choice(file_names) objname = rand.choice(file_names)
key = bucket.new_key(objname) key = bucket.new_key(objname)
fp = realistic.FileVerifier() fp = realistic.FileValidator()
result = dict( result = dict(
type='r', type='r',
bucket=bucket.name, bucket=bucket.name,
@ -31,7 +31,7 @@ def reader(bucket, worker_id, file_names, queue, rand):
start = time.time() start = time.time()
try: try:
key.get_contents_to_file(fp) key.get_contents_to_file(fp._file)
except gevent.GreenletExit: except gevent.GreenletExit:
raise raise
except Exception as e: except Exception as e:
@ -50,7 +50,7 @@ def reader(bucket, worker_id, file_names, queue, rand):
end = time.time() end = time.time()
if not fp.valid(): if not fp.valid():
m='md5sum check failed start={s} ({se}) end={e} size={sz} obj={o}'.format(s=time.ctime(start), se=start, e=end, sz=fp.size, o=objname) m='md5sum check failed start={s} ({se}) end={e} size={sz} obj={o}'.format(s=time.ctime(start), se=start, e=end, sz=fp._file.tell(), o=objname)
result.update( result.update(
error=dict( error=dict(
msg=m, msg=m,
@ -63,13 +63,13 @@ def reader(bucket, worker_id, file_names, queue, rand):
result.update( result.update(
start=start, start=start,
duration=int(round(elapsed * NANOSECOND)), duration=int(round(elapsed * NANOSECOND)),
chunks=fp.chunks,
) )
queue.put(result) queue.put(result)
def writer(bucket, worker_id, file_names, files, queue, rand): def writer(bucket, worker_id, file_names, files, queue, rand):
while True: while True:
fp = next(files) fp = next(files)
fp.seek(0)
objname = rand.choice(file_names) objname = rand.choice(file_names)
key = bucket.new_key(objname) key = bucket.new_key(objname)
@ -104,7 +104,6 @@ def writer(bucket, worker_id, file_names, files, queue, rand):
result.update( result.update(
start=start, start=start,
duration=int(round(elapsed * NANOSECOND)), duration=int(round(elapsed * NANOSECOND)),
chunks=fp.last_chunks,
) )
queue.put(result) queue.put(result)
@ -189,7 +188,7 @@ def main():
) )
q = gevent.queue.Queue() q = gevent.queue.Queue()
# warmup - get initial set of files uploaded if there are any writers specified # warmup - get initial set of files uploaded if there are any writers specified
if config.readwrite.writers > 0: if config.readwrite.writers > 0:
print "Uploading initial set of {num} files".format(num=config.readwrite.files.num) print "Uploading initial set of {num} files".format(num=config.readwrite.files.num)

View file

@ -21,6 +21,7 @@ def generate_file_contents(size):
should remove the last 40 chars from the blob to retrieve the original hash should remove the last 40 chars from the blob to retrieve the original hash
and binary so that validity can be proved. and binary so that validity can be proved.
""" """
size = int(size)
contents = os.urandom(size) contents = os.urandom(size)
content_hash = hashlib.sha1(contents).hexdigest() content_hash = hashlib.sha1(contents).hexdigest()
return contents + content_hash return contents + content_hash
@ -28,22 +29,29 @@ def generate_file_contents(size):
class FileValidator(object): class FileValidator(object):
def __init__(self, f): def __init__(self, f=None):
self._file = tempfile.SpooledTemporaryFile() self._file = tempfile.SpooledTemporaryFile()
f.seek(0) self.original_hash = None
shutil.copyfileobj(f, self._file) self.new_hash = None
self.seek(0) if f:
f.seek(0)
shutil.copyfileobj(f, self._file)
def valid(self): def valid(self):
""" """
Returns True if this file looks valid. The file is valid if the end Returns True if this file looks valid. The file is valid if the end
of the file has the SHA-1 hex digest for the first part of the file. of the file has the SHA-1 hex digest for the first part of the file.
""" """
contents = self._file.read()
self._file.seek(0) self._file.seek(0)
original_hash, binary = contents[-40:], contents[:-40] contents = self._file.read()
new_hash = hashlib.sha1(binary).hexdigest() self.original_hash, binary = contents[-40:], contents[:-40]
return new_hash == original_hash self.new_hash = hashlib.sha1(binary).hexdigest()
if not self.new_hash == self.original_hash:
print 'original hash: ', self.original_hash
print 'new hash: ', self.new_hash
print 'size: ', self._file.tell()
return False
return True
# XXX not sure if we need all of these # XXX not sure if we need all of these
def seek(self, offset, whence=os.SEEK_SET): def seek(self, offset, whence=os.SEEK_SET):
@ -55,6 +63,10 @@ class FileValidator(object):
def read(self, size=-1): def read(self, size=-1):
return self._file.read(size) return self._file.read(size)
def write(self, data):
self._file.write(data)
self._file.seek(0)
class RandomContentFile(object): class RandomContentFile(object):
def __init__(self, size, seed): def __init__(self, size, seed):
@ -228,6 +240,7 @@ def files(mean, stddev, seed=None):
break break
yield RandomContentFile(size=size, seed=rand.getrandbits(32)) yield RandomContentFile(size=size, seed=rand.getrandbits(32))
def files2(mean, stddev, seed=None, numfiles=10): def files2(mean, stddev, seed=None, numfiles=10):
""" """
Yields file objects with effectively random contents, where the Yields file objects with effectively random contents, where the
@ -238,17 +251,17 @@ def files2(mean, stddev, seed=None, numfiles=10):
stores `numfiles` files and yields them in a loop. stores `numfiles` files and yields them in a loop.
""" """
# pre-compute all the files (and save with TemporaryFiles) # pre-compute all the files (and save with TemporaryFiles)
rand_files = files(mean, stddev, seed)
fs = [] fs = []
for _ in xrange(numfiles): for _ in xrange(numfiles):
f = next(rand_files)
t = tempfile.SpooledTemporaryFile() t = tempfile.SpooledTemporaryFile()
shutil.copyfileobj(f, t) t.write(generate_file_contents(random.normalvariate(mean, stddev)))
t.seek(0)
fs.append(t) fs.append(t)
while True: while True:
for f in fs: for f in fs:
yield PrecomputedContentFile(f) yield f
def names(mean, stddev, charset=None, seed=None): def names(mean, stddev, charset=None, seed=None):
""" """

View file

@ -40,13 +40,11 @@ class TestFiles(object):
t = tempfile.SpooledTemporaryFile() t = tempfile.SpooledTemporaryFile()
shutil.copyfileobj(source, t) shutil.copyfileobj(source, t)
precomputed = realistic.PrecomputedContentFile(t) precomputed = realistic.PrecomputedContentFile(t)
assert precomputed.valid()
#verifier = realistic.FileVerifier() verifier = realistic.FileVerifier()
#shutil.copyfileobj(precomputed, verifier) shutil.copyfileobj(precomputed, verifier)
#assert verifier.valid() assert verifier.valid()
#assert 0
# new implementation # new implementation