precompute files in readwrite tool

Makes the readwrite tool precompute a set of files and reuse them
when writing objects rather than generating each file on the fly.
This commit is contained in:
Steven Berler 2011-12-30 19:05:12 +00:00
parent a268bcfbef
commit e9492927ae
2 changed files with 56 additions and 1 deletions

View file

@ -168,7 +168,7 @@ def main():
) )
file_names = itertools.islice(file_names, config.readwrite.files.num) file_names = itertools.islice(file_names, config.readwrite.files.num)
file_names = list(file_names) file_names = list(file_names)
files = realistic.files( files = realistic.files2(
mean=1024 * config.readwrite.files.size, mean=1024 * config.readwrite.files.size,
stddev=1024 * config.readwrite.files.stddev, stddev=1024 * config.readwrite.files.stddev,
seed=seeds['contents'], seed=seeds['contents'],

View file

@ -4,6 +4,8 @@ import string
import struct import struct
import time import time
import math import math
import tempfile
import shutil
NANOSECOND = int(1e9) NANOSECOND = int(1e9)
@ -84,6 +86,37 @@ class RandomContentFile(object):
return ''.join(r) return ''.join(r)
class PrecomputedContentFile(object):
def __init__(self, f):
self._file = tempfile.SpooledTemporaryFile()
f.seek(0)
shutil.copyfileobj(f, self._file)
self.last_chunks = self.chunks = None
self.seek(0)
def seek(self, offset):
self._file.seek(offset)
if offset == 0:
# only reset the chunks when seeking to the beginning
self.last_chunks = self.chunks
self.last_seek = time.time()
self.chunks = []
def tell(self):
return self._file.tell()
def read(self, size=-1):
data = self._file.read(size)
self._mark_chunk()
return data
def _mark_chunk(self):
elapsed = time.time() - self.last_seek
elapsed_nsec = int(round(elapsed * NANOSECOND))
self.chunks.append([self.tell(), elapsed_nsec])
class FileVerifier(object): class FileVerifier(object):
def __init__(self): def __init__(self):
self.size = 0 self.size = 0
@ -141,6 +174,28 @@ def files(mean, stddev, seed=None):
break break
yield RandomContentFile(size=size, seed=rand.getrandbits(32)) yield RandomContentFile(size=size, seed=rand.getrandbits(32))
def files2(mean, stddev, seed=None, numfiles=10):
"""
Yields file objects with effectively random contents, where the
size of each file follows the normal distribution with `mean` and
`stddev`.
Rather than continuously generating new files, this pre-computes and
stores `numfiles` files and yields them in a loop.
"""
# pre-compute all the files (and save with TemporaryFiles)
rand_files = files(mean, stddev, seed)
fs = []
for _ in xrange(numfiles):
f = next(rand_files)
t = tempfile.SpooledTemporaryFile()
shutil.copyfileobj(f, t)
fs.append(t)
while True:
for f in fs:
yield PrecomputedContentFile(f)
def names(mean, stddev, charset=None, seed=None): def names(mean, stddev, charset=None, seed=None):
""" """
Yields strings that are somewhat plausible as file names, where Yields strings that are somewhat plausible as file names, where