forked from TrueCloudLab/s3-tests
precompute files in readwrite tool
Makes the readwrite tool precompute a set of files and reuse them when writing objects rather than generating each file on the fly.
This commit is contained in:
parent
a268bcfbef
commit
e9492927ae
2 changed files with 56 additions and 1 deletions
|
@ -168,7 +168,7 @@ def main():
|
||||||
)
|
)
|
||||||
file_names = itertools.islice(file_names, config.readwrite.files.num)
|
file_names = itertools.islice(file_names, config.readwrite.files.num)
|
||||||
file_names = list(file_names)
|
file_names = list(file_names)
|
||||||
files = realistic.files(
|
files = realistic.files2(
|
||||||
mean=1024 * config.readwrite.files.size,
|
mean=1024 * config.readwrite.files.size,
|
||||||
stddev=1024 * config.readwrite.files.stddev,
|
stddev=1024 * config.readwrite.files.stddev,
|
||||||
seed=seeds['contents'],
|
seed=seeds['contents'],
|
||||||
|
|
|
@ -4,6 +4,8 @@ import string
|
||||||
import struct
|
import struct
|
||||||
import time
|
import time
|
||||||
import math
|
import math
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
NANOSECOND = int(1e9)
|
NANOSECOND = int(1e9)
|
||||||
|
@ -84,6 +86,37 @@ class RandomContentFile(object):
|
||||||
|
|
||||||
return ''.join(r)
|
return ''.join(r)
|
||||||
|
|
||||||
|
class PrecomputedContentFile(object):
    """
    File-like wrapper around an already-generated content file.

    Copies the contents of `f` into a SpooledTemporaryFile once, then
    serves reads from that copy while recording per-read timing chunks
    (mirroring the chunk bookkeeping of RandomContentFile) so callers
    can inspect read latency.
    """
    def __init__(self, f):
        # Snapshot the source file's full contents into our own spooled
        # temporary file so the original can be reused by other wrappers.
        self._file = tempfile.SpooledTemporaryFile()
        f.seek(0)
        shutil.copyfileobj(f, self._file)

        self.last_chunks = self.chunks = None
        # Rewind to the start; this also initializes the chunk state.
        self.seek(0)

    def seek(self, offset):
        self._file.seek(offset)

        # only reset the chunks when seeking to the beginning
        if offset == 0:
            self.last_chunks = self.chunks
            self.last_seek = time.time()
            self.chunks = []

    def tell(self):
        return self._file.tell()

    def read(self, size=-1):
        # Delegate to the underlying file, then record timing for this read.
        payload = self._file.read(size)
        self._mark_chunk()
        return payload

    def _mark_chunk(self):
        # Record (current offset, nanoseconds since last rewind) for this read.
        delta = time.time() - self.last_seek
        delta_nsec = int(round(delta * NANOSECOND))
        self.chunks.append([self.tell(), delta_nsec])
|
||||||
|
|
||||||
class FileVerifier(object):
|
class FileVerifier(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.size = 0
|
self.size = 0
|
||||||
|
@ -141,6 +174,28 @@ def files(mean, stddev, seed=None):
|
||||||
break
|
break
|
||||||
yield RandomContentFile(size=size, seed=rand.getrandbits(32))
|
yield RandomContentFile(size=size, seed=rand.getrandbits(32))
|
||||||
|
|
||||||
|
def files2(mean, stddev, seed=None, numfiles=10):
    """
    Yields file objects with effectively random contents, where the
    size of each file follows the normal distribution with `mean` and
    `stddev`.

    Rather than continuously generating new files, this pre-computes and
    stores `numfiles` files and yields them in a loop.

    :param mean: mean file size in bytes
    :param stddev: standard deviation of the file size in bytes
    :param seed: optional RNG seed forwarded to `files()`
    :param numfiles: how many distinct files to pre-compute and cycle over
    """
    # pre-compute all the files (and save with TemporaryFiles)
    rand_files = files(mean, stddev, seed)
    fs = []
    # NOTE: was `xrange`, which does not exist on Python 3; `range` is
    # behavior-identical for this small, iterate-only use.
    for _ in range(numfiles):
        f = next(rand_files)
        t = tempfile.SpooledTemporaryFile()
        shutil.copyfileobj(f, t)
        fs.append(t)

    # Infinite generator: wrap each stored file fresh on every pass so
    # each yielded object has independent seek/chunk state.
    while True:
        for f in fs:
            yield PrecomputedContentFile(f)
|
||||||
|
|
||||||
def names(mean, stddev, charset=None, seed=None):
|
def names(mean, stddev, charset=None, seed=None):
|
||||||
"""
|
"""
|
||||||
Yields strings that are somewhat plausible as file names, where
|
Yields strings that are somewhat plausible as file names, where
|
||||||
|
|
Loading…
Reference in a new issue