precompute files in readwrite tool

Makes the readwrite tool precompute a set of files and reuse them when writing objects rather than generating each file on the fly.
2025-02-02 09:40:41 +00:00 · 2011-12-30 19:05:12 +00:00 · 2011-12-30 19:05:12 +00:00 · e9492927ae
commit e9492927ae
parent a268bcfbef
2 changed files with 56 additions and 1 deletion
--- a/s3tests/readwrite.py
+++ b/s3tests/readwrite.py
@ -168,7 +168,7 @@ def main():
            )
        file_names = itertools.islice(file_names, config.readwrite.files.num)
        file_names = list(file_names)
-        files = realistic.files(
+        files = realistic.files2(
            mean=1024 * config.readwrite.files.size,
            stddev=1024 * config.readwrite.files.stddev,
            seed=seeds['contents'],
--- a/s3tests/realistic.py
+++ b/s3tests/realistic.py
@ -4,6 +4,8 @@ import string
 import struct
 import time
 import math
+import tempfile
+import shutil


 NANOSECOND = int(1e9)
@ -84,6 +86,37 @@ class RandomContentFile(object):

        return ''.join(r)

+class PrecomputedContentFile(object):
+    """
+    File-like object serving a private copy of an already generated file.
+
+    Every `read` appends a `[position, elapsed_nanoseconds]` pair to
+    `chunks`, where the elapsed time is measured from the most recent
+    rewind to offset 0.  On each rewind the current list is moved to
+    `last_chunks` and a new, empty `chunks` list is started.
+    """
+    def __init__(self, f):
+        """
+        Copy the whole of file object `f` (rewound first) into a
+        private spooled temporary file, then rewind the copy.
+        """
+        self._file = tempfile.SpooledTemporaryFile()
+        f.seek(0)
+        shutil.copyfileobj(f, self._file)
+
+        self.last_chunks = self.chunks = None
+        self.seek(0)
+
+    def seek(self, offset, whence=0):
+        """
+        Move the read position, with the same meaning of `offset` and
+        `whence` as for ordinary file objects (0 = from the start,
+        1 = relative to the current position, 2 = relative to the end).
+
+        Landing on position 0 counts as a rewind: the chunk log is
+        rotated into `last_chunks` and the timer is restarted.
+        """
+        self._file.seek(offset, whence)
+
+        # Test the resulting position rather than the `offset` argument,
+        # so that seek(0, 2) on a non-empty file is not mistaken for a
+        # rewind.
+        if self.tell() == 0:
+            # only reset the chunks when seeking to the beginning
+            self.last_chunks = self.chunks
+            self.last_seek = time.time()
+            self.chunks = []
+
+    def tell(self):
+        """
+        Return the current position in the underlying temporary file.
+        """
+        return self._file.tell()
+
+    def read(self, size=-1):
+        """
+        Read up to `size` bytes (everything remaining when negative)
+        and log a chunk entry for the position reached.
+        """
+        data = self._file.read(size)
+        self._mark_chunk()
+        return data
+
+    def _mark_chunk(self):
+        """
+        Append `[current position, nanoseconds since last rewind]` to
+        `chunks`.
+        """
+        elapsed = time.time() - self.last_seek
+        elapsed_nsec = int(round(elapsed * NANOSECOND))
+        self.chunks.append([self.tell(), elapsed_nsec])
+
 class FileVerifier(object):
    def __init__(self):
        self.size = 0
@ -141,6 +174,28 @@ def files(mean, stddev, seed=None):
                break
        yield RandomContentFile(size=size, seed=rand.getrandbits(32))

+def files2(mean, stddev, seed=None, numfiles=10):
+    """
+    Yields file objects with effectively random contents, where the
+    size of each file follows the normal distribution with `mean` and
+    `stddev`.
+
+    Rather than continuously generating new files, this pre-computes and
+    stores `numfiles` files and yields them in a loop.
+
+    Each yielded object is a new `PrecomputedContentFile` holding its
+    own copy of one of the stored files.  The generator never finishes
+    on its own; callers are expected to bound it themselves.
+
+    Raises ValueError (on first iteration) if `numfiles` is less than 1.
+    """
+    if numfiles < 1:
+        # With an empty pool the endless loop below would spin forever
+        # without ever yielding anything.
+        raise ValueError(
+            'numfiles must be at least 1, got %r' % (numfiles,))
+
+    # pre-compute all the files (and save with TemporaryFiles)
+    rand_files = files(mean, stddev, seed)
+    fs = []
+    for _ in xrange(numfiles):
+        f = next(rand_files)
+        t = tempfile.SpooledTemporaryFile()
+        shutil.copyfileobj(f, t)
+        # `t` is left positioned at its end; PrecomputedContentFile
+        # rewinds its source before copying, so no seek is needed here.
+        fs.append(t)
+
+    while True:
+        for f in fs:
+            yield PrecomputedContentFile(f)
+
 def names(mean, stddev, charset=None, seed=None):
    """
    Yields strings that are somewhat plausible as file names, where