From 81be846c3cd246f499fdaa32ea8d17d9f9aadd93 Mon Sep 17 00:00:00 2001
From: Tommi Virtanen
Date: Wed, 6 Jul 2011 15:27:50 -0700
Subject: [PATCH] Do pseudorandom string generation 1 MB at a time.

This gives it a ~10x speedup.
---
 realistic.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/realistic.py b/realistic.py
index 1d55147..95d84f1 100644
--- a/realistic.py
+++ b/realistic.py
@@ -1,12 +1,14 @@
 import hashlib
 import random
 import string
+import struct
 
 class RandomContentFile(object):
     def __init__(self, size, seed):
         self.seed = seed
         self.random = random.Random(self.seed)
         self.offset = 0
+        self.buffer = ''
         self.size = size
         self.hash = hashlib.md5()
         self.digest_size = self.hash.digest_size
@@ -16,10 +18,19 @@ class RandomContentFile(object):
         assert offset == 0
         self.random.seed(self.seed)
         self.offset = offset
+        self.buffer = ''
 
     def tell(self):
         return self.offset
 
+    def _generate(self):
+        # generate and return a chunk of pseudorandom data
+        # 256 bits = 32 bytes at a time
+        size = 1*1024*1024
+        l = [self.random.getrandbits(64) for _ in xrange(size/8)]
+        s = struct.pack((size/8)*'Q', *l)
+        return s
+
     def read(self, size=-1):
         if size < 0:
             size = self.size - self.offset
@@ -28,9 +39,11 @@ class RandomContentFile(object):
 
         random_count = min(size, self.size - self.offset - self.digest_size)
         if random_count > 0:
+            while len(self.buffer) < random_count:
+                self.buffer += self._generate()
             self.offset += random_count
             size -= random_count
-            data = ''.join(chr(self.random.getrandbits(8)) for _ in xrange(random_count))
+            data, self.buffer = self.buffer[:random_count], self.buffer[random_count:]
             if self.hash is not None:
                 self.hash.update(data)
             r.append(data)