Do pseudorandom string generation 1 MB at a time.

Generating the data in 1 MB chunks, instead of one byte per getrandbits() call, gives read() a ~10x speedup.
This commit is contained in:
Tommi Virtanen 2011-07-06 15:27:50 -07:00
parent 2edd78ebbc
commit 81be846c3c

View file

@ -1,12 +1,14 @@
import hashlib import hashlib
import random import random
import string import string
import struct
class RandomContentFile(object): class RandomContentFile(object):
def __init__(self, size, seed): def __init__(self, size, seed):
self.seed = seed self.seed = seed
self.random = random.Random(self.seed) self.random = random.Random(self.seed)
self.offset = 0 self.offset = 0
self.buffer = ''
self.size = size self.size = size
self.hash = hashlib.md5() self.hash = hashlib.md5()
self.digest_size = self.hash.digest_size self.digest_size = self.hash.digest_size
@ -16,10 +18,19 @@ class RandomContentFile(object):
assert offset == 0 assert offset == 0
self.random.seed(self.seed) self.random.seed(self.seed)
self.offset = offset self.offset = offset
self.buffer = ''
def tell(self): def tell(self):
return self.offset return self.offset
def _generate(self):
    """Generate and return one chunk of pseudorandom data.

    Draws 64-bit integers from ``self.random`` and packs them, in the
    platform's native ``unsigned long long`` layout, into a single
    1 MiB string.
    """
    size = 1 * 1024 * 1024
    # 64 bits = 8 bytes per draw; floor division keeps the count an
    # integer regardless of the interpreter's division semantics.
    count = size // 8
    getrandbits = self.random.getrandbits
    words = [getrandbits(64) for _ in range(count)]
    # A repeat count ('<count>Q') packs the same bytes as repeating 'Q'
    # count times, without building a huge format string.
    return struct.pack('%dQ' % count, *words)
def read(self, size=-1): def read(self, size=-1):
if size < 0: if size < 0:
size = self.size - self.offset size = self.size - self.offset
@ -28,9 +39,11 @@ class RandomContentFile(object):
random_count = min(size, self.size - self.offset - self.digest_size) random_count = min(size, self.size - self.offset - self.digest_size)
if random_count > 0: if random_count > 0:
while len(self.buffer) < random_count:
self.buffer += self._generate()
self.offset += random_count self.offset += random_count
size -= random_count size -= random_count
data = ''.join(chr(self.random.getrandbits(8)) for _ in xrange(random_count)) data, self.buffer = self.buffer[:random_count], self.buffer[random_count:]
if self.hash is not None: if self.hash is not None:
self.hash.update(data) self.hash.update(data)
r.append(data) r.append(data)