From: Francesc A. <fa...@py...> - 2004-11-10 10:50:10
I've ended up with a new rewrite of the EArray._calcBufferSize method, which I'm
including at the end of this message. Please play with the different values in
these lines:

    #bufmultfactor = int(1000 * 10)   # Conservative value
    bufmultfactor = int(1000 * 20)    # Medium value
    #bufmultfactor = int(1000 * 50)   # Aggressive value
    #bufmultfactor = int(1000 * 100)  # Very aggressive value

and tell me your feedback.
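
For the tests, something along these lines should do. This is only a minimal
sketch: the file name, array shape and block count are arbitrary, and it
assumes an EArray atom declared with a 0 in the extendable dimension, so adapt
it to taste. Edit bufmultfactor in the method, reinstall, and re-run:

import time
import numarray
import tables

def time_append(filename, nblocks=100):
    # Create an EArray that is extendable along its first dimension
    fileh = tables.openFile(filename, mode="w")
    atom = tables.Float64Atom(shape=(0, 128))  # 0 marks the extendable dim
    earray = fileh.createEArray(fileh.root, "ea", atom, "bench",
                                expectedrows=nblocks * 1000)
    # Append nblocks blocks of 1000 rows each, timing the whole run
    block = numarray.zeros((1000, 128), numarray.Float64)
    start = time.time()
    for i in range(nblocks):
        earray.append(block)
    fileh.close()
    return time.time() - start

print "appending took %.3f s" % time_append("bench.h5")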
--
Francesc Altet
def _calcBufferSize(self, atom, extdim, expectedrows, compress):
    """Calculate the buffer size and the HDF5 chunk size.

    The logic to do that is based purely on experiments playing
    with different buffer sizes, chunk sizes and the compression
    flag. It is obvious that using big buffers optimizes the I/O
    speed. This might (should) be further optimized by doing more
    experiments.
    """
    rowsize = atom.atomsize()
    #bufmultfactor = int(1000 * 10)   # Conservative value
    bufmultfactor = int(1000 * 20)    # Medium value
    #bufmultfactor = int(1000 * 50)   # Aggressive value
    #bufmultfactor = int(1000 * 100)  # Very aggressive value
    rowsizeinfile = rowsize
    expectedfsizeinKb = (expectedrows * rowsizeinfile) / 1024
    if expectedfsizeinKb <= 100:
        # Values for files less than 100 KB in size
        buffersize = 5 * bufmultfactor
    elif (expectedfsizeinKb > 100 and
          expectedfsizeinKb <= 1000):
        # Values for sizes between 100 KB and 1 MB
        buffersize = 10 * bufmultfactor
    elif (expectedfsizeinKb > 1000 and
          expectedfsizeinKb <= 20 * 1000):
        # Values for sizes between 1 MB and 20 MB
        buffersize = 20 * bufmultfactor
    elif (expectedfsizeinKb > 20 * 1000 and
          expectedfsizeinKb <= 200 * 1000):
        # Values for sizes between 20 MB and 200 MB
        buffersize = 40 * bufmultfactor
    elif (expectedfsizeinKb > 200 * 1000 and
          expectedfsizeinKb <= 2000 * 1000):
        # Values for sizes between 200 MB and 2 GB
        buffersize = 50 * bufmultfactor
    else:  # Greater than 2 GB
        buffersize = 60 * bufmultfactor
    # Maximum number of tuples (rows) that fit in the buffer
    maxTuples = buffersize // rowsize
    chunksizes = list(atom.shape)
    # Check if at least 1 tuple fits in the buffer
    if maxTuples > 1:
        # Yes, so the chunk sizes for the non-extendable dims are
        # left unchanged
        chunksizes[extdim] = maxTuples
    else:
        # No, so reduce the other dimensions until we get a proper
        # chunksizes shape
        chunksizes[extdim] = 1  # Only one row in the extendable dimension
        for j in range(len(chunksizes)):
            newrowsize = atom.itemsize
            for i in chunksizes[j+1:]:
                newrowsize *= i
            maxTuples = buffersize // newrowsize
            if maxTuples > 1:
                break
            chunksizes[j] = 1
        # Compute the chunksizes correctly for this j index
        chunksize = maxTuples
        if j < len(chunksizes):
            # Only modify chunksizes[j] if needed
            if chunksize < chunksizes[j]:
                chunksizes[j] = chunksize
        else:
            chunksizes[-1] = 1  # very large itemsizes!
        # Compute the correct maxTuples number for the reduced chunk
        newrowsize = atom.itemsize
        for i in chunksizes:
            newrowsize *= i
        maxTuples = buffersize // newrowsize
    return (buffersize, maxTuples, chunksizes)
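
To get a quick feel for what the method returns without creating any file, it
can be driven standalone with a fake atom. FakeAtom below is just a
hypothetical stand-in mimicking the three attributes the method touches, and
its atomsize assumes the usual convention that a 0 in the shape flags the
extendable dimension; since self is never used, None can be passed for it:

class FakeAtom:
    # Hypothetical stand-in for the Atom interface _calcBufferSize uses
    def __init__(self, itemsize, shape):
        self.itemsize = itemsize
        self.shape = shape
    def atomsize(self):
        # Bytes per row: itemsize times every non-extendable dimension
        # (the extendable one is flagged with a 0 in the shape)
        size = self.itemsize
        for i in self.shape:
            if i > 0:
                size *= i
        return size

# Rows of 128 float64 items, one million expected rows
print _calcBufferSize(None, FakeAtom(8, (0, 128)), 0, 1000000, 0)
# -> (1000000, 976, [976, 128]) with the medium bufmultfactor

# Rows so large that not even one of them fits in the buffer
print _calcBufferSize(None, FakeAtom(8, (0, 500, 500)), 0, 1000, 0)
# -> (1000000, 1, [1, 250, 500]) with the medium bufmultfactor

The second call exercises the else branch: the extendable dimension is pinned
to 1 and the leading non-extendable dimensions are reduced until a chunk fits
in the buffer.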