|
from __future__ import division, absolute_import, print_function |
|
|
|
__all__ = ['memmap'] |
|
|
|
import warnings |
|
import sys |
|
|
|
import numpy as np |
|
from .numeric import uint8, ndarray, dtype |
|
from numpy.compat import long, basestring |
|
|
|
# Canonical single-token values accepted for the ``mode`` argument.
valid_filemodes = ["r", "c", "r+", "w+"]

# The canonical modes "r+" and "w+"; not referenced by the code visible in
# this module section -- presumably kept for external users (TODO confirm).
writeable_filemodes = ["r+", "w+"]

# Long-form spellings of ``mode`` mapped onto their canonical tokens.
mode_equivalents = dict([
    ("readonly", "r"),
    ("copyonwrite", "c"),
    ("readwrite", "r+"),
    ("write", "w+"),
])

# Alias for the dtype constructor: ``memmap.__new__`` has a parameter that
# is itself called ``dtype`` and would otherwise shadow it.
dtypedescr = dtype
|
|
|
class memmap(ndarray):
    """
    Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory.  NumPy's
    memmaps are array-like objects.  This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Delete the memmap instance to close.


    Parameters
    ----------
    filename : str or file-like object
        The file name or file object to be used as the array data buffer.
        A file that ``memmap`` opens itself (``filename`` given as a name)
        is closed again before the constructor returns or raises; a file
        object supplied by the caller is never closed by the constructor.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk.  The file on disk is         |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.  The long spellings 'readonly', 'readwrite',
        'write' and 'copyonwrite' are accepted as equivalents.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; The file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : int or sequence of ints, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type.  A single integer is treated as a 1-D shape; any
        other non-tuple sequence (e.g. a list) is converted to a tuple.
    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout: C (row-major) or
        Fortran (column-major).  This only has an effect if the shape is
        greater than 1-D.  The default order is 'C'.

    Attributes
    ----------
    filename : str
        Path to the mapped file, or None when no file name is available.
    offset : int
        Offset position in the file, exactly as passed to the constructor.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk before removing the object.

    Raises
    ------
    ValueError
        If `mode` is not a recognised mode, if ``mode == 'w+'`` and no
        `shape` is given, or if `shape` is omitted and the number of bytes
        after `offset` is not a multiple of the item size of `dtype`.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped arrays use the Python memory-map object which
    (prior to Python 2.5) does not allow files to be larger than a
    certain size depending on the platform. This size is always < 2GB
    even on 64-bit systems.

    Examples
    --------
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[ 0.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[  0.,   1.,   2.,   3.],
           [  4.,   5.,   6.,   7.],
           [  8.,   9.,  10.,  11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Deletion flushes memory changes to disk before removing the object:

    >>> del fp

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[  0.,   1.,   2.,   3.],
           [  4.,   5.,   6.,   7.],
           [  8.,   9.,  10.,  11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[  0.,   1.,   2.,   3.],
           [  4.,   5.,   6.,   7.],
           [  8.,   9.,  10.,  11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[  0.,   0.,   0.,   0.],
           [  4.,   5.,   6.,   7.],
           [  8.,   9.,  10.,  11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[  0.,   1.,   2.,   3.],
           [  4.,   5.,   6.,   7.],
           [  8.,   9.,  10.,  11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([  4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.], dtype=float32)

    """

    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        # Imported here rather than at module level.
        import mmap
        import os.path

        # Translate long-form mode names; anything else must already be one
        # of the canonical tokens.
        try:
            mode = mode_equivalents[mode]
        except KeyError:
            if mode not in valid_filemodes:
                raise ValueError("mode must be one of %s" %
                                 (valid_filemodes + list(mode_equivalents.keys())))

        # Validate *before* opening: opening a path with 'w+' truncates (or
        # creates) the file, which must not happen for a call that is going
        # to be rejected anyway.
        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given")

        if hasattr(filename, 'read'):
            # Caller-supplied file object: use it, but never close it.
            fid = filename
            own_file = False
        else:
            # Copy-on-write only needs read access to the file itself.
            fid = open(filename, ('r' if mode == 'c' else mode) + 'b')
            own_file = True

        try:
            # File length = position after seeking to the end.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            itemsize = descr.itemsize

            if shape is None:
                # Infer a 1-D shape from the bytes available after `offset`.
                nbytes = flen - offset
                if nbytes % itemsize:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = nbytes // itemsize
                shape = (size,)
            else:
                if not isinstance(shape, tuple):
                    try:
                        # Lists and other sequences of dimensions.
                        shape = tuple(shape)
                    except TypeError:
                        # A single, non-iterable dimension.
                        shape = (shape,)
                size = 1
                for k in shape:
                    size *= k

            # Total number of bytes from the start of the file up to the end
            # of the mapped array.
            nbytes = int(offset + size * itemsize)

            if mode == 'w+' or (mode == 'r+' and flen < nbytes):
                # Grow the file to the required length by writing one byte
                # at its new last position.
                fid.seek(nbytes - 1, 0)
                fid.write(b'\0')
                fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # The mapping has to start on a multiple of ALLOCATIONGRANULARITY,
            # so map from the preceding boundary and skip the remainder
            # inside the buffer.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            # Record the offset into the *file* (as documented), not the
            # offset into the mmap buffer.
            self.offset = offset
            self.mode = mode

            if isinstance(filename, basestring):
                self.filename = os.path.abspath(filename)
            elif (hasattr(filename, "name") and
                  isinstance(filename.name, basestring)):
                # File-like object exposing a usable name.
                self.filename = os.path.abspath(filename.name)
            else:
                # File-like object without a usable name.
                self.filename = None
        finally:
            # Close only what this constructor opened, on success and on
            # every error path alike.
            if own_file:
                fid.close()

        return self

    def __array_finalize__(self, obj):
        """
        Set the memmap attributes on a newly created instance.

        When `obj` carries a ``_mmap`` attribute and may share memory with
        `self`, the ``_mmap``, ``filename``, ``offset`` and ``mode``
        attributes are copied from `obj`.  Otherwise all four are set to
        None.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        # Delegate to the base object when it provides ``flush``; otherwise
        # this is a no-op.
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()
|
|