|
from __future__ import division, absolute_import, print_function |
|
|
|
import sys |
|
import os |
|
import re |
|
import itertools |
|
import warnings |
|
import weakref |
|
from operator import itemgetter |
|
|
|
import numpy as np |
|
from . import format |
|
from ._datasource import DataSource |
|
from ._compiled_base import packbits, unpackbits |
|
from ._iotools import ( |
|
LineSplitter, NameValidator, StringConverter, ConverterError, |
|
ConverterLockError, ConversionWarning, _is_string_like, has_nested_fields, |
|
flatten_dtype, easy_dtype, _bytes_to_name |
|
) |
|
|
|
from numpy.compat import ( |
|
asbytes, asstr, asbytes_nested, bytes, basestring, unicode |
|
) |
|
|
|
if sys.version_info[0] >= 3: |
|
import pickle |
|
else: |
|
import cPickle as pickle |
|
from future_builtins import map |
|
|
|
# Expose ``pickle.loads`` at module level; the name is listed in ``__all__``
# below, so it is part of this module's public interface.
loads = pickle.loads

# Names made available by ``from <this module> import *``.
__all__ = [
    'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
    'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
    'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource'
]
|
|
|
|
|
def seek_gzip_factory(f):
    """Return `f` as a gzip file object whose ``seek`` accepts ``whence=1``.

    The class is built inside the function so that ``gzip`` is imported
    lazily, only when it is actually needed.

    Parameters
    ----------
    f : str, gzip.GzipFile or any other object
        A ``str`` is treated as a path and opened.  An existing
        ``gzip.GzipFile`` is re-wrapped around the same underlying file
        object.  Anything else is returned unchanged.

    Returns
    -------
    f : object
        The seekable gzip wrapper, or the input itself when it was neither
        a ``str`` nor a ``gzip.GzipFile``.

    """
    import gzip

    class GzipFile(gzip.GzipFile):
        """``gzip.GzipFile`` with relative (``whence=1``) seeking."""

        def seek(self, offset, whence=0):
            # Readers that do not expose an ``offset`` attribute keep track
            # of the position themselves; let the stdlib handle them rather
            # than failing with AttributeError below.
            if not hasattr(self, 'offset'):
                return gzip.GzipFile.seek(self, offset, whence)

            if whence not in (0, 1):
                raise IOError("Illegal argument")

            # Translate a relative request into an absolute target.
            if whence == 1:
                offset = self.offset + offset

            # The stream can only be consumed forwards: to go back, restart
            # from the beginning and read up to the target.
            if offset < self.offset:
                self.rewind()
            count = offset - self.offset
            for _ in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)
            return self.offset

        def tell(self):
            if not hasattr(self, 'offset'):
                return gzip.GzipFile.tell(self)
            return self.offset

    if isinstance(f, str):
        f = GzipFile(f)
    elif isinstance(f, gzip.GzipFile):
        # Re-wrap an already opened gzip file in the seekable subclass.
        try:
            name = f.name
        except AttributeError:
            # Some gzip objects only carry the name as ``filename``.
            name = f.filename
        mode = f.mode

        f = GzipFile(fileobj=f.fileobj, filename=name)
        f.mode = mode

    return f
|
|
|
|
|
class BagObj(object):
    """
    BagObj(obj)

    Expose the items of an object as attributes.

    Every attribute access on a `BagObj` is forwarded to ``obj[name]``.

    Parameters
    ----------
    obj : class instance
        Object whose ``__getitem__`` answers the attribute look-ups.

    Examples
    --------
    >>> from numpy.lib.npyio import BagObj as BO
    >>> class BagDemo(object):
    ...     def __getitem__(self, key): # An instance of BagObj(BagDemo)
    ...                                 # will call this method when any
    ...                                 # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Only a weak proxy is stored, so the bag does not keep the wrapped
        # object alive on its own.
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        # Go through ``object`` directly; a plain ``self._obj`` would
        # re-enter this very method.
        wrapped = object.__getattribute__(self, '_obj')
        try:
            value = wrapped[key]
        except KeyError:
            # A missing item is reported the way a missing attribute is.
            raise AttributeError(key)
        return value
|
|
|
|
|
def zipfile_factory(*args, **kwargs):
    """Create a ``zipfile.ZipFile`` with ``allowZip64`` forced to True.

    All positional and keyword arguments are forwarded to
    ``zipfile.ZipFile``; any ``allowZip64`` value supplied by the caller is
    overridden.  ``zipfile`` is imported on first use.
    """
    from zipfile import ZipFile

    kwargs.update(allowZip64=True)
    return ZipFile(*args, **kwargs)
|
|
|
|
|
class NpzFile(object):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format.  Archive members whose name ends in ``.npy`` are listed without
    that extension; any other member is listed under its full name.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``.  A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        Names of the archive members, with a trailing ``.npy`` removed.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive, or None
        once the `NpzFile` has been closed.
    f : BagObj instance
        An object on which attribute lookup can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open.  This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> npz.files
    ['y', 'x']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    # Class-level defaults keep close() (and therefore __del__) safe on an
    # instance whose __init__ raised before these attributes were assigned.
    zip = None
    fid = None
    f = None

    def __init__(self, fid, own_fid=False):
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        # Public names: drop the ``.npy`` suffix, keep everything else as is.
        self.files = [name[:-4] if name.endswith('.npy') else name
                      for name in self._files]
        self.zip = _zip
        self.f = BagObj(self)
        # Remember the handle only when this object is responsible for it.
        self.fid = fid if own_fid else None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        Closes the zip archive and, if owned, the underlying file handle.
        Calling it more than once is harmless.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None

    def __del__(self):
        self.close()

    def __getitem__(self, key):
        """
        Return the archive member stored under `key`.

        `key` may be a full member name or a name from ``files`` (without
        the ``.npy`` extension).  Members starting with the ``.npy`` magic
        prefix are returned as arrays, all others as the raw bytes of the
        member.

        Raises
        ------
        KeyError
            If `key` names no member of the archive.

        """
        if key in self._files:
            member = key
        elif key in self.files:
            member = key + '.npy'
        else:
            raise KeyError("%s is not a file in the archive" % key)

        # Peek at the first bytes to tell arrays from arbitrary payloads.
        fp = self.zip.open(member)
        try:
            magic = fp.read(len(format.MAGIC_PREFIX))
        finally:
            fp.close()

        if magic != format.MAGIC_PREFIX:
            return self.zip.read(member)

        # Re-open to read from the start; make sure the member handle is
        # released once the array has been read.
        fp = self.zip.open(member)
        try:
            return format.read_array(fp)
        finally:
            fp.close()

    def __iter__(self):
        return iter(self.files)

    def items(self):
        """
        Return a list of tuples, with each tuple (filename, array in file).

        """
        return [(f, self[f]) for f in self.files]

    def iteritems(self):
        """Generator that returns tuples (filename, array in file)."""
        for f in self.files:
            yield (f, self[f])

    def keys(self):
        """Return the member names, i.e. the ``files`` list itself."""
        return self.files

    def iterkeys(self):
        """Return an iterator over the files in the archive."""
        return self.__iter__()

    def __contains__(self, key):
        return key in self.files
|
|
|
|
|
def load(file, mmap_mode=None):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    Parameters
    ----------
    file : file-like object or string
        The file to read.  File-like objects must support the
        ``seek()`` and ``read()`` methods.  Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes).  A
        memory-mapped array is kept on disk.  However, it can be accessed
        and sliced like any ndarray.  Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file.  For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read, or if it is
        neither a ``.npy``/``.npz`` file nor a readable pickle.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.  Unpickling can run arbitrary code, so
      only load files from sources you trust.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    import gzip

    own_fid = False
    if isinstance(file, basestring):
        fid = open(file, "rb")
        own_fid = True
    elif isinstance(file, gzip.GzipFile):
        fid = seek_gzip_factory(file)
    else:
        fid = file

    try:
        # Sniff the leading bytes to tell .npz, .npy and pickles apart.
        _ZIP_PREFIX = asbytes('PK\x03\x04')
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        # Back up only by what was actually read: a file shorter than the
        # magic prefix would otherwise be asked to seek before its start.
        fid.seek(-min(N, len(magic)), 1)
        if magic.startswith(_ZIP_PREFIX):
            # Zip archive (assumed .npz).  Hand ownership of the handle to
            # the NpzFile only after it was built successfully, so that a
            # failing constructor still gets the handle closed below.
            ret = NpzFile(fid, own_fid=own_fid)
            own_fid = False
            return ret
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid)
        else:
            # Anything else is tried as a pickle.
            # NOTE(security): unpickling untrusted data can execute
            # arbitrary code.
            try:
                return pickle.load(fid)
            except Exception:
                # ``Exception`` rather than a bare except, so that
                # KeyboardInterrupt / SystemExit are not turned into IOError.
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file))
    finally:
        if own_fid:
            fid.close()
|
|
|
|
|
def save(file, arr):
    """
    Write a single array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file or str
        File or filename to which the data is saved.  A file object is
        written to as given and left open.  For a string, ``.npy`` is
        appended unless the name already ends with it; the file is then
        opened for binary writing and closed again before returning.
    arr : array_like
        Array data to be saved.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see `format`.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    opened_here = isinstance(file, basestring)
    if opened_here:
        target = file if file.endswith('.npy') else file + '.npy'
        fid = open(target, "wb")
    else:
        fid = file

    try:
        format.write_array(fid, np.asanyarray(arr))
    finally:
        # Only close handles this function opened itself.
        if opened_here:
            fid.close()
|
|
|
|
|
def savez(file, *args, **kwds):
    """
    Save several arrays into a single file in uncompressed ``.npz`` format.

    Arrays passed positionally are stored under the names 'arr_0', 'arr_1',
    etc.  Arrays passed as keyword arguments are stored under the keyword
    names.

    Parameters
    ----------
    file : str or file
        Either the file name (string) or an open file (file-like object)
        where the data will be saved.  If file is a string, the ``.npz``
        extension will be appended to the file name if it is not already there.
    args : Arguments, optional
        Arrays to save to the file.  Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on.  These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file.  Arrays will be saved in the file with the
        keyword names.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format.  For a
    description of the ``.npy`` format, see `format`.

    When opening the saved ``.npz`` file with `load` a `NpzFile` object is
    returned.  This is a dictionary-like object which can be queried for
    its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_1', 'arr_0']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['y', 'x']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    # All the work happens in the shared helper; this entry point only
    # selects the uncompressed variant.
    _savez(file, args, kwds, compress=False)
|
|
|
|
|
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    Arrays given as keyword arguments are stored under the keyword names.
    Arrays given positionally are stored as arr_0, arr_1, etc.

    Parameters
    ----------
    file : str
        File name of ``.npz`` file.
    args : Arguments
        Function arguments.
    kwds : Keyword arguments
        Keywords.

    See Also
    --------
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    """
    # Same helper as `savez`, with compression switched on.
    _savez(file, args, kwds, compress=True)
|
|
|
|
|
def _savez(file, args, kwds, compress):
    """Write arrays into a ``.npz`` zip archive (shared by the savez variants).

    Parameters
    ----------
    file : str or file
        Target archive.  For a string, ``.npz`` is appended unless the name
        already ends with it.
    args : sequence
        Arrays stored under the generated names ``arr_0``, ``arr_1``, ...
    kwds : dict
        Arrays stored under their keys.  The mapping is not modified.
    compress : bool
        Use ``ZIP_DEFLATED`` when true, ``ZIP_STORED`` otherwise.

    Raises
    ------
    ValueError
        If a generated ``arr_N`` name collides with a key of `kwds`.

    """
    import zipfile
    import tempfile

    if isinstance(file, basestring):
        if not file.endswith('.npz'):
            file = file + '.npz'

    # Work on a copy so the caller's mapping is left untouched.
    namedict = dict(kwds)
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict:
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)
    try:
        # Each array is first written to one reusable temporary file on
        # disk and then added to the archive from there.
        fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
        os.close(fd)
        try:
            for key, val in namedict.items():
                fname = key + '.npy'
                fid = open(tmpfile, 'wb')
                try:
                    format.write_array(fid, np.asanyarray(val))
                finally:
                    fid.close()
                zipf.write(tmpfile, arcname=fname)
        finally:
            os.remove(tmpfile)
    finally:
        # Close the archive even when writing a member failed, so the
        # handle is not leaked.
        zipf.close()
|
|
|
|
|
def _getconv(dtype): |
|
""" Find the correct dtype converter. Adapted from matplotlib """ |
|
typ = dtype.type |
|
if issubclass(typ, np.bool_): |
|
return lambda x: bool(int(x)) |
|
if issubclass(typ, np.uint64): |
|
return np.uint64 |
|
if issubclass(typ, np.int64): |
|
return np.int64 |
|
if issubclass(typ, np.integer): |
|
return lambda x: int(float(x)) |
|
elif issubclass(typ, np.floating): |
|
return float |
|
elif issubclass(typ, np.complex): |
|
return complex |
|
elif issubclass(typ, np.bytes_): |
|
return bytes |
|
else: |
|
return str |
|
|
|
|
|
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0):
    """
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        record data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str, optional
        The character used to indicate the start of a comment;
        default: '#'.
    delimiter : str, optional
        The string used to separate values.  By default, this is any
        whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will convert
        that column to a float.  E.g., if column 0 is a date string:
        ``converters = {0: datestr2num}``.  Converters can also be used to
        provide a default value for missing data (but see also `genfromtxt`):
        ``converters = {3: lambda s: float(s.strip() or 0)}``.  Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a record
        data-type, arrays are returned for each field.  Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    Raises
    ------
    ValueError
        If `fname` is not a string, file handle or generator, if a line
        has the wrong number of columns, or if `ndmin` is not 0, 1 or 2.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Examples
    --------
    >>> from StringIO import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\\n2 3")
    >>> np.loadtxt(c)
    array([[ 0., 1.],
           [ 2., 3.]])

    >>> d = StringIO("M 21 72\\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1., 3.])
    >>> y
    array([ 2., 4.])

    """
    # Lines are processed as bytes, so bring the textual options along.
    comments = asbytes(comments)
    user_converters = converters
    if delimiter is not None:
        delimiter = asbytes(delimiter)
    if usecols is not None:
        usecols = list(usecols)

    fown = False
    try:
        if _is_string_like(fname):
            fown = True
            if fname.endswith('.gz'):
                fh = iter(seek_gzip_factory(fname))
            elif fname.endswith('.bz2'):
                import bz2
                fh = iter(bz2.BZ2File(fname))
            elif sys.version_info[0] == 2:
                fh = iter(open(fname, 'U'))
            else:
                fh = iter(open(fname))
        else:
            fh = iter(fname)
    except TypeError:
        raise ValueError('fname must be a string, file handle, or generator')
    X = []

    def _flatten(dt):
        """Unpack a structured data-type, and produce re-packing info."""
        if dt.names is None:
            # Plain (possibly sub-array) dtype.  With a shape, the base
            # dtype is repeated once per element.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            types = []
            packing = []
            for field in dt.names:
                # A field entry is (dtype, offset) or (dtype, offset,
                # title); only the dtype is needed here.
                tp = dt.fields[field][0]
                flat_dt, flat_packing = _flatten(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if len(tp.shape) > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)

    def pack_items(items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(pack_items(items[start:start+length], subpacking))
                start += length
            return tuple(ret)

    def split_line(line):
        """Chop off comments, strip, and split at delimiter."""
        line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
        if line:
            return line.split(delimiter)
        else:
            return []

    try:
        # Make sure we are dealing with a proper dtype
        dtype = np.dtype(dtype)
        defconv = _getconv(dtype)

        # Skip the first `skiprows` lines
        for _ in range(skiprows):
            next(fh)

        # Read until a line with some values is found, and use it to
        # determine the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # No data line at all
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname)
        N = len(usecols or first_vals)

        dtype_types, packing = _flatten(dtype)
        if len(dtype_types) > 1:
            # Structured array: each flattened field matches one column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # A single dtype shared by all columns
            converters = [defconv for _ in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # Converters given by the user take precedence
        for col, conv in (user_converters or {}).items():
            if usecols:
                try:
                    col = usecols.index(col)
                except ValueError:
                    # Converter for a column that is not read
                    continue
            converters[col] = conv

        # Parse each line, including the first
        for i, line in enumerate(itertools.chain([first_line], fh)):
            vals = split_line(line)
            if len(vals) == 0:
                continue
            if usecols:
                # A separate loop name keeps the line counter `i` intact.
                vals = [vals[col] for col in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column ...
            items = [conv(val) for (conv, val) in zip(converters, vals)]
            # ... then pack the row according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
    finally:
        if fown:
            fh.close()

    X = np.array(X, dtype)

    # A single row of multi-column data comes out as (1, 1, M); drop the
    # extra singleton axis.
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)

    # Remove extraneous dimensions first ...
    if X.ndim > ndmin:
        X = np.squeeze(X)

    # ... then make sure at least `ndmin` dimensions are present.  The order
    # matters for ndmin=1 when squeezing produced a 0-d array.
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return one array per field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X
|
|
|
|
|
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# '):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:
            a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted
               like `' (%s+%sj)' % (fmt, fmt)`
            b) a full string specifying every real and imaginary part, e.g.
               `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns
            c) a list of specifiers, one per column - in this case, the real
               and imaginary part must have separate specifiers,
               e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ',  as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0

    Raises
    ------
    ValueError
        If `fname` is neither a string nor an object with a ``write``
        method, if `fmt` is of an unsupported type, or if a format string
        holds the wrong number of ``%`` specifiers.
    AttributeError
        If `fmt` is a sequence whose length differs from the number of
        columns.

    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <http://docs.python.org/library/string.html#
           format-specification-mini-language>`_, Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """
    # Normalise byte-string options to text before any formatting is done.
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    own_fh = False
    if _is_string_like(fname):
        own_fh = True
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            if sys.version_info[0] >= 3:
                fh = open(fname, 'wb')
            else:
                fh = open(fname, 'w')
    elif hasattr(fname, 'write'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Work out the number of columns.
        if X.ndim == 1:
            if X.dtype.names is None:
                # Plain 1-d data is written as a single column.
                X = np.atleast_2d(X).T
                ncol = 1
            else:
                # Structured dtype: one column per entry of the descr.
                ncol = len(X.dtype.descr)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)

        # `fmt` is either a sequence with one format per column, or a
        # string holding one or several insertion points.  The result is
        # kept in `row_fmt` (not `format`) so the imported `format` module
        # is not shadowed.
        if isinstance(fmt, (list, tuple)):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape. %s' % str(fmt))
            row_fmt = delimiter.join([asstr(spec) for spec in fmt])
        elif isinstance(fmt, basestring):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats: %s' % fmt)
            if n_fmt_chars == 1:
                # One specifier: repeat it for every column.
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                row_fmt = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                # Complex data needs two specifiers per column.
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                row_fmt = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            # Every header line gets the comment prefix.
            header = header.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + header + newline))
        if iscomplex_X:
            for row in X:
                # Interleave real and imaginary parts: re0, im0, re1, im1...
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                fh.write(asbytes(row_fmt % tuple(row2) + newline))
        else:
            for row in X:
                fh.write(asbytes(row_fmt % tuple(row) + newline))
        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + footer + newline))
    finally:
        if own_fh:
            fh.close()
|
|
|
|
|
def fromregex(file, regexp, dtype):
    """
    Construct an array from a text file, using regular expression parsing.

    The returned array is always a structured array, and is constructed from
    all matches of the regular expression in the file. Groups in the regular
    expression are converted to fields of the structured array.

    Parameters
    ----------
    file : str or file
        File name or file object to read.
    regexp : str or regexp
        Regular expression used to parse the file.
        Groups in the regular expression correspond to fields in the dtype.
    dtype : dtype or list of dtypes
        Dtype for the structured array.

    Returns
    -------
    output : ndarray
        The output array, containing the part of the content of `file` that
        was matched by `regexp`. `output` is always a structured array.

    Raises
    ------
    TypeError
        When `dtype` is not a valid dtype for a structured array.

    See Also
    --------
    fromstring, loadtxt

    Notes
    -----
    Dtypes for structured arrays can be specified in several forms, but all
    forms specify at least the data type and field name. For details see
    `doc.structured_arrays`.

    Examples
    --------
    >>> f = open('test.dat', 'w')
    >>> f.write("1312 foo\\n1534 bar\\n444 qux")
    >>> f.close()

    >>> regexp = r"(\\d+)\\s+(...)"  # match [digits, whitespace, anything]
    >>> output = np.fromregex('test.dat', regexp,
    ...                       [('num', np.int64), ('key', 'S3')])
    >>> output
    array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')],
          dtype=[('num', '<i8'), ('key', '|S3')])
    >>> output['num']
    array([1312, 1534, 444], dtype=int64)

    """
    # Anything without a ``read`` method is taken to be a path and opened
    # (and later closed) here.
    opened_here = not hasattr(file, "read")
    if opened_here:
        file = open(file, 'rb')

    try:
        if hasattr(regexp, 'match'):
            pattern = regexp
        else:
            pattern = re.compile(asbytes(regexp))
        if not isinstance(dtype, np.dtype):
            dtype = np.dtype(dtype)

        matches = pattern.findall(file.read())
        single_group = bool(matches) and not isinstance(matches[0], tuple)
        if not single_group:
            return np.array(matches, dtype=dtype)

        # With a single group, findall yields plain strings instead of
        # tuples.  Build the array with the dtype of the first field, then
        # reinterpret it as the requested structured dtype.
        field_dtype = np.dtype(dtype[dtype.names[0]])
        output = np.array(matches, dtype=field_dtype)
        output.dtype = dtype
        return output
    finally:
        if opened_here:
            file.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
               skiprows=0, skip_header=0, skip_footer=0, converters=None,
               missing='', missing_values=None, filling_values=None,
               usecols=None, names=None,
               excludelist=None, deletechars=None, replace_space='_',
               autostrip=False, case_sensitive=True, defaultfmt="f%i",
               unpack=None, usemask=False, loose=True, invalid_raise=True):
    """
    Load data from a text file, with missing values handled as specified.

    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.

    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read.  If the filename extension is
        `.gz` or `.bz2`, the file is first decompressed. Note that
        generators must return byte strings in Python 3k.
    dtype : dtype, optional
        Data type of the resulting array.
        If None, the dtypes will be determined by the contents of each
        column, individually.
    comments : str, optional
        The character used to indicate the start of a comment.
        All the characters occurring on a line after a comment are discarded.
        May be None, in which case no comment marker is removed from the
        line holding the field names when `names` is True.
    delimiter : str, int, or sequence, optional
        The string used to separate values.  By default, any consecutive
        whitespaces act as delimiter.  An integer or sequence of integers
        can also be provided as width(s) of each field.
    skiprows : int, optional
        `skiprows` was deprecated in numpy 1.5, and will be removed in
        numpy 2.0. Please use `skip_header` instead.
    skip_header : int, optional
        The number of lines to skip at the beginning of the file.
    skip_footer : int, optional
        The number of lines to skip at the end of the file.
    converters : variable, optional
        The set of functions that convert the data of a column to a value.
        The converters can also be used to provide a default value
        for missing data: ``converters = {3: lambda s: float(s or 0)}``.
    missing : variable, optional
        `missing` was deprecated in numpy 1.5, and will be removed in
        numpy 2.0. Please use `missing_values` instead.
    missing_values : variable, optional
        The set of strings corresponding to missing data.
    filling_values : variable, optional
        The set of values to be used as default when the data are missing.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first valid line
        after the first `skip_header` lines.
        If `names` is a sequence or a single-string of comma-separated names,
        the names will be used to define the field names in a structured dtype.
        If `names` is None, the names of the dtype fields will be used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default list
        ['return','file','print']. Excluded names are appended an underscore:
        for example, `file` would become `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or "f_%02i".
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    replace_space : char, optional
        Character(s) used in replacement of white spaces in the variables
        names. By default, use a '_'.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``
    usemask : bool, optional
        If True, return a masked array.
        If False, return a regular array.
    loose : bool, optional
        If True, do not raise errors for invalid values.
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in the
        number of columns.
        If False, a warning is emitted and the offending lines are skipped.

    Returns
    -------
    out : ndarray
        Data read from the text file. If `usemask` is True, this is a
        masked array.

    Raises
    ------
    TypeError
        If `converters` is not a dictionary, or if `fname` is neither a
        string nor an iterable.
    ValueError
        If some lines do not have the expected number of columns and
        `invalid_raise` is True.
    ConverterError
        If `dtype` is None and a locked converter cannot be upgraded to
        handle one of the values of its column.
    NotImplementedError
        If `dtype` has nested fields involving objects.

    See Also
    --------
    numpy.loadtxt : equivalent function when no data is missing.

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been given
      as input, there should not be any missing data between two fields.
    * When the variables are named (either by a flexible dtype or with
      `names`), there must not be any header in the file (else a ValueError
      exception is raised).
    * Individual values are not stripped of spaces by default.
      When using a custom converter, make sure the function does remove spaces.
    * A file opened by this function (i.e. when `fname` is a string) is
      closed before returning, including when parsing raises an exception.

    References
    ----------
    .. [1] Numpy User Guide, section `I/O with Numpy
           <http://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.

    Examples
    --------
    >>> from StringIO import StringIO
    >>> import numpy as np

    Comma delimited file with mixed dtype

    >>> s = StringIO("1,1.3,abcde")
    >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
    ... ('mystring','S5')], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Using dtype = None

    >>> s.seek(0) # needed for StringIO example only
    >>> data = np.genfromtxt(s, dtype=None,
    ... names = ['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Specifying dtype and names

    >>> s.seek(0)
    >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
    ... names=['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    An example with fixed-width columns

    >>> s = StringIO("11.3abcde")
    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
    ...     delimiter=[1,3,5])
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])

    """
    # Convert the text-like arguments to bytes: lines are read in binary mode.
    if comments is not None:
        comments = asbytes(comments)
    if isinstance(delimiter, unicode):
        delimiter = asbytes(delimiter)
    if isinstance(missing, unicode):
        missing = asbytes(missing)
    if isinstance(missing_values, (unicode, list, tuple)):
        missing_values = asbytes_nested(missing_values)

    # numpy.ma is only imported when a masked output is requested.
    if usemask:
        from numpy.ma import MaskedArray, make_mask_descr

    # Check the input dictionary of converters.
    user_converters = converters or {}
    if not isinstance(user_converters, dict):
        raise TypeError(
            "The input argument 'converter' should be a valid dictionary "
            "(got '%s' instead)" % type(user_converters))

    # Get an iterator over the lines; remember whether we opened the file
    # ourselves so that we know whether we are responsible for closing it.
    own_fhd = False
    try:
        if isinstance(fname, basestring):
            if sys.version_info[0] == 2:
                fhd = iter(np.lib._datasource.open(fname, 'rbU'))
            else:
                fhd = iter(np.lib._datasource.open(fname, 'rb'))
            own_fhd = True
        else:
            fhd = iter(fname)
    except TypeError:
        raise TypeError(
            "fname must be a string, filehandle, or generator. "
            "(got %s instead)" % type(fname))

    # Everything that reads from `fhd` lives in this try block so that a
    # file we opened is closed even when the parsing fails half-way.
    try:
        split_line = LineSplitter(delimiter=delimiter, comments=comments,
                                  autostrip=autostrip)._handyman
        validate_names = NameValidator(excludelist=excludelist,
                                       deletechars=deletechars,
                                       case_sensitive=case_sensitive,
                                       replace_space=replace_space)

        # Process the deprecated `skiprows`.
        if skiprows:
            warnings.warn(
                "The use of `skiprows` is deprecated, it will be removed in "
                "numpy 2.0.\nPlease use `skip_header` instead.",
                DeprecationWarning)
            skip_header = skiprows

        # Skip the first `skip_header` rows.
        for i in range(skip_header):
            next(fhd)

        # Keep on reading until we find the first line yielding values.
        first_values = None
        try:
            while not first_values:
                first_line = next(fhd)
                # When the names are read from the file, the line holding
                # them may itself start with the comment marker: keep what
                # follows the marker.  `comments` may be None, in which case
                # there is no marker to look for.
                if (names is True) and (comments is not None):
                    if comments in first_line:
                        first_line = (
                            asbytes('').join(first_line.split(comments)[1:]))
                first_values = split_line(first_line)
        except StopIteration:
            # Nothing (left) to read: carry on with an empty first line.
            first_line = asbytes('')
            first_values = []
            warnings.warn('genfromtxt: Empty input file: "%s"' % fname)

        # Drop a leading field that is only made of the comment marker.
        if names is True:
            fval = first_values[0].strip()
            if (comments is not None) and (fval in comments):
                del first_values[0]

        # Normalize `usecols` to a list (of names and/or indices).
        if usecols is not None:
            try:
                usecols = [_.strip() for _ in usecols.split(",")]
            except AttributeError:
                try:
                    usecols = list(usecols)
                except TypeError:
                    usecols = [usecols, ]
        nbcols = len(usecols or first_values)

        # Check the names and overwrite the dtype.names if needed.
        if names is True:
            names = validate_names([_bytes_to_name(_.strip())
                                    for _ in first_values])
            # The first line held the names: it must not be parsed as data.
            first_line = asbytes('')
        elif _is_string_like(names):
            names = validate_names([_.strip() for _ in names.split(',')])
        elif names:
            names = validate_names(names)

        # Get the dtype.
        if dtype is not None:
            dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names)

        # Make sure the names is a list (for Python 2.5).
        if names is not None:
            names = list(names)

        if usecols:
            for (i, current) in enumerate(usecols):
                # Names are converted to indices, negative indices are
                # made relative to the number of fields of the first line.
                if _is_string_like(current):
                    usecols[i] = names.index(current)
                elif current < 0:
                    usecols[i] = current + len(first_values)

            # Restrict the dtype / the names to the selected columns.
            if (dtype is not None) and (len(dtype) > nbcols):
                descr = dtype.descr
                dtype = np.dtype([descr[_] for _ in usecols])
                names = list(dtype.names)

            elif (names is not None) and (len(names) > nbcols):
                names = [names[_] for _ in usecols]
        elif (names is not None) and (dtype is not None):
            names = list(dtype.names)

        # Process the missing values ...............................

        # Rename the input for convenience.
        user_missing_values = missing_values or ()

        # Define the list of missing_values (one column: one list).
        missing_values = [list([asbytes('')]) for _ in range(nbcols)]

        # We have a dictionary: process it field by field.
        if isinstance(user_missing_values, dict):

            for (key, val) in user_missing_values.items():

                # A key given as a name is transformed into an index.
                if _is_string_like(key):
                    try:
                        key = names.index(key)
                    except ValueError:
                        # The name could not be found: ignore the entry.
                        continue

                # Redefine the key as a position in `usecols` if possible.
                if usecols:
                    try:
                        key = usecols.index(key)
                    except ValueError:
                        pass

                # Transform the value(s) into a list of strings.
                if isinstance(val, (list, tuple)):
                    val = [str(_) for _ in val]
                else:
                    val = [str(val), ]

                # Add the value(s) to the current list of missing.
                if key is None:
                    # None acts as default: applies to every column.
                    for miss in missing_values:
                        miss.extend(val)
                else:
                    missing_values[key].extend(val)

        # We have a sequence: each item matches a column.
        elif isinstance(user_missing_values, (list, tuple)):
            for (value, entry) in zip(user_missing_values, missing_values):
                value = str(value)
                if value not in entry:
                    entry.append(value)

        # We have a (byte) string: apply it to all entries.
        elif isinstance(user_missing_values, bytes):
            user_value = user_missing_values.split(asbytes(","))
            for entry in missing_values:
                entry.extend(user_value)

        # We have something else: apply it to all entries.
        else:
            for entry in missing_values:
                entry.extend([str(user_missing_values)])

        # Process the deprecated `missing`.
        if missing != asbytes(''):
            warnings.warn(
                "The use of `missing` is deprecated, it will be removed in "
                "Numpy 2.0.\nPlease use `missing_values` instead.",
                DeprecationWarning)
            values = [str(_) for _ in missing.split(asbytes(","))]
            for entry in missing_values:
                entry.extend(values)

        # Process the filling_values ...............................

        # Rename the input for convenience.
        user_filling_values = filling_values or []

        # Define the default: one (empty) filling value per column.
        filling_values = [None] * nbcols

        # We have a dictionary: update each entry individually.
        if isinstance(user_filling_values, dict):
            for (key, val) in user_filling_values.items():
                if _is_string_like(key):
                    try:
                        # Transform the name into an index.
                        key = names.index(key)
                    except ValueError:
                        # The name could not be found: ignore the entry.
                        continue

                # Redefine the key as a position in `usecols` if possible.
                if usecols:
                    try:
                        key = usecols.index(key)
                    except ValueError:
                        pass

                filling_values[key] = val

        # We have a sequence: update on a one-to-one basis.
        elif isinstance(user_filling_values, (list, tuple)):
            n = len(user_filling_values)
            if (n <= nbcols):
                filling_values[:n] = user_filling_values
            else:
                filling_values = user_filling_values[:nbcols]

        # We have something else: use it for all entries.
        else:
            filling_values = [user_filling_values] * nbcols

        # Initialize the converters ................................
        if dtype is None:
            # One distinct converter per column (built in a comprehension
            # so that the columns do not share a single instance).
            converters = [
                StringConverter(None, missing_values=miss, default=fill)
                for (miss, fill) in zip(missing_values, filling_values)]
        else:
            dtype_flat = flatten_dtype(dtype, flatten_base=True)

            if len(dtype_flat) > 1:
                # Several flattened dtypes: one locked converter for each.
                zipit = zip(dtype_flat, missing_values, filling_values)
                converters = [StringConverter(dt, locked=True,
                                              missing_values=miss,
                                              default=fill)
                              for (dt, miss, fill) in zipit]
            else:
                # Same dtype for every column, but with the missing and
                # filling values of each column.
                zipit = zip(missing_values, filling_values)
                converters = [StringConverter(dtype, locked=True,
                                              missing_values=miss,
                                              default=fill)
                              for (miss, fill) in zipit]

        # Update the converters to use the user-defined ones.
        uc_update = []
        for (j, conv) in user_converters.items():
            # `j` indexes the fields of a line, `i` indexes `converters`.
            if _is_string_like(j):
                try:
                    j = names.index(j)
                    i = j
                except ValueError:
                    continue
            elif usecols:
                try:
                    i = usecols.index(j)
                except ValueError:
                    # The converter targets a column that is not read.
                    continue
            else:
                i = j

            # Test the converter on the first line, unless that line was
            # emptied (names read from it, or empty input).
            if len(first_line):
                testing_value = first_values[j]
            else:
                testing_value = None
            converters[i].update(conv, locked=True,
                                 testing_value=testing_value,
                                 default=filling_values[i],
                                 missing_values=missing_values[i],)
            uc_update.append((i, conv))

        # Make sure user_converters is also keyed by converter position.
        user_converters.update(uc_update)

        # Initialize the output lists: rows, masks and invalid lines.
        rows = []
        append_to_rows = rows.append

        if usemask:
            masks = []
            append_to_masks = masks.append

        invalid = []
        append_to_invalid = invalid.append

        # Parse each line, starting with the one already read.
        for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
            values = split_line(line)
            nbvalues = len(values)

            # Skip an empty line.
            if nbvalues == 0:
                continue

            # Select only the columns we need; lines that are too short or
            # of the wrong length are recorded with their line number.
            if usecols:
                try:
                    values = [values[_] for _ in usecols]
                except IndexError:
                    append_to_invalid((i + skip_header + 1, nbvalues))
                    continue
            elif nbvalues != nbcols:
                append_to_invalid((i + skip_header + 1, nbvalues))
                continue

            # Store the values (and whether each one is a missing marker).
            append_to_rows(tuple(values))
            if usemask:
                append_to_masks(tuple([v.strip() in m
                                       for (v, m) in zip(values,
                                                         missing_values)]))
    finally:
        if own_fhd:
            fhd.close()

    # Upgrade the converters (if needed).
    if dtype is None:
        for (i, converter) in enumerate(converters):
            current_column = [itemgetter(i)(_m) for _m in rows]
            try:
                converter.iterupgrade(current_column)
            except ConverterLockError:
                # Go through the column value by value to report the first
                # one the locked converter cannot handle.
                errmsg = "Converter #%i is locked and cannot be upgraded: " % i
                current_column = map(itemgetter(i), rows)
                for (j, value) in enumerate(current_column):
                    try:
                        converter.upgrade(value)
                    except (ConverterError, ValueError):
                        errmsg += "(occurred line #%i for value '%s')"
                        errmsg %= (j + 1 + skip_header, value)
                        raise ConverterError(errmsg)

    # Check that we don't have invalid lines.
    nbinvalid = len(invalid)
    if nbinvalid > 0:
        nbrows = len(rows) + nbinvalid - skip_footer

        # Construct the error message.
        template = " Line #%%i (got %%i columns instead of %i)" % nbcols
        if skip_footer > 0:
            # Invalid lines located in the footer are neither reported nor
            # counted again when the footer is stripped from `rows`.
            nbinvalid_skipped = len([_ for _ in invalid
                                     if _[0] > nbrows + skip_header])
            invalid = invalid[:nbinvalid - nbinvalid_skipped]
            skip_footer -= nbinvalid_skipped

        errmsg = [template % (i, nb)
                  for (i, nb) in invalid]
        if len(errmsg):
            errmsg.insert(0, "Some errors were detected !")
            errmsg = "\n".join(errmsg)

            # Raise an exception, or only issue a warning?
            if invalid_raise:
                raise ValueError(errmsg)

            else:
                warnings.warn(errmsg, ConversionWarning)

    # Strip the last skip_footer data.
    if skip_footer > 0:
        rows = rows[:-skip_footer]
        if usemask:
            masks = masks[:-skip_footer]

    # Convert each value according to the converter of its column: the
    # conversion is done column by column, then transposed back into rows.
    if loose:
        rows = list(
            zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))
    else:
        rows = list(
            zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))

    # Build the output array.
    data = rows
    if dtype is None:
        # Get the dtypes from the types of the converters.
        column_types = [conv.type for conv in converters]

        # Find the columns with strings...
        strcolidx = [i for (i, v) in enumerate(column_types)
                     if v in (type('S'), np.string_)]

        # ... and size them after their longest item.
        for i in strcolidx:
            column_types[i] = "|S%i" % max(len(row[i]) for row in data)

        if names is None:
            # If the checked converters share a single type, use a plain
            # dtype; else build default field names from `defaultfmt`.
            base = set([c.type for c in converters if c._checked])
            if len(base) == 1:
                (ddtype, mdtype) = (list(base)[0], np.bool)
            else:
                ddtype = [(defaultfmt % i, dt)
                          for (i, dt) in enumerate(column_types)]
                if usemask:
                    mdtype = [(defaultfmt % i, np.bool)
                              for (i, dt) in enumerate(column_types)]
        else:
            ddtype = list(zip(names, column_types))
            mdtype = list(zip(names, [np.bool] * len(column_types)))
        output = np.array(data, dtype=ddtype)
        if usemask:
            outputmask = np.array(masks, dtype=mdtype)
    else:
        # Overwrite the initial dtype names if needed.
        if names and dtype.names:
            dtype.names = names

        # Case 1. The flattened dtype has several entries.
        if len(dtype_flat) > 1:
            # Unless objects are involved, the array is first created with
            # the flattened dtype, then viewed with the requested dtype.
            if 'O' in (_.char for _ in dtype_flat):
                if has_nested_fields(dtype):
                    raise NotImplementedError(
                        "Nested fields involving objects are not supported...")
                else:
                    output = np.array(data, dtype=dtype)
            else:
                rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
                output = rows.view(dtype)

            # Now, process the row masks the same way.
            if usemask:
                rowmasks = np.array(
                    masks, dtype=np.dtype([('', np.bool) for t in dtype_flat]))

                # Construct the mask dtype matching `dtype`.
                mdtype = make_mask_descr(dtype)
                outputmask = rowmasks.view(mdtype)

        # Case 2. The flattened dtype has a single entry.
        else:
            # User-defined converters may have changed the type of some
            # columns: the dtype is then rebuilt from the converters.
            if user_converters:
                ishomogeneous = True
                descr = []
                for i, ttype in enumerate([conv.type for conv in converters]):

                    # Keep the type of the user-defined converter.
                    if i in user_converters:
                        ishomogeneous &= (ttype == dtype.type)
                        if ttype == np.string_:
                            ttype = "|S%i" % max(len(row[i]) for row in data)
                        descr.append(('', ttype))
                    else:
                        descr.append(('', dtype))

                if not ishomogeneous:
                    # More than one column: use a structured dtype.
                    if len(descr) > 1:
                        dtype = np.dtype(descr)

                    # A single column: use its type directly.
                    else:
                        dtype = np.dtype(ttype)

            output = np.array(data, dtype)
            if usemask:
                if dtype.names:
                    mdtype = [(_, np.bool) for _ in dtype.names]
                else:
                    mdtype = np.bool
                outputmask = np.array(masks, dtype=mdtype)

    # Also mask the items equal to one of the converted missing values.
    names = output.dtype.names
    if usemask and names:
        for (name, conv) in zip(names or (), converters):
            missing_values = [conv(_) for _ in conv.missing_values
                              if _ != asbytes('')]
            for mval in missing_values:
                outputmask[name] |= (output[name] == mval)

    # Construct the final array.
    if usemask:
        output = output.view(MaskedArray)
        output._mask = outputmask
    if unpack:
        return output.squeeze().T
    return output.squeeze()
|
|
|
|
|
def ndfromtxt(fname, **kwargs):
    """
    Read ASCII data from a file and return it as a single, unmasked array.

    This is `genfromtxt` called with ``usemask=False``; a ``usemask`` value
    passed by the caller is overridden.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function.

    """
    options = dict(kwargs, usemask=False)
    return genfromtxt(fname, **options)
|
|
|
|
|
def mafromtxt(fname, **kwargs):
    """
    Read ASCII data from a text file and return it as a masked array.

    This is `genfromtxt` called with ``usemask=True``; a ``usemask`` value
    passed by the caller is overridden.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    """
    options = dict(kwargs, usemask=True)
    return genfromtxt(fname, **options)
|
|
|
|
|
def recfromtxt(fname, **kwargs):
    """
    Read ASCII data from a file and return it as a record array.

    The result of `genfromtxt` is viewed as a standard `recarray` when
    ``usemask=False`` (the default), and as a MaskedRecords array when
    ``usemask=True``.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

    """
    # Unlike `genfromtxt`, the dtype defaults to None here.
    if "dtype" not in kwargs:
        kwargs["dtype"] = None
    masked = kwargs.get('usemask', False)
    result = genfromtxt(fname, **kwargs)
    if not masked:
        return result.view(np.recarray)
    # Only import the masked records when they are actually requested.
    from numpy.ma.mrecords import MaskedRecords
    return result.view(MaskedRecords)
|
|
|
|
|
def recfromcsv(fname, **kwargs):
    """
    Read ASCII data from a comma-separated file into a record array.

    The result of `genfromtxt` is viewed as a record array (see `recarray`)
    when ``usemask=False``, which is the default, and as a masked record
    array (see `ma.mrecords.MaskedRecords`) when ``usemask=True``.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

    The following defaults are also applied unless overridden by the caller:
    ``case_sensitive="lower"``, ``names=True`` and ``delimiter=","``.

    """
    # CSV-oriented defaults; values given by the caller take precedence.
    csv_defaults = (
        ("case_sensitive", "lower"),
        ("names", True),
        ("delimiter", ","),
        ("dtype", None),
    )
    for (option, value) in csv_defaults:
        kwargs.setdefault(option, value)
    result = genfromtxt(fname, **kwargs)

    if not kwargs.get("usemask", False):
        return result.view(np.recarray)
    # Only import the masked records when they are actually requested.
    from numpy.ma.mrecords import MaskedRecords
    return result.view(MaskedRecords)
|
|