Renamed Array to XArray
shoyer committed Feb 21, 2014
1 parent 6e0b12c commit de28cd6
Showing 14 changed files with 394 additions and 392 deletions.
13 changes: 7 additions & 6 deletions README.md
@@ -1,4 +1,4 @@
# xray: transparently manipulate scientific datasets in Python
# xray: extended arrays for working with scientific datasets in Python

**xray** is a Python package for working with aligned sets of homogeneous,
n-dimensional arrays. It implements flexible array operations and dataset
@@ -10,11 +10,12 @@ change.***

## Main Features

- A `DatasetArray` object that is compatible with NumPy's ndarray and ufuncs
but keeps ancilliary variables and metadata intact.
- Array broadcasting based on dimension names and coordinate indices
instead of only shapes.
- Flexible split-apply-combine functionality with the `Array.groupby` method
- Extended array objects (`XArray` and `DatasetArray`) that are compatible
with NumPy's ndarray and ufuncs but that keep ancilliary variables and
metadata intact.
- Flexible array broadcasting based on dimension names and coordinate indices.
- Lazily load arrays from netCDF files on disk or OpenDAP URLs.
- Flexible split-apply-combine functionality with the array `groupby` method
(patterned after [pandas][pandas]).
- Fast label-based indexing and (limited) time-series functionality built on
[pandas][pandas].
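
The feature list above stays fairly abstract, so here is a brief, hypothetical usage sketch. The call names follow this commit's exports (`open_dataset`, `DatasetArray`, `XArray`, the `groupby` method), while the file name, variable name, and coordinate names are invented for illustration:

```python
import xray

# Lazily open a dataset from a netCDF file on disk (or an OpenDAP URL).
ds = xray.open_dataset('example.nc')          # file name is invented

# Indexing the Dataset yields a DatasetArray: compatible with NumPy's
# ndarray and ufuncs, but dimensions, coordinates and metadata ride along.
temperature = ds['temperature']               # variable name is invented
kelvin = temperature + 273.15

# Split-apply-combine with the groupby method, patterned after pandas;
# assumes the dataset has a datetime 'time' coordinate.
monthly_means = temperature.groupby('time.month').mean()
```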
10 changes: 5 additions & 5 deletions src/xray/__init__.py
@@ -1,12 +1,12 @@
from .array_ import Array, broadcast_variables
from .xarray import XArray, broadcast_xarrays
from .dataset import Dataset, open_dataset
from .dataset_array import DatasetArray, align
from .utils import orthogonal_indexer, num2datetimeindex, variable_equal
from .utils import orthogonal_indexer, num2datetimeindex, xarray_equal

from . import backends

concat = DatasetArray.from_stack

__all__ = ['open_dataset', 'Dataset', 'DatasetArray', 'Array', 'align',
'broadcast_variables', 'orthogonal_indexer', 'num2datetimeindex',
'variable_equal']
__all__ = ['open_dataset', 'Dataset', 'DatasetArray', 'XArray', 'align',
'broadcast_xarrays', 'orthogonal_indexer', 'num2datetimeindex',
'xarray_equal']
8 changes: 4 additions & 4 deletions src/xray/backends.py
@@ -12,7 +12,7 @@
from scipy.io import netcdf
from collections import OrderedDict

import array_
import xarray
import conventions
from utils import FrozenOrderedDict, Frozen, datetimeindex2num

@@ -66,11 +66,11 @@ def convert_to_cf_variable(array):
(data, units, calendar) = datetimeindex2num(array.data)
attributes['units'] = units
attributes['calendar'] = calendar
return array_.Array(array.dimensions, data, attributes)
return xarray.XArray(array.dimensions, data, attributes)


def convert_scipy_variable(var):
return array_.Array(var.dimensions, var.data, var._attributes)
return xarray.XArray(var.dimensions, var.data, var._attributes)


class ScipyDataStore(AbstractDataStore):
@@ -166,7 +166,7 @@ def convert_nc4_variable(var):
# netcdf file would now have been scaled twice!
attr = OrderedDict((k, var.getncattr(k)) for k in var.ncattrs()
if k not in ['scale_factor', 'add_offset'])
return array_.Array(var.dimensions, var, attr, indexing_mode='orthogonal')
return xarray.XArray(var.dimensions, var, attr, indexing_mode='orthogonal')


class NetCDF4DataStore(AbstractDataStore):
34 changes: 17 additions & 17 deletions src/xray/dataset.py
@@ -5,7 +5,7 @@
from cStringIO import StringIO
from collections import OrderedDict, Mapping

import array_ as array
import xarray
import backends
import conventions
import groupby
@@ -76,7 +76,7 @@ def _get_virtual_variable(self, key):
data = (month // 3) % 4 + 1
else:
data = getattr(self[ref_var].data, suffix)
return array.Array(self[ref_var].dimensions, data)
return xarray.XArray(self[ref_var].dimensions, data)
raise KeyError('virtual variable %r not found' % key)
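
In the hunk above, the `'season'` virtual variable is computed as `(month // 3) % 4 + 1`, and other datetime components are read straight off the underlying index with `getattr`. The season arithmetic can be checked in isolation with plain pandas, independent of the Dataset machinery:

```python
import pandas as pd

# One timestamp per month; attributes like .month are what
# getattr(data, suffix) returns for a datetime index.
time = pd.date_range('2000-01-01', periods=12, freq='MS')
month = time.month
season = (month // 3) % 4 + 1   # Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4
print(list(zip(month.tolist(), season.tolist())))
```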

def __getitem__(self, key):
@@ -113,12 +113,12 @@ def __init__(self, variables=None, attributes=None):
Parameters
----------
variables : dict-like, optional
A mapping from variable names to `xray.Array` objects or sequences
of the form `(dimensions, data[, attributes])` which can be used as
arguments to create a new `xray.Array`. Each dimension must have
the same length in all variables in which it appears. One
dimensional variables with name equal to their dimension are
coordinate variables, which means they are saved in the dataset as
A mapping from variable names to `XArray` objects or sequences of
the form `(dimensions, data[, attributes])` which can be used as
arguments to create a new `XArray`. Each dimension must have the
same length in all variables in which it appears. One dimensional
variables with name equal to their dimension are coordinate
variables, which means they are saved in the dataset as
`pandas.Index` objects.
attributes : dict-like, optional
Global attributes to save on this dataset.
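
A tiny, hypothetical construction example for the docstring above; the keyword names follow the `__init__` signature shown in this hunk, while the variable names, dimension names, and values are invented:

```python
import numpy as np
import xray

# 'x' is one-dimensional and named after its dimension, so it becomes a
# coordinate variable stored as a pandas.Index; 'temperature' is an
# ordinary variable given in (dimensions, data[, attributes]) form.
ds = xray.Dataset(
    variables={
        'x': (('x',), np.arange(5)),
        'temperature': (('t', 'x'), np.random.randn(3, 5), {'units': 'K'}),
    },
    attributes={'title': 'toy example'},
)
```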
@@ -132,12 +132,12 @@ def __init__(self, variables=None, attributes=None):
self._attributes = OrderedDict(attributes)

def _as_variable(self, name, var):
if not isinstance(var, array.Array):
if not isinstance(var, xarray.XArray):
try:
var = array.Array(*var)
var = xarray.XArray(*var)
except TypeError:
raise TypeError('Dataset variables must be of type '
'DatasetArray or Array, or a sequence of the '
'DatasetArray or XArray, or a sequence of the '
'form (dimensions, data[, attributes])')

if name in var.dimensions:
@@ -251,7 +251,7 @@ def __eq__(self, other):
# require matching dimension or variable order for equality
return (sorted(self.attributes.items())
== sorted(other.attributes.items())
and all(k1 == k2 and utils.variable_equal(v1, v2)
and all(k1 == k2 and utils.xarray_equal(v1, v2)
for (k1, v1), (k2, v2)
in zip(sorted(self.variables.items()),
sorted(other.variables.items()))))
@@ -449,8 +449,8 @@ def renamed(self, name_dict):
dims = tuple(name_dict.get(dim, dim) for dim in v.dimensions)
#TODO: public interface for renaming a variable without loading
# data?
variables[name] = array.Array(dims, v._data, v.attributes,
v._indexing_mode)
variables[name] = xarray.XArray(dims, v._data, v.attributes,
v._indexing_mode)

return type(self)(variables, self.attributes)

@@ -481,7 +481,7 @@ def merge(self, other, inplace=False):
"""
# check for conflicts
utils.update_safety_check(self.variables, other.variables,
compat=utils.variable_equal)
compat=utils.xarray_equal)
# update contents
obj = self if inplace else self.copy()
obj._set_variables(OrderedDict((k, v) for k, v
@@ -627,9 +627,9 @@ def to_dataframe(self):
shape = tuple(self.dimensions.values())
empty_data = np.lib.stride_tricks.as_strided(np.array(0), shape=shape,
strides=[0] * len(shape))
template = array.Array(self.dimensions.keys(), empty_data)
template = xarray.XArray(self.dimensions.keys(), empty_data)
for k in columns:
_, var = array.broadcast_variables(template, self[k])
_, var = xarray.broadcast_xarrays(template, self[k])
_, var_data = np.broadcast_arrays(template.data, var.data)
data.append(var_data.reshape(-1))
# note: pd.MultiIndex.from_product is new in pandas-0.13.1
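
The `to_dataframe` hunk above builds a template spanning every dataset dimension without allocating a full buffer: `as_strided` with all-zero strides makes a full-shape view of a single scalar, and each variable is then broadcast against it before being flattened into a column. The trick in isolation, with plain NumPy:

```python
import numpy as np

shape = (3, 4)  # stands in for tuple(self.dimensions.values())

# A full-shape "array" that costs no extra memory: every element is a view
# of the same scalar because all strides are zero.
empty_data = np.lib.stride_tricks.as_strided(np.array(0), shape=shape,
                                             strides=[0] * len(shape))
print(empty_data.shape)        # (3, 4)

# Broadcasting a 1-D column against the template gives a full-shape view,
# which can then be flattened into a DataFrame column.
col = np.arange(4)             # varies along the second dimension only
_, col_full = np.broadcast_arrays(empty_data, col)
print(col_full.reshape(-1))    # [0 1 2 3 0 1 2 3 0 1 2 3]
```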
7 changes: 4 additions & 3 deletions src/xray/dataset_array.py
@@ -7,7 +7,7 @@
import numpy as np
import pandas as pd

import array_
import xarray
import dataset as dataset_
import groupby
import ops
@@ -386,8 +386,9 @@ def from_stack(cls, arrays, dimension='stacked_dimension',
if focus is None:
focus = 'stacked_variable'

ds[focus] = array_.Array.from_stack(arrays, dimension,
stacked_indexers, length, template)
ds[focus] = xarray.XArray.from_stack(arrays, dimension,
stacked_indexers, length,
template)
return cls(ds, focus)

def to_dataframe(self):
12 changes: 6 additions & 6 deletions src/xray/groupby.py
@@ -2,7 +2,7 @@

from common import ImplementsReduce
from ops import inject_reduce_methods
import array_
import xarray
import dataset
import numpy as np

@@ -12,7 +12,7 @@ def unique_value_groups(ar):
Parameters
----------
ar : array_like
ar : xarraylike
Input array. This will be flattened if it is not already 1-D.
Returns
@@ -49,11 +49,11 @@ class GroupBy(object):
See Also
--------
Array.groupby
XArray.groupby
DatasetArray.groupby
"""
def __init__(self, obj, group_name, group_coord, squeeze=True):
"""See Array.groupby and DatasetArray.groupby
"""See XArray.groupby and DatasetArray.groupby
"""
if group_coord.ndim != 1:
# TODO: remove this limitation?
@@ -116,7 +116,7 @@ def iter_indexed(self):

class ArrayGroupBy(GroupBy, ImplementsReduce):
def iter_shortcut(self):
"""Fast version of `iter_groups` that yields Arrays without metadata
"""Fast version of `iter_groups` that yields XArrays without metadata
"""
# extract the underlying Array object
array = self.obj
@@ -138,7 +138,7 @@ def iter_shortcut(self):
indexer = tuple(indices if n == group_axis else slice(None)
for n in range(array.ndim))
data = array.data[indexer]
yield array_.Array(dims, data)
yield xarray.XArray(dims, data)

def apply(self, func, shortcut=False, **kwargs):
"""Apply a function over each array in the group and stack them
2 changes: 1 addition & 1 deletion src/xray/utils.py
@@ -165,7 +165,7 @@ def datetimeindex2num(dates, units=None, calendar=None):
return (num, units, calendar)


def variable_equal(v1, v2, rtol=1e-05, atol=1e-08):
def xarray_equal(v1, v2, rtol=1e-05, atol=1e-08):
"""True if two objects have the same dimensions, attributes and data;
otherwise False
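
Only the renamed signature and summary line are visible in this hunk. As a rough conceptual sketch only (not the library's implementation, and ignoring NaN handling and non-numeric data), such a comparison could look like:

```python
import numpy as np

def xarray_equal_sketch(v1, v2, rtol=1e-05, atol=1e-08):
    """Conceptual stand-in: same dimensions, same attributes, and data equal
    to within the given relative/absolute tolerances."""
    return (tuple(v1.dimensions) == tuple(v2.dimensions)
            and dict(v1.attributes) == dict(v2.attributes)
            and np.allclose(v1.data, v2.data, rtol=rtol, atol=atol))
```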
21 changes: 11 additions & 10 deletions src/xray/array_.py → src/xray/xarray.py
@@ -32,7 +32,7 @@ def _as_compatible_data(data):
return data


class Array(AbstractArray):
class XArray(AbstractArray):
"""A netcdf-like variable consisting of dimensions, data and attributes
which describe a single Array. A single Array object is not fully described
outside the context of its parent Dataset (if you want such a fully
@@ -476,7 +476,7 @@ def _binary_op(f, reflexive=False):
def func(self, other):
if isinstance(other, dataset_array.DatasetArray):
return NotImplemented
self_data, other_data, dims = _broadcast_variable_data(self, other)
self_data, other_data, dims = _broadcast_xarray_data(self, other)
new_data = (f(self_data, other_data)
if not reflexive
else f(other_data, self_data))
@@ -492,7 +492,7 @@ def func(self, other):
def _inplace_binary_op(f):
@functools.wraps(f)
def func(self, other):
self_data, other_data, dims = _broadcast_variable_data(self, other)
self_data, other_data, dims = _broadcast_xarray_data(self, other)
if dims != self.dimensions:
raise ValueError('dimensions cannot change for in-place '
'operations')
@@ -502,11 +502,11 @@ def func(self, other):
return self
return func

ops.inject_special_operations(Array)
ops.inject_special_operations(XArray)
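
The `_binary_op` and `_inplace_binary_op` factories above, together with the module-level `ops.inject_special_operations(XArray)` call, attach the arithmetic dunder methods to the class at import time. A toy, self-contained illustration of that injection pattern (not xray's actual `ops` module) is:

```python
import functools
import operator

def _binary_op(f, reflexive=False):
    @functools.wraps(f)
    def func(self, other):
        other_data = getattr(other, 'data', other)
        new_data = (f(self.data, other_data) if not reflexive
                    else f(other_data, self.data))
        return Box(new_data)
    return func

def inject_special_operations(cls):
    # Generate __add__/__radd__, __sub__/__rsub__, __mul__/__rmul__ from one factory.
    for name in ['add', 'sub', 'mul']:
        op = getattr(operator, name)
        setattr(cls, '__%s__' % name, _binary_op(op))
        setattr(cls, '__r%s__' % name, _binary_op(op, reflexive=True))

class Box(object):
    """Minimal stand-in for XArray: just wraps a .data attribute."""
    def __init__(self, data):
        self.data = data

inject_special_operations(Box)

print((Box(2) + Box(3)).data)   # 5
print((10 - Box(4)).data)       # 6, dispatched through __rsub__
```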


def broadcast_variables(first, second):
"""Given two arrays, return two arrays with matching dimensions and numpy
def broadcast_xarrays(first, second):
"""Given two XArrays, return two AXrrays with matching dimensions and numpy
broadcast compatible data
Parameters
@@ -546,21 +546,22 @@ def broadcast_variables(first, second):
# expand first_data's dimensions so it's broadcast compatible after
# adding second's dimensions at the end
first_data = first.data[(Ellipsis,) + (None,) * len(second_only_dims)]
new_first = Array(dimensions, first_data)
new_first = XArray(dimensions, first_data, first.attributes)
# expand and reorder second_data so the dimensions line up
first_only_dims = [d for d in dimensions if d not in second.dimensions]
second_dims = list(second.dimensions) + first_only_dims
second_data = second.data[(Ellipsis,) + (None,) * len(first_only_dims)]
new_second = Array(second_dims, second_data).transpose(*dimensions)
new_second = XArray(second_dims, second_data, first.attributes
).transpose(*dimensions)
return new_first, new_second
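
`broadcast_xarrays` above takes the union of the two dimension lists, appends each operand's missing dimensions as trailing size-1 axes via `None` indexing, and transposes the second operand into the shared dimension order. The same steps with bare NumPy arrays and explicit dimension-name lists (names and shapes invented):

```python
import numpy as np

first_dims, first_data = ('t', 'x'), np.zeros((2, 3))
second_dims, second_data = ('y', 'x'), np.ones((4, 3))

# Union of the dimensions: first's dims, then any second-only dims.
dimensions = list(first_dims) + [d for d in second_dims if d not in first_dims]

# Append second-only dims to first as new trailing size-1 axes.
second_only_dims = [d for d in second_dims if d not in first_dims]
new_first = first_data[(Ellipsis,) + (None,) * len(second_only_dims)]        # (2, 3, 1)

# Append first-only dims to second, then transpose into the shared order.
first_only_dims = [d for d in dimensions if d not in second_dims]
second_expanded_dims = list(second_dims) + first_only_dims                   # ['y', 'x', 't']
second_expanded = second_data[(Ellipsis,) + (None,) * len(first_only_dims)]  # (4, 3, 1)
axes = [second_expanded_dims.index(d) for d in dimensions]                   # [2, 1, 0]
new_second = second_expanded.transpose(axes)                                 # (1, 3, 4)

print(dimensions)                        # ['t', 'x', 'y']
print((new_first + new_second).shape)    # (2, 3, 4): broadcast compatible
```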


def _broadcast_variable_data(self, other):
def _broadcast_xarray_data(self, other):
if isinstance(other, dataset.Dataset):
raise TypeError('datasets do not support mathematical operations')
elif all(hasattr(other, attr) for attr in ['dimensions', 'data', 'shape']):
# `other` satisfies the xray.Array API
new_self, new_other = broadcast_variables(self, other)
new_self, new_other = broadcast_xarrays(self, other)
self_data = new_self.data
other_data = new_other.data
dimensions = new_self.dimensions
10 changes: 5 additions & 5 deletions test/__init__.py
@@ -6,13 +6,13 @@


class TestCase(unittest.TestCase):
def assertVarEqual(self, v1, v2):
self.assertTrue(utils.variable_equal(v1, v2))
def assertXArrayEqual(self, v1, v2):
self.assertTrue(utils.xarray_equal(v1, v2))

def assertVarNotEqual(self, v1, v2):
self.assertFalse(utils.variable_equal(v1, v2))
def assertXArrayNotEqual(self, v1, v2):
self.assertFalse(utils.xarray_equal(v1, v2))

def assertNDArrayEqual(self, a1, a2):
def assertArrayEqual(self, a1, a2):
assert_array_equal(a1, a2)

