Renamed Array to XArray
shoyer committed Feb 21, 2014
1 parent 6e0b12c commit de28cd6
Showing 14 changed files with 394 additions and 392 deletions.
13 changes: 7 additions & 6 deletions README.md
@@ -1,4 +1,4 @@
# xray: transparently manipulate scientific datasets in Python
# xray: extended arrays for working with scientific datasets in Python

**xray** is a Python package for working with aligned sets of homogeneous,
n-dimensional arrays. It implements flexible array operations and dataset
@@ -10,11 +10,12 @@ change.***

## Main Features

- A `DatasetArray` object that is compatible with NumPy's ndarray and ufuncs
but keeps ancilliary variables and metadata intact.
- Array broadcasting based on dimension names and coordinate indices
instead of only shapes.
- Flexible split-apply-combine functionality with the `Array.groupby` method
- Extended array objects (`XArray` and `DatasetArray`) that are compatible
with NumPy's ndarray and ufuncs but that keep ancilliary variables and
metadata intact.
- Flexible array broadcasting based on dimension names and coordinate indices.
- Lazily load arrays from netCDF files on disk or OpenDAP URLs.
- Flexible split-apply-combine functionality with the array `groupby` method
(patterned after [pandas][pandas]).
- Fast label-based indexing and (limited) time-series functionality built on
[pandas][pandas].
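
The feature list above stays fairly abstract, so here is a brief, hypothetical usage sketch. The call names follow this commit's exports (`open_dataset`, `DatasetArray`, `XArray`, the `groupby` method), while the file name, variable name, and coordinate names are invented for illustration:

```python
import xray

# Lazily open a dataset from a netCDF file on disk (or an OpenDAP URL).
ds = xray.open_dataset('example.nc')          # file name is invented

# Indexing the Dataset yields a DatasetArray: compatible with NumPy's
# ndarray and ufuncs, but dimensions, coordinates and metadata ride along.
temperature = ds['temperature']               # variable name is invented
kelvin = temperature + 273.15

# Split-apply-combine with the groupby method, patterned after pandas;
# assumes the dataset has a datetime 'time' coordinate.
monthly_means = temperature.groupby('time.month').mean()
```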
10 changes: 5 additions & 5 deletions src/xray/__init__.py
@@ -1,12 +1,12 @@
from .array_ import Array, broadcast_variables
from .xarray import XArray, broadcast_xarrays
from .dataset import Dataset, open_dataset
from .dataset_array import DatasetArray, align
from .utils import orthogonal_indexer, num2datetimeindex, variable_equal
from .utils import orthogonal_indexer, num2datetimeindex, xarray_equal

from . import backends

concat = DatasetArray.from_stack

__all__ = ['open_dataset', 'Dataset', 'DatasetArray', 'Array', 'align',
'broadcast_variables', 'orthogonal_indexer', 'num2datetimeindex',
'variable_equal']
__all__ = ['open_dataset', 'Dataset', 'DatasetArray', 'XArray', 'align',
'broadcast_xarrays', 'orthogonal_indexer', 'num2datetimeindex',
'xarray_equal']
8 changes: 4 additions & 4 deletions src/xray/backends.py
@@ -12,7 +12,7 @@
from scipy.io import netcdf
from collections import OrderedDict

import array_
import xarray
import conventions
from utils import FrozenOrderedDict, Frozen, datetimeindex2num

@@ -66,11 +66,11 @@ def convert_to_cf_variable(array):
(data, units, calendar) = datetimeindex2num(array.data)
attributes['units'] = units
attributes['calendar'] = calendar
return array_.Array(array.dimensions, data, attributes)
return xarray.XArray(array.dimensions, data, attributes)


def convert_scipy_variable(var):
return array_.Array(var.dimensions, var.data, var._attributes)
return xarray.XArray(var.dimensions, var.data, var._attributes)


class ScipyDataStore(AbstractDataStore):
@@ -166,7 +166,7 @@ def convert_nc4_variable(var):
# netcdf file would now have been scaled twice!
attr = OrderedDict((k, var.getncattr(k)) for k in var.ncattrs()
if k not in ['scale_factor', 'add_offset'])
return array_.Array(var.dimensions, var, attr, indexing_mode='orthogonal')
return xarray.XArray(var.dimensions, var, attr, indexing_mode='orthogonal')


class NetCDF4DataStore(AbstractDataStore):
34 changes: 17 additions & 17 deletions src/xray/dataset.py
@@ -5,7 +5,7 @@
from cStringIO import StringIO
from collections import OrderedDict, Mapping

import array_ as array
import xarray
import backends
import conventions
import groupby
@@ -76,7 +76,7 @@ def _get_virtual_variable(self, key):
data = (month // 3) % 4 + 1
else:
data = getattr(self[ref_var].data, suffix)
return array.Array(self[ref_var].dimensions, data)
return xarray.XArray(self[ref_var].dimensions, data)
raise KeyError('virtual variable %r not found' % key)
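
In the hunk above, the `'season'` virtual variable is computed as `(month // 3) % 4 + 1`, and other datetime components are read straight off the underlying index with `getattr`. The season arithmetic can be checked in isolation with plain pandas, independent of the Dataset machinery:

```python
import pandas as pd

# One timestamp per month; attributes like .month are what
# getattr(data, suffix) returns for a datetime index.
time = pd.date_range('2000-01-01', periods=12, freq='MS')
month = time.month
season = (month // 3) % 4 + 1   # Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4
print(list(zip(month.tolist(), season.tolist())))
```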

def __getitem__(self, key):
@@ -113,12 +113,12 @@ def __init__(self, variables=None, attributes=None):
Parameters
----------
variables : dict-like, optional
A mapping from variable names to `xray.Array` objects or sequences
of the form `(dimensions, data[, attributes])` which can be used as
arguments to create a new `xray.Array`. Each dimension must have
the same length in all variables in which it appears. One
dimensional variables with name equal to their dimension are
coordinate variables, which means they are saved in the dataset as
A mapping from variable names to `XArray` objects or sequences of
the form `(dimensions, data[, attributes])` which can be used as
arguments to create a new `XArray`. Each dimension must have the
same length in all variables in which it appears. One dimensional
variables with name equal to their dimension are coordinate
variables, which means they are saved in the dataset as
`pandas.Index` objects.
attributes : dict-like, optional
Global attributes to save on this dataset.
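
A tiny, hypothetical construction example for the docstring above; the keyword names follow the `__init__` signature shown in this hunk, while the variable names, dimension names, and values are invented:

```python
import numpy as np
import xray

# 'x' is one-dimensional and named after its dimension, so it becomes a
# coordinate variable stored as a pandas.Index; 'temperature' is an
# ordinary variable given in (dimensions, data[, attributes]) form.
ds = xray.Dataset(
    variables={
        'x': (('x',), np.arange(5)),
        'temperature': (('t', 'x'), np.random.randn(3, 5), {'units': 'K'}),
    },
    attributes={'title': 'toy example'},
)
```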
@@ -132,12 +132,12 @@ def __init__(self, variables=None, attributes=None):
self._attributes = OrderedDict(attributes)

def _as_variable(self, name, var):
if not isinstance(var, array.Array):
if not isinstance(var, xarray.XArray):
try:
var = array.Array(*var)
var = xarray.XArray(*var)
except TypeError:
raise TypeError('Dataset variables must be of type '
'DatasetArray or Array, or a sequence of the '
'DatasetArray or XArray, or a sequence of the '
'form (dimensions, data[, attributes])')

if name in var.dimensions:
@@ -251,7 +251,7 @@ def __eq__(self, other):
# require matching dimension or variable order for equality
return (sorted(self.attributes.items())
== sorted(other.attributes.items())
and all(k1 == k2 and utils.variable_equal(v1, v2)
and all(k1 == k2 and utils.xarray_equal(v1, v2)
for (k1, v1), (k2, v2)
in zip(sorted(self.variables.items()),
sorted(other.variables.items()))))
@@ -449,8 +449,8 @@ def renamed(self, name_dict):
dims = tuple(name_dict.get(dim, dim) for dim in v.dimensions)
#TODO: public interface for renaming a variable without loading
# data?
variables[name] = array.Array(dims, v._data, v.attributes,
v._indexing_mode)
variables[name] = xarray.XArray(dims, v._data, v.attributes,
v._indexing_mode)

return type(self)(variables, self.attributes)

@@ -481,7 +481,7 @@ def merge(self, other, inplace=False):
"""
# check for conflicts
utils.update_safety_check(self.variables, other.variables,
compat=utils.variable_equal)
compat=utils.xarray_equal)
# update contents
obj = self if inplace else self.copy()
obj._set_variables(OrderedDict((k, v) for k, v
@@ -627,9 +627,9 @@ def to_dataframe(self):
shape = tuple(self.dimensions.values())
empty_data = np.lib.stride_tricks.as_strided(np.array(0), shape=shape,
strides=[0] * len(shape))
template = array.Array(self.dimensions.keys(), empty_data)
template = xarray.XArray(self.dimensions.keys(), empty_data)
for k in columns:
_, var = array.broadcast_variables(template, self[k])
_, var = xarray.broadcast_xarrays(template, self[k])
_, var_data = np.broadcast_arrays(template.data, var.data)
data.append(var_data.reshape(-1))
# note: pd.MultiIndex.from_product is new in pandas-0.13.1
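
The `to_dataframe` hunk above builds a template spanning every dataset dimension without allocating a full buffer: `as_strided` with all-zero strides makes a full-shape view of a single scalar, and each variable is then broadcast against it before being flattened into a column. The trick in isolation, with plain NumPy:

```python
import numpy as np

shape = (3, 4)  # stands in for tuple(self.dimensions.values())

# A full-shape "array" that costs no extra memory: every element is a view
# of the same scalar because all strides are zero.
empty_data = np.lib.stride_tricks.as_strided(np.array(0), shape=shape,
                                             strides=[0] * len(shape))
print(empty_data.shape)        # (3, 4)

# Broadcasting a 1-D column against the template gives a full-shape view,
# which can then be flattened into a DataFrame column.
col = np.arange(4)             # varies along the second dimension only
_, col_full = np.broadcast_arrays(empty_data, col)
print(col_full.reshape(-1))    # [0 1 2 3 0 1 2 3 0 1 2 3]
```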
7 changes: 4 additions & 3 deletions src/xray/dataset_array.py
@@ -7,7 +7,7 @@
import numpy as np
import pandas as pd

import array_
import xarray
import dataset as dataset_
import groupby
import ops
@@ -386,8 +386,9 @@ def from_stack(cls, arrays, dimension='stacked_dimension',
if focus is None:
focus = 'stacked_variable'

ds[focus] = array_.Array.from_stack(arrays, dimension,
stacked_indexers, length, template)
ds[focus] = xarray.XArray.from_stack(arrays, dimension,
stacked_indexers, length,
template)
return cls(ds, focus)

def to_dataframe(self):
12 changes: 6 additions & 6 deletions src/xray/groupby.py
@@ -2,7 +2,7 @@

from common import ImplementsReduce
from ops import inject_reduce_methods
import array_
import xarray
import dataset
import numpy as np

@@ -12,7 +12,7 @@ def unique_value_groups(ar):
Parameters
----------
ar : array_like
ar : xarraylike
Input array. This will be flattened if it is not already 1-D.
Returns
@@ -49,11 +49,11 @@ class GroupBy(object):
See Also
--------
Array.groupby
XArray.groupby
DatasetArray.groupby
"""
def __init__(self, obj, group_name, group_coord, squeeze=True):
"""See Array.groupby and DatasetArray.groupby
"""See XArray.groupby and DatasetArray.groupby
"""
if group_coord.ndim != 1:
# TODO: remove this limitation?
@@ -116,7 +116,7 @@ def iter_indexed(self):

class ArrayGroupBy(GroupBy, ImplementsReduce):
def iter_shortcut(self):
"""Fast version of `iter_groups` that yields Arrays without metadata
"""Fast version of `iter_groups` that yields XArrays without metadata
"""
# extract the underlying Array object
array = self.obj
@@ -138,7 +138,7 @@ def iter_shortcut(self):
indexer = tuple(indices if n == group_axis else slice(None)
for n in range(array.ndim))
data = array.data[indexer]
yield array_.Array(dims, data)
yield xarray.XArray(dims, data)

def apply(self, func, shortcut=False, **kwargs):
"""Apply a function over each array in the group and stack them
2 changes: 1 addition & 1 deletion src/xray/utils.py
@@ -165,7 +165,7 @@ def datetimeindex2num(dates, units=None, calendar=None):
return (num, units, calendar)


def variable_equal(v1, v2, rtol=1e-05, atol=1e-08):
def xarray_equal(v1, v2, rtol=1e-05, atol=1e-08):
"""True if two objects have the same dimensions, attributes and data;
otherwise False
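
Only the renamed signature and summary line are visible in this hunk. As a rough conceptual sketch only (not the library's implementation, and ignoring NaN handling and non-numeric data), such a comparison could look like:

```python
import numpy as np

def xarray_equal_sketch(v1, v2, rtol=1e-05, atol=1e-08):
    """Conceptual stand-in: same dimensions, same attributes, and data equal
    to within the given relative/absolute tolerances."""
    return (tuple(v1.dimensions) == tuple(v2.dimensions)
            and dict(v1.attributes) == dict(v2.attributes)
            and np.allclose(v1.data, v2.data, rtol=rtol, atol=atol))
```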
21 changes: 11 additions & 10 deletions src/xray/array_.py → src/xray/xarray.py
@@ -32,7 +32,7 @@ def _as_compatible_data(data):
return data


class Array(AbstractArray):
class XArray(AbstractArray):
"""A netcdf-like variable consisting of dimensions, data and attributes
which describe a single Array. A single Array object is not fully described
outside the context of its parent Dataset (if you want such a fully
@@ -476,7 +476,7 @@ def _binary_op(f, reflexive=False):
def func(self, other):
if isinstance(other, dataset_array.DatasetArray):
return NotImplemented
self_data, other_data, dims = _broadcast_variable_data(self, other)
self_data, other_data, dims = _broadcast_xarray_data(self, other)
new_data = (f(self_data, other_data)
if not reflexive
else f(other_data, self_data))
@@ -492,7 +492,7 @@ def func(self, other):
def _inplace_binary_op(f):
@functools.wraps(f)
def func(self, other):
self_data, other_data, dims = _broadcast_variable_data(self, other)
self_data, other_data, dims = _broadcast_xarray_data(self, other)
if dims != self.dimensions:
raise ValueError('dimensions cannot change for in-place '
'operations')
@@ -502,11 +502,11 @@ def func(self, other):
return self
return func

ops.inject_special_operations(Array)
ops.inject_special_operations(XArray)
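
The `_binary_op` and `_inplace_binary_op` factories above, together with the module-level `ops.inject_special_operations(XArray)` call, attach the arithmetic dunder methods to the class at import time. A toy, self-contained illustration of that injection pattern (not xray's actual `ops` module) is:

```python
import functools
import operator

def _binary_op(f, reflexive=False):
    @functools.wraps(f)
    def func(self, other):
        other_data = getattr(other, 'data', other)
        new_data = (f(self.data, other_data) if not reflexive
                    else f(other_data, self.data))
        return Box(new_data)
    return func

def inject_special_operations(cls):
    # Generate __add__/__radd__, __sub__/__rsub__, __mul__/__rmul__ from one factory.
    for name in ['add', 'sub', 'mul']:
        op = getattr(operator, name)
        setattr(cls, '__%s__' % name, _binary_op(op))
        setattr(cls, '__r%s__' % name, _binary_op(op, reflexive=True))

class Box(object):
    """Minimal stand-in for XArray: just wraps a .data attribute."""
    def __init__(self, data):
        self.data = data

inject_special_operations(Box)

print((Box(2) + Box(3)).data)   # 5
print((10 - Box(4)).data)       # 6, dispatched through __rsub__
```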


def broadcast_variables(first, second):
"""Given two arrays, return two arrays with matching dimensions and numpy
def broadcast_xarrays(first, second):
"""Given two XArrays, return two AXrrays with matching dimensions and numpy
broadcast compatible data
Parameters
@@ -546,21 +546,22 @@ def broadcast_variables(first, second):
# expand first_data's dimensions so it's broadcast compatible after
# adding second's dimensions at the end
first_data = first.data[(Ellipsis,) + (None,) * len(second_only_dims)]
new_first = Array(dimensions, first_data)
new_first = XArray(dimensions, first_data, first.attributes)
# expand and reorder second_data so the dimensions line up
first_only_dims = [d for d in dimensions if d not in second.dimensions]
second_dims = list(second.dimensions) + first_only_dims
second_data = second.data[(Ellipsis,) + (None,) * len(first_only_dims)]
new_second = Array(second_dims, second_data).transpose(*dimensions)
new_second = XArray(second_dims, second_data, first.attributes
).transpose(*dimensions)
return new_first, new_second
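
`broadcast_xarrays` above takes the union of the two dimension lists, appends each operand's missing dimensions as trailing size-1 axes via `None` indexing, and transposes the second operand into the shared dimension order. The same steps with bare NumPy arrays and explicit dimension-name lists (names and shapes invented):

```python
import numpy as np

first_dims, first_data = ('t', 'x'), np.zeros((2, 3))
second_dims, second_data = ('y', 'x'), np.ones((4, 3))

# Union of the dimensions: first's dims, then any second-only dims.
dimensions = list(first_dims) + [d for d in second_dims if d not in first_dims]

# Append second-only dims to first as new trailing size-1 axes.
second_only_dims = [d for d in second_dims if d not in first_dims]
new_first = first_data[(Ellipsis,) + (None,) * len(second_only_dims)]        # (2, 3, 1)

# Append first-only dims to second, then transpose into the shared order.
first_only_dims = [d for d in dimensions if d not in second_dims]
second_expanded_dims = list(second_dims) + first_only_dims                   # ['y', 'x', 't']
second_expanded = second_data[(Ellipsis,) + (None,) * len(first_only_dims)]  # (4, 3, 1)
axes = [second_expanded_dims.index(d) for d in dimensions]                   # [2, 1, 0]
new_second = second_expanded.transpose(axes)                                 # (1, 3, 4)

print(dimensions)                        # ['t', 'x', 'y']
print((new_first + new_second).shape)    # (2, 3, 4): broadcast compatible
```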


def _broadcast_variable_data(self, other):
def _broadcast_xarray_data(self, other):
if isinstance(other, dataset.Dataset):
raise TypeError('datasets do not support mathematical operations')
elif all(hasattr(other, attr) for attr in ['dimensions', 'data', 'shape']):
# `other` satisfies the xray.Array API
new_self, new_other = broadcast_variables(self, other)
new_self, new_other = broadcast_xarrays(self, other)
self_data = new_self.data
other_data = new_other.data
dimensions = new_self.dimensions
10 changes: 5 additions & 5 deletions test/__init__.py
@@ -6,13 +6,13 @@


class TestCase(unittest.TestCase):
def assertVarEqual(self, v1, v2):
self.assertTrue(utils.variable_equal(v1, v2))
def assertXArrayEqual(self, v1, v2):
self.assertTrue(utils.xarray_equal(v1, v2))

def assertVarNotEqual(self, v1, v2):
self.assertFalse(utils.variable_equal(v1, v2))
def assertXArrayNotEqual(self, v1, v2):
self.assertFalse(utils.xarray_equal(v1, v2))

def assertNDArrayEqual(self, a1, a2):
def assertArrayEqual(self, a1, a2):
assert_array_equal(a1, a2)

