Deprecated 'attributes' in favor of 'attrs'
Also:
1. Don't try to preserve attributes under mathematical operations.
2. Finish up some cleanup related to "equals" and "identical" for testing.
3. Options for how strictly to compare variables when merging or concatenating
   (see pydata#25).

Fixes pydata#103 and pydata#104.
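
In practice, the rename and the new comparison semantics look roughly like this. A minimal sketch, not code from the commit: the data is hypothetical, and the exact constructor signatures of this era of xray are assumed.

```python
import xray

ds = xray.Dataset({'x': ('t', [1.0, 2.0, 3.0])})
ds.attrs['title'] = 'example'   # new spelling (formerly ds.attributes)
ds['x'].attrs['units'] = 'm'

# Point 1: attributes are no longer carried through mathematical operations.
assert (2 * ds['x']).attrs == {}

# Point 2: `equals` compares dimensions and values only;
# `identical` also compares attributes.
other = xray.Dataset({'x': ('t', [1.0, 2.0, 3.0])})
assert ds.variables['x'].equals(other.variables['x'])         # attrs ignored
assert not ds.variables['x'].identical(other.variables['x'])  # attrs differ

# Point 3: strictness when merging or concatenating is now an option;
# the keyword name `compat` is an assumption, not confirmed by this diff.
# merged = ds.merge(other, compat='equals')
```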
shoyer committed Apr 27, 2014
1 parent 9744aaf commit 23b2cdd
Showing 17 changed files with 339 additions and 350 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -26,7 +26,7 @@ makes many powerful array operations possible:
- Database-like alignment based on coordinate labels that smoothly
  handles missing values: `x, y = xray.align(x, y, join='outer')`.
- Keep track of arbitrary metadata in the form of a Python dictionary:
`x.attributes`.
`x.attrs`.

**xray** aims to provide a data analysis toolkit as powerful as
[pandas][pandas] but designed for working with homogeneous N-dimensional
@@ -103,7 +103,7 @@ several limitations that led us to build xray instead of extending Iris:
attempts to build all functionality (`Coord` supports a much more
limited set of functionality). xray has its equivalent of the Cube
(the `DataArray` object), but under the hood it is only a thin wrapper
on the more primitive building blocks of Dataset and XArray objects.
on the more primitive building blocks of Dataset and Variable objects.
2. Iris has a strict interpretation of [CF conventions][cf], which,
although a principled choice, we have found to be impractical for
everyday uses. With Iris, every quantity has physical (SI) units, all
@@ -145,10 +145,10 @@ labeled numpy arrays that provided some guidance for the design of xray.
enough. The goal is to be as fast as pandas or raw numpy.
- Provide a uniform API for loading and saving scientific data in a variety
of formats (including streaming data).
- Understand metadata according to [Climate and Forecast Conventions][cf]
when appropriate, but don't strictly enforce them. Conflicting attributes
(e.g., units) should be silently dropped instead of causing errors. The
onus is on the user to make sure that operations make sense.
- Take a pragmatic approach to metadata (attributes), and be very cautious
before implementing any functionality that relies on it. Automatically
maintaining attributes is tricky and very hard to get right (see
discussion about Iris above).

## Getting started

4 changes: 2 additions & 2 deletions doc/api.rst
@@ -25,7 +25,7 @@ Attributes and underlying data
Dataset.coordinates
Dataset.noncoordinates
Dataset.dimensions
Dataset.attributes
Dataset.attrs

Dataset contents
~~~~~~~~~~~~~~~~
@@ -112,7 +112,7 @@ Attributes and underlying data
DataArray.coordinates
DataArray.name
DataArray.dataset
DataArray.attributes
DataArray.attrs

Selecting
~~~~~~~~~
54 changes: 40 additions & 14 deletions test/__init__.py
@@ -1,5 +1,6 @@
import unittest

import numpy as np
from numpy.testing import assert_array_equal

from xray import utils, DataArray
@@ -36,12 +37,26 @@ def requires_netCDF4(test):
return test if has_netCDF4 else unittest.skip('requires netCDF4')(test)


def data_allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08):
exact_dtypes = [np.datetime64, np.timedelta64, np.string_]
if any(any(np.issubdtype(arr.dtype, t) for t in exact_dtypes)
or arr.dtype == object for arr in [arr1, arr2]):
return np.array_equal(arr1, arr2)
else:
return utils.allclose_or_equiv(arr1, arr2, rtol=rtol, atol=atol)


class TestCase(unittest.TestCase):
def assertVariableEqual(self, v1, v2):
self.assertTrue(as_variable(v1).equals(v2))

def assertVariableIdentical(self, v1, v2):
self.assertTrue(as_variable(v1).identical(v2))

def assertVariableAllClose(self, v1, v2, rtol=1e-05, atol=1e-08):
self.assertTrue(utils.variable_allclose(v1, v2, rtol=rtol, atol=atol))
self.assertEqual(v1.dimensions, v2.dimensions)
self.assertTrue(data_allclose_or_equiv(v1.values, v2.values,
rtol=rtol, atol=atol))

def assertVariableNotEqual(self, v1, v2):
self.assertFalse(as_variable(v1).equals(v2))
@@ -52,36 +67,47 @@ def assertArrayEqual(self, a1, a2):
def assertDatasetEqual(self, d1, d2):
# this method is functionally equivalent to `assert d1 == d2`, but it
# checks each aspect of equality separately for easier debugging
self.assertTrue(utils.dict_equal(d1.attributes, d2.attributes))
self.assertEqual(sorted(d1.variables), sorted(d2.variables))
for k in d1:
v1 = d1.variables[k]
v2 = d2.variables[k]
self.assertVariableEqual(v1, v2)

def assertDatasetIdentical(self, d1, d2):
# this method is functionally equivalent to `assert d1.identical(d2)`,
# but it checks each aspect of equality separately for easier debugging
self.assertTrue(utils.dict_equal(d1.attrs, d2.attrs))
self.assertEqual(sorted(d1.variables), sorted(d2.variables))
for k in d1:
v1 = d1.variables[k]
v2 = d2.variables[k]
self.assertTrue(v1.identical(v2))

def assertDatasetAllClose(self, d1, d2, rtol=1e-05, atol=1e-08):
self.assertTrue(utils.dict_equal(d1.attributes, d2.attributes))
self.assertEqual(sorted(d1.variables), sorted(d2.variables))
for k in d1:
v1 = d1.variables[k]
v2 = d2.variables[k]
self.assertVariableAllClose(v1, v2, rtol=rtol, atol=atol)

def assertCoordsEqual(self, d1, d2):
self.assertEqual(sorted(d1.coordinates), sorted(d2.coordinates))
for k in d1.coordinates:
v1 = d1.coordinates[k]
v2 = d2.coordinates[k]
self.assertVariableEqual(v1, v2)

def assertDataArrayEqual(self, ar1, ar2):
self.assertVariableEqual(ar1, ar2)
self.assertCoordsEqual(ar1, ar2)

def assertDataArrayIdentical(self, ar1, ar2):
self.assertEqual(ar1.name, ar2.name)
self.assertDatasetEqual(ar1.dataset, ar2.dataset)
self.assertDatasetIdentical(ar1.dataset, ar2.dataset)

def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08):
self.assertEqual(ar1.name, ar2.name)
self.assertDatasetAllClose(ar1.dataset, ar2.dataset,
rtol=rtol, atol=atol)

def assertDataArrayEquiv(self, ar1, ar2):
self.assertIsInstance(ar1, DataArray)
self.assertIsInstance(ar2, DataArray)
random_name = 'randomly-renamed-variable'
self.assertDataArrayEqual(ar1.rename(random_name),
ar2.rename(random_name))
self.assertVariableAllClose(ar1, ar2, rtol=rtol, atol=atol)
self.assertCoordsEqual(ar1, ar2)


class ReturnItem(object):
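As an aside on the new `data_allclose_or_equiv` helper above: numeric data is compared within the given tolerances, while datetime, timedelta, string, and object dtypes fall back to exact equality. A small sketch of the intended behavior, assuming the helper is importable from this test package:

```python
import numpy as np
from test import data_allclose_or_equiv  # import path is an assumption

# Floating-point values within rtol/atol compare as close...
assert data_allclose_or_equiv(np.array([1.0]), np.array([1.0 + 1e-9]))

# ...but string data (an "exact" dtype) must match element for element.
assert data_allclose_or_equiv(np.array(['abc']), np.array(['abc']))
assert not data_allclose_or_equiv(np.array(['abc']), np.array(['abd']))
```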
20 changes: 9 additions & 11 deletions test/test_backends.py
@@ -66,7 +66,7 @@ def test_roundtrip_test_data(self):
def test_roundtrip_string_data(self):
expected = Dataset({'x': ('t', ['abc', 'def'])})
actual = self.roundtrip(expected)
self.assertDatasetEqual(expected, actual)
self.assertDatasetIdentical(expected, actual)

def test_roundtrip_mask_and_scale(self):
decoded = create_masked_and_scaled_data()
@@ -81,7 +81,7 @@ def test_roundtrip_mask_and_scale(self):
def test_roundtrip_example_1_netcdf(self):
expected = open_example_dataset('example_1.nc')
actual = self.roundtrip(expected)
self.assertDatasetEqual(expected, actual)
self.assertDatasetIdentical(expected, actual)

def test_orthogonal_indexing(self):
in_memory = create_test_data()
@@ -98,7 +98,7 @@ def test_orthogonal_indexing(self):
def test_pickle(self):
on_disk = open_example_dataset('bears.nc')
unpickled = pickle.loads(pickle.dumps(on_disk))
self.assertDatasetEqual(on_disk, unpickled)
self.assertDatasetIdentical(on_disk, unpickled)


@contextlib.contextmanager
@@ -206,7 +206,7 @@ def test_mask_and_scale(self):
# now check xray
ds = open_dataset(tmp_file)
expected = create_masked_and_scaled_data()
self.assertDatasetEqual(expected, ds)
self.assertDatasetIdentical(expected, ds)

def test_0dimensional_variable(self):
# This fix verifies our work-around to this netCDF4-python bug:
@@ -219,7 +219,7 @@ def test_0dimensional_variable(self):

ds = open_dataset(tmp_file)
expected = Dataset({'x': ((), 123)})
self.assertDatasetEqual(expected, ds)
self.assertDatasetIdentical(expected, ds)

def test_variable_len_strings(self):
with create_tmp_file() as tmp_file:
@@ -234,7 +234,7 @@ def test_variable_len_strings(self):
expected = Dataset({'x': ('x', values)})
for kwargs in [{}, {'decode_cf': True}]:
actual = open_dataset(tmp_file, **kwargs)
self.assertDatasetEqual(expected, actual)
self.assertDatasetIdentical(expected, actual)


@requires_netCDF4
@@ -251,9 +251,9 @@ def roundtrip(self, data, **kwargs):


def clear_attributes(ds):
ds.attributes.clear()
ds.attrs.clear()
for v in ds.itervalues():
v.attributes.clear()
v.attrs.clear()


@requires_netCDF4
@@ -263,7 +263,5 @@ def test_cmp_local_file(self):
url = 'http://test.opendap.org/opendap/hyrax/data/nc/bears.nc'
actual = Dataset.load_store(backends.PydapDataStore(url))
expected = open_example_dataset('bears.nc')
# don't check attributes, since pydap decodes the strings improperly
for ds in [actual, expected]:
clear_attributes(ds)
# don't check attributes since pydap doesn't serialize them correctly
self.assertDatasetEqual(actual, expected)
