Deprecated 'attributes' in favor of 'attrs'
Also:
1. Don't try to preserve attributes under mathematical operations.
2. Finish up some cleanup related to "equals" and "identical" for testing.
3. Options for how strictly to compare variables when merging or concatenating
   (see pydata#25).

Fixes pydata#103 and pydata#104.
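
In practice, the rename and the new comparison semantics look roughly like this. A minimal sketch, not code from the commit: the data is hypothetical, and the exact constructor signatures of this era of xray are assumed.

```python
import xray

ds = xray.Dataset({'x': ('t', [1.0, 2.0, 3.0])})
ds.attrs['title'] = 'example'   # new spelling (formerly ds.attributes)
ds['x'].attrs['units'] = 'm'

# Point 1: attributes are no longer carried through mathematical operations.
assert (2 * ds['x']).attrs == {}

# Point 2: `equals` compares dimensions and values only;
# `identical` also compares attributes.
other = xray.Dataset({'x': ('t', [1.0, 2.0, 3.0])})
assert ds.variables['x'].equals(other.variables['x'])         # attrs ignored
assert not ds.variables['x'].identical(other.variables['x'])  # attrs differ

# Point 3: strictness when merging or concatenating is now an option;
# the keyword name `compat` is an assumption, not confirmed by this diff.
# merged = ds.merge(other, compat='equals')
```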
shoyer committed Apr 27, 2014
1 parent 9744aaf commit 23b2cdd
Showing 17 changed files with 339 additions and 350 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -26,7 +26,7 @@ makes many powerful array operations possible:
- Database-like alignment based on coordinate labels that smoothly
  handles missing values: `x, y = xray.align(x, y, join='outer')`.
- Keep track of arbitrary metadata in the form of a Python dictionary:
`x.attributes`.
`x.attrs`.

**xray** aims to provide a data analysis toolkit as powerful as
[pandas][pandas] but designed for working with homogeneous N-dimensional
@@ -103,7 +103,7 @@ several limitations that led us to build xray instead of extending Iris:
attempts to build all functionality (`Coord` supports a much more
limited set of functionality). xray has its equivalent of the Cube
(the `DataArray` object), but under the hood it is only a thin wrapper
on the more primitive building blocks of Dataset and XArray objects.
on the more primitive building blocks of Dataset and Variable objects.
2. Iris has a strict interpretation of [CF conventions][cf], which,
although a principled choice, we have found to be impractical for
everyday uses. With Iris, every quantity has physical (SI) units, all
@@ -145,10 +145,10 @@ labeled numpy arrays that provided some guidance for the design of xray.
enough. The goal is to be as fast as pandas or raw numpy.
- Provide a uniform API for loading and saving scientific data in a variety
of formats (including streaming data).
- Understand metadata according to [Climate and Forecast Conventions][cf]
when appropriate, but don't strictly enforce them. Conflicting attributes
(e.g., units) should be silently dropped instead of causing errors. The
onus is on the user to make sure that operations make sense.
- Take a pragmatic approach to metadata (attributes), and be very cautious
before implementing any functionality that relies on it. Automatically
maintaining attributes is tricky and very hard to get right (see
discussion about Iris above).

## Getting started

4 changes: 2 additions & 2 deletions doc/api.rst
@@ -25,7 +25,7 @@ Attributes and underlying data
Dataset.coordinates
Dataset.noncoordinates
Dataset.dimensions
Dataset.attributes
Dataset.attrs

Dataset contents
~~~~~~~~~~~~~~~~
@@ -112,7 +112,7 @@ Attributes and underlying data
DataArray.coordinates
DataArray.name
DataArray.dataset
DataArray.attributes
DataArray.attrs

Selecting
~~~~~~~~~
54 changes: 40 additions & 14 deletions test/__init__.py
@@ -1,5 +1,6 @@
import unittest

import numpy as np
from numpy.testing import assert_array_equal

from xray import utils, DataArray
@@ -36,12 +37,26 @@ def requires_netCDF4(test):
return test if has_netCDF4 else unittest.skip('requires netCDF4')(test)


def data_allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08):
exact_dtypes = [np.datetime64, np.timedelta64, np.string_]
if any(any(np.issubdtype(arr.dtype, t) for t in exact_dtypes)
or arr.dtype == object for arr in [arr1, arr2]):
return np.array_equal(arr1, arr2)
else:
return utils.allclose_or_equiv(arr1, arr2, rtol=rtol, atol=atol)


class TestCase(unittest.TestCase):
def assertVariableEqual(self, v1, v2):
self.assertTrue(as_variable(v1).equals(v2))

def assertVariableIdentical(self, v1, v2):
self.assertTrue(as_variable(v1).identical(v2))

def assertVariableAllClose(self, v1, v2, rtol=1e-05, atol=1e-08):
self.assertTrue(utils.variable_allclose(v1, v2, rtol=rtol, atol=atol))
self.assertEqual(v1.dimensions, v2.dimensions)
self.assertTrue(data_allclose_or_equiv(v1.values, v2.values,
rtol=rtol, atol=atol))

def assertVariableNotEqual(self, v1, v2):
self.assertFalse(as_variable(v1).equals(v2))
@@ -52,36 +67,47 @@ def assertArrayEqual(self, a1, a2):
def assertDatasetEqual(self, d1, d2):
# this method is functionally equivalent to `assert d1 == d2`, but it
# checks each aspect of equality separately for easier debugging
self.assertTrue(utils.dict_equal(d1.attributes, d2.attributes))
self.assertEqual(sorted(d1.variables), sorted(d2.variables))
for k in d1:
v1 = d1.variables[k]
v2 = d2.variables[k]
self.assertVariableEqual(v1, v2)

def assertDatasetIdentical(self, d1, d2):
# this method is functionally equivalent to `assert d1.identical(d2)`,
# but it checks each aspect of equality separately for easier debugging
self.assertTrue(utils.dict_equal(d1.attrs, d2.attrs))
self.assertEqual(sorted(d1.variables), sorted(d2.variables))
for k in d1:
v1 = d1.variables[k]
v2 = d2.variables[k]
self.assertTrue(v1.identical(v2))

def assertDatasetAllClose(self, d1, d2, rtol=1e-05, atol=1e-08):
self.assertTrue(utils.dict_equal(d1.attributes, d2.attributes))
self.assertEqual(sorted(d1.variables), sorted(d2.variables))
for k in d1:
v1 = d1.variables[k]
v2 = d2.variables[k]
self.assertVariableAllClose(v1, v2, rtol=rtol, atol=atol)

def assertCoordsEqual(self, d1, d2):
self.assertEqual(sorted(d1.coordinates), sorted(d2.coordinates))
for k in d1.coordinates:
v1 = d1.coordinates[k]
v2 = d2.coordinates[k]
self.assertVariableEqual(v1, v2)

def assertDataArrayEqual(self, ar1, ar2):
self.assertVariableEqual(ar1, ar2)
self.assertCoordsEqual(ar1, ar2)

def assertDataArrayIdentical(self, ar1, ar2):
self.assertEqual(ar1.name, ar2.name)
self.assertDatasetEqual(ar1.dataset, ar2.dataset)
self.assertDatasetIdentical(ar1.dataset, ar2.dataset)

def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08):
self.assertEqual(ar1.name, ar2.name)
self.assertDatasetAllClose(ar1.dataset, ar2.dataset,
rtol=rtol, atol=atol)

def assertDataArrayEquiv(self, ar1, ar2):
self.assertIsInstance(ar1, DataArray)
self.assertIsInstance(ar2, DataArray)
random_name = 'randomly-renamed-variable'
self.assertDataArrayEqual(ar1.rename(random_name),
ar2.rename(random_name))
self.assertVariableAllClose(ar1, ar2, rtol=rtol, atol=atol)
self.assertCoordsEqual(ar1, ar2)


class ReturnItem(object):
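As an aside on the new `data_allclose_or_equiv` helper above: numeric data is compared within the given tolerances, while datetime, timedelta, string, and object dtypes fall back to exact equality. A small sketch of the intended behavior, assuming the helper is importable from this test package:

```python
import numpy as np
from test import data_allclose_or_equiv  # import path is an assumption

# Floating-point values within rtol/atol compare as close...
assert data_allclose_or_equiv(np.array([1.0]), np.array([1.0 + 1e-9]))

# ...but string data (an "exact" dtype) must match element for element.
assert data_allclose_or_equiv(np.array(['abc']), np.array(['abc']))
assert not data_allclose_or_equiv(np.array(['abc']), np.array(['abd']))
```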
20 changes: 9 additions & 11 deletions test/test_backends.py
@@ -66,7 +66,7 @@ def test_roundtrip_test_data(self):
def test_roundtrip_string_data(self):
expected = Dataset({'x': ('t', ['abc', 'def'])})
actual = self.roundtrip(expected)
self.assertDatasetEqual(expected, actual)
self.assertDatasetIdentical(expected, actual)

def test_roundtrip_mask_and_scale(self):
decoded = create_masked_and_scaled_data()
@@ -81,7 +81,7 @@ def test_roundtrip_mask_and_scale(self):
def test_roundtrip_example_1_netcdf(self):
expected = open_example_dataset('example_1.nc')
actual = self.roundtrip(expected)
self.assertDatasetEqual(expected, actual)
self.assertDatasetIdentical(expected, actual)

def test_orthogonal_indexing(self):
in_memory = create_test_data()
@@ -98,7 +98,7 @@ def test_orthogonal_indexing(self):
def test_pickle(self):
on_disk = open_example_dataset('bears.nc')
unpickled = pickle.loads(pickle.dumps(on_disk))
self.assertDatasetEqual(on_disk, unpickled)
self.assertDatasetIdentical(on_disk, unpickled)


@contextlib.contextmanager
@@ -206,7 +206,7 @@ def test_mask_and_scale(self):
# now check xray
ds = open_dataset(tmp_file)
expected = create_masked_and_scaled_data()
self.assertDatasetEqual(expected, ds)
self.assertDatasetIdentical(expected, ds)

def test_0dimensional_variable(self):
# This fix verifies our work-around to this netCDF4-python bug:
@@ -219,7 +219,7 @@ def test_0dimensional_variable(self):

ds = open_dataset(tmp_file)
expected = Dataset({'x': ((), 123)})
self.assertDatasetEqual(expected, ds)
self.assertDatasetIdentical(expected, ds)

def test_variable_len_strings(self):
with create_tmp_file() as tmp_file:
@@ -234,7 +234,7 @@ def test_variable_len_strings(self):
expected = Dataset({'x': ('x', values)})
for kwargs in [{}, {'decode_cf': True}]:
actual = open_dataset(tmp_file, **kwargs)
self.assertDatasetEqual(expected, actual)
self.assertDatasetIdentical(expected, actual)


@requires_netCDF4
@@ -251,9 +251,9 @@ def roundtrip(self, data, **kwargs):


def clear_attributes(ds):
ds.attributes.clear()
ds.attrs.clear()
for v in ds.itervalues():
v.attributes.clear()
v.attrs.clear()


@requires_netCDF4
@@ -263,7 +263,5 @@ def test_cmp_local_file(self):
url = 'http://test.opendap.org/opendap/hyrax/data/nc/bears.nc'
actual = Dataset.load_store(backends.PydapDataStore(url))
expected = open_example_dataset('bears.nc')
# don't check attributes, since pydap decodes the strings improperly
for ds in [actual, expected]:
clear_attributes(ds)
# don't check attributes since pydap doesn't serialize them correctly
self.assertDatasetEqual(actual, expected)
