From 23b2cdd4d7f174fc108e7e1c0c44404ae66aca04 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 25 Apr 2014 17:24:59 -0700 Subject: [PATCH] Deprecated 'attributes' in favor of 'attrs' Also: 1. Don't try to preserve attributes under mathematical operations. 2. Finish up some cleanup related to "equals" and "identical" for testing. 3. Options for how strictly to compare variables when merging or concatenating (see #25). Fixes #103 and #104. --- README.md | 12 +-- doc/api.rst | 4 +- test/__init__.py | 54 +++++++++---- test/test_backends.py | 20 +++-- test/test_data_array.py | 81 +++++++++---------- test/test_dataset.py | 100 +++++++++++++---------- test/test_variable.py | 166 +++++++++++++++++++------------------- xray/backends/netCDF4_.py | 6 +- xray/backends/pydap_.py | 2 +- xray/backends/scipy_.py | 4 +- xray/common.py | 4 +- xray/conventions.py | 4 +- xray/data_array.py | 12 +-- xray/dataset.py | 121 ++++++++++++++------------- xray/groupby.py | 2 +- xray/utils.py | 23 ------ xray/variable.py | 74 ++++++----------- 17 files changed, 339 insertions(+), 350 deletions(-) diff --git a/README.md b/README.md index 698cd2abb75..5c9d2732a82 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ makes many powerful array operations possible: - Database like aligment based on coordinate labels that smoothly handles missing values: `x, y = xray.align(x, y, join='outer')`. - Keep track of arbitrary metadata in the form of a Python dictionary: - `x.attributes`. + `x.attrs`. **xray** aims to provide a data analysis toolkit as powerful as [pandas][pandas] but designed for working with homogeneous N-dimensional @@ -103,7 +103,7 @@ several limitations that led us to build xray instead of extending Iris: attempts to build all functionality (`Coord` supports a much more limited set of functionality). xray has its equivalent of the Cube (the `DataArray` object), but under the hood it is only thin wrapper - on the more primitive building blocks of Dataset and XArray objects. 
+ on the more primitive building blocks of Dataset and Variable objects. 2. Iris has a strict interpretation of [CF conventions][cf], which, although a principled choice, we have found to be impractical for everyday uses. With Iris, every quantity has physical (SI) units, all @@ -145,10 +145,10 @@ labeled numpy arrays that provided some guidance for the design of xray. enough. The goal is to be as fast as pandas or raw numpy. - Provide a uniform API for loading and saving scientific data in a variety of formats (including streaming data). - - Understand metadata according to [Climate and Forecast Conventions][cf] - when appropriate, but don't strictly enforce them. Conflicting attributes - (e.g., units) should be silently dropped instead of causing errors. The - onus is on the user to make sure that operations make sense. + - Take a pragmatic approach to metadata (attributes), and be very cautious + before implementing any functionality that relies on it. Automatically + maintaining attributes is tricky and very hard to get right (see + discussion about Iris above). 
## Getting started diff --git a/doc/api.rst b/doc/api.rst index e7c42f9a0d9..113e2c01a71 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -25,7 +25,7 @@ Attributes and underlying data Dataset.coordinates Dataset.noncoordinates Dataset.dimensions - Dataset.attributes + Dataset.attrs Dataset contents ~~~~~~~~~~~~~~~~ @@ -112,7 +112,7 @@ Attributes and underlying data DataArray.coordinates DataArray.name DataArray.dataset - DataArray.attributes + DataArray.attrs Selecting ~~~~~~~~~ diff --git a/test/__init__.py b/test/__init__.py index 9b065c8ef36..aa845fe30b0 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,5 +1,6 @@ import unittest +import numpy as np from numpy.testing import assert_array_equal from xray import utils, DataArray @@ -36,12 +37,26 @@ def requires_netCDF4(test): return test if has_netCDF4 else unittest.skip('requires netCDF4')(test) +def data_allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08): + exact_dtypes = [np.datetime64, np.timedelta64, np.string_] + if any(any(np.issubdtype(arr.dtype, t) for t in exact_dtypes) + or arr.dtype == object for arr in [arr1, arr2]): + return np.array_equal(arr1, arr2) + else: + return utils.allclose_or_equiv(arr1, arr2, rtol=rtol, atol=atol) + + class TestCase(unittest.TestCase): def assertVariableEqual(self, v1, v2): self.assertTrue(as_variable(v1).equals(v2)) + def assertVariableIdentical(self, v1, v2): + self.assertTrue(as_variable(v1).identical(v2)) + def assertVariableAllClose(self, v1, v2, rtol=1e-05, atol=1e-08): - self.assertTrue(utils.variable_allclose(v1, v2, rtol=rtol, atol=atol)) + self.assertEqual(v1.dimensions, v2.dimensions) + self.assertTrue(data_allclose_or_equiv(v1.values, v2.values, + rtol=rtol, atol=atol)) def assertVariableNotEqual(self, v1, v2): self.assertFalse(as_variable(v1).equals(v2)) @@ -52,36 +67,47 @@ def assertArrayEqual(self, a1, a2): def assertDatasetEqual(self, d1, d2): # this method is functionally equivalent to `assert d1 == d2`, but it # checks each aspect of equality 
separately for easier debugging - self.assertTrue(utils.dict_equal(d1.attributes, d2.attributes)) self.assertEqual(sorted(d1.variables), sorted(d2.variables)) for k in d1: v1 = d1.variables[k] v2 = d2.variables[k] self.assertVariableEqual(v1, v2) + def assertDatasetIdentical(self, d1, d2): + # this method is functionally equivalent to `assert d1.identical(d2)`, + # but it checks each aspect of equality separately for easier debugging + self.assertTrue(utils.dict_equal(d1.attrs, d2.attrs)) + self.assertEqual(sorted(d1.variables), sorted(d2.variables)) + for k in d1: + v1 = d1.variables[k] + v2 = d2.variables[k] + self.assertTrue(v1.identical(v2)) + def assertDatasetAllClose(self, d1, d2, rtol=1e-05, atol=1e-08): - self.assertTrue(utils.dict_equal(d1.attributes, d2.attributes)) self.assertEqual(sorted(d1.variables), sorted(d2.variables)) for k in d1: v1 = d1.variables[k] v2 = d2.variables[k] self.assertVariableAllClose(v1, v2, rtol=rtol, atol=atol) + def assertCoordsEqual(self, d1, d2): + self.assertEqual(sorted(d1.coordinates), sorted(d2.coordinates)) + for k in d1.coordinates: + v1 = d1.coordinates[k] + v2 = d2.coordinates[k] + self.assertVariableEqual(v1, v2) + def assertDataArrayEqual(self, ar1, ar2): + self.assertVariableEqual(ar1, ar2) + self.assertCoordsEqual(ar1, ar2) + + def assertDataArrayIdentical(self, ar1, ar2): self.assertEqual(ar1.name, ar2.name) - self.assertDatasetEqual(ar1.dataset, ar2.dataset) + self.assertDatasetIdentical(ar1.dataset, ar2.dataset) def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08): - self.assertEqual(ar1.name, ar2.name) - self.assertDatasetAllClose(ar1.dataset, ar2.dataset, - rtol=rtol, atol=atol) - - def assertDataArrayEquiv(self, ar1, ar2): - self.assertIsInstance(ar1, DataArray) - self.assertIsInstance(ar2, DataArray) - random_name = 'randomly-renamed-variable' - self.assertDataArrayEqual(ar1.rename(random_name), - ar2.rename(random_name)) + self.assertVariableAllClose(ar1, ar2, rtol=rtol, atol=atol) + 
self.assertCoordsEqual(ar1, ar2) class ReturnItem(object): diff --git a/test/test_backends.py b/test/test_backends.py index 2bac6ce87d6..9e2c21a378f 100644 --- a/test/test_backends.py +++ b/test/test_backends.py @@ -66,7 +66,7 @@ def test_roundtrip_test_data(self): def test_roundtrip_string_data(self): expected = Dataset({'x': ('t', ['abc', 'def'])}) actual = self.roundtrip(expected) - self.assertDatasetEqual(expected, actual) + self.assertDatasetIdentical(expected, actual) def test_roundtrip_mask_and_scale(self): decoded = create_masked_and_scaled_data() @@ -81,7 +81,7 @@ def test_roundtrip_mask_and_scale(self): def test_roundtrip_example_1_netcdf(self): expected = open_example_dataset('example_1.nc') actual = self.roundtrip(expected) - self.assertDatasetEqual(expected, actual) + self.assertDatasetIdentical(expected, actual) def test_orthogonal_indexing(self): in_memory = create_test_data() @@ -98,7 +98,7 @@ def test_orthogonal_indexing(self): def test_pickle(self): on_disk = open_example_dataset('bears.nc') unpickled = pickle.loads(pickle.dumps(on_disk)) - self.assertDatasetEqual(on_disk, unpickled) + self.assertDatasetIdentical(on_disk, unpickled) @contextlib.contextmanager @@ -206,7 +206,7 @@ def test_mask_and_scale(self): # now check xray ds = open_dataset(tmp_file) expected = create_masked_and_scaled_data() - self.assertDatasetEqual(expected, ds) + self.assertDatasetIdentical(expected, ds) def test_0dimensional_variable(self): # This fix verifies our work-around to this netCDF4-python bug: @@ -219,7 +219,7 @@ def test_0dimensional_variable(self): ds = open_dataset(tmp_file) expected = Dataset({'x': ((), 123)}) - self.assertDatasetEqual(expected, ds) + self.assertDatasetIdentical(expected, ds) def test_variable_len_strings(self): with create_tmp_file() as tmp_file: @@ -234,7 +234,7 @@ def test_variable_len_strings(self): expected = Dataset({'x': ('x', values)}) for kwargs in [{}, {'decode_cf': True}]: actual = open_dataset(tmp_file, **kwargs) - 
self.assertDatasetEqual(expected, actual) + self.assertDatasetIdentical(expected, actual) @requires_netCDF4 @@ -251,9 +251,9 @@ def roundtrip(self, data, **kwargs): def clear_attributes(ds): - ds.attributes.clear() + ds.attrs.clear() for v in ds.itervalues(): - v.attributes.clear() + v.attrs.clear() @requires_netCDF4 @@ -263,7 +263,5 @@ def test_cmp_local_file(self): url = 'http://test.opendap.org/opendap/hyrax/data/nc/bears.nc' actual = Dataset.load_store(backends.PydapDataStore(url)) expected = open_example_dataset('bears.nc') - # don't check attributes, since pydap decodes the strings improperly - for ds in [actual, expected]: - clear_attributes(ds) + # don't check attributes since pydap doesn't serialize them correctly self.assertDatasetEqual(actual, expected) diff --git a/test/test_data_array.py b/test/test_data_array.py index 596bcf63044..a2c7222ab9a 100644 --- a/test/test_data_array.py +++ b/test/test_data_array.py @@ -30,8 +30,7 @@ def test_properties(self): self.assertEqual(self.dv.name, 'foo') self.assertVariableEqual(self.dv.variable, self.v) self.assertArrayEqual(self.dv.values, self.v.values) - for attr in ['dimensions', 'dtype', 'shape', 'size', 'ndim', - 'attributes']: + for attr in ['dimensions', 'dtype', 'shape', 'size', 'ndim', 'attrs']: self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr)) self.assertEqual(len(self.dv), len(self.v)) self.assertVariableEqual(self.dv, self.v) @@ -57,7 +56,7 @@ def test_equals_and_identical(self): self.assertFalse(self.dv.identical(da4)) da5 = self.dv.copy() - da5.attributes['foo'] = 'bar' + da5.attrs['foo'] = 'bar' self.assertTrue(self.dv.equals(da5)) self.assertFalse(self.dv.identical(da5)) @@ -77,11 +76,11 @@ def test_equals_and_identical(self): def test_items(self): # strings pull out dataarrays - self.assertDataArrayEqual(self.dv, self.ds['foo']) + self.assertDataArrayIdentical(self.dv, self.ds['foo']) x = self.dv['x'] y = self.dv['y'] - self.assertDataArrayEqual(self.ds['x'], x) - 
self.assertDataArrayEqual(self.ds['y'], y) + self.assertDataArrayIdentical(self.ds['x'], x) + self.assertDataArrayIdentical(self.ds['y'], y) # integer indexing I = ReturnItem() for i in [I[:], I[...], I[x.values], I[x.variable], I[x], I[x, y], @@ -101,22 +100,22 @@ def test_indexed(self): self.assertEqual(self.dv[0].dataset, self.ds.indexed(x=0)) self.assertEqual(self.dv[:3, :5].dataset, self.ds.indexed(x=slice(3), y=slice(5))) - self.assertDataArrayEqual(self.dv, self.dv.indexed(x=slice(None))) - self.assertDataArrayEqual(self.dv[:3], self.dv.indexed(x=slice(3))) + self.assertDataArrayIdentical(self.dv, self.dv.indexed(x=slice(None))) + self.assertDataArrayIdentical(self.dv[:3], self.dv.indexed(x=slice(3))) def test_labeled(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) - self.assertDataArrayEqual(self.dv, self.dv.labeled(x=slice(None))) - self.assertDataArrayEqual(self.dv[1], self.dv.labeled(x='b')) - self.assertDataArrayEqual(self.dv[:3], self.dv.labeled(x=slice('c'))) + self.assertDataArrayIdentical(self.dv, self.dv.labeled(x=slice(None))) + self.assertDataArrayIdentical(self.dv[1], self.dv.labeled(x='b')) + self.assertDataArrayIdentical(self.dv[:3], self.dv.labeled(x=slice('c'))) def test_loc(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) - self.assertDataArrayEqual(self.dv[:3], self.dv.loc[:'c']) - self.assertDataArrayEqual(self.dv[1], self.dv.loc['b']) - self.assertDataArrayEqual(self.dv[:3], self.dv.loc[['a', 'b', 'c']]) - self.assertDataArrayEqual(self.dv[:3, :4], - self.dv.loc[['a', 'b', 'c'], np.arange(4)]) + self.assertDataArrayIdentical(self.dv[:3], self.dv.loc[:'c']) + self.assertDataArrayIdentical(self.dv[1], self.dv.loc['b']) + self.assertDataArrayIdentical(self.dv[:3], self.dv.loc[['a', 'b', 'c']]) + self.assertDataArrayIdentical(self.dv[:3, :4], + self.dv.loc[['a', 'b', 'c'], np.arange(4)]) self.dv.loc['a':'j'] = 0 self.assertTrue(np.all(self.dv.values == 0)) @@ -131,7 +130,7 @@ def test_rename(self): def 
test_dataset_getitem(self): dv = self.ds['foo'] - self.assertDataArrayEqual(dv, self.dv) + self.assertDataArrayIdentical(dv, self.dv) def test_array_interface(self): self.assertArrayEqual(np.asarray(self.dv), self.x) @@ -142,10 +141,10 @@ def test_array_interface(self): # test ufuncs expected = deepcopy(self.ds) expected['foo'][:] = np.sin(self.x) - self.assertDataArrayEquiv(expected['foo'], np.sin(self.dv)) - self.assertDataArrayEquiv(self.dv, np.maximum(self.v, self.dv)) + self.assertDataArrayEqual(expected['foo'], np.sin(self.dv)) + self.assertDataArrayEqual(self.dv, np.maximum(self.v, self.dv)) bar = Variable(['x', 'y'], np.zeros((10, 20))) - self.assertDataArrayEquiv(self.dv, np.maximum(self.dv, bar)) + self.assertDataArrayEqual(self.dv, np.maximum(self.dv, bar)) def test_math(self): x = self.x @@ -153,15 +152,15 @@ def test_math(self): a = self.dv # variable math was already tested extensively, so let's just make sure # that all types are properly converted here - self.assertDataArrayEquiv(a, +a) - self.assertDataArrayEquiv(a, a + 0) - self.assertDataArrayEquiv(a, 0 + a) - self.assertDataArrayEquiv(a, a + 0 * v) - self.assertDataArrayEquiv(a, 0 * v + a) - self.assertDataArrayEquiv(a, a + 0 * x) - self.assertDataArrayEquiv(a, 0 * x + a) - self.assertDataArrayEquiv(a, a + 0 * a) - self.assertDataArrayEquiv(a, 0 * a + a) + self.assertDataArrayEqual(a, +a) + self.assertDataArrayEqual(a, a + 0) + self.assertDataArrayEqual(a, 0 + a) + self.assertDataArrayEqual(a, a + 0 * v) + self.assertDataArrayEqual(a, 0 * v + a) + self.assertDataArrayEqual(a, a + 0 * x) + self.assertDataArrayEqual(a, 0 * x + a) + self.assertDataArrayEqual(a, a + 0 * a) + self.assertDataArrayEqual(a, 0 * a + a) # test different indices ds2 = self.ds.update({'x': ('x', 3 + np.arange(10))}, inplace=False) b = ds2['foo'] @@ -180,12 +179,12 @@ def test_dataset_math(self): actual = 2 * obs['tmax'] expected = Dataset({'tmax2': ('x', 2 * (10 + np.arange(5))), 'x': obs['x']})['tmax2'] - 
self.assertDataArrayEquiv(actual, expected) + self.assertDataArrayEqual(actual, expected) actual = obs['tmax'] - obs['tmin'] expected = Dataset({'trange': ('x', 10 * np.ones(5)), 'x': obs['x']})['trange'] - self.assertDataArrayEquiv(actual, expected) + self.assertDataArrayEqual(actual, expected) sim = Dataset({'tmin': ('x', 1 + np.arange(5)), 'tmax': ('x', 11 + np.arange(5)), @@ -194,7 +193,7 @@ def test_dataset_math(self): actual = sim['tmin'] - obs['tmin'] expected = Dataset({'error': ('x', np.ones(5)), 'x': obs['x']})['error'] - self.assertDataArrayEquiv(actual, expected) + self.assertDataArrayEqual(actual, expected) # in place math shouldn't remove or conflict with other variables actual = deepcopy(sim['tmin']) @@ -202,14 +201,14 @@ def test_dataset_math(self): expected = Dataset({'tmin': ('x', np.ones(5)), 'tmax': sim['tmax'], 'x': sim['x']})['tmin'] - self.assertDataArrayEquiv(actual, expected) + self.assertDataArrayEqual(actual, expected) def test_coord_math(self): ds = Dataset({'x': ('x', 1 + np.arange(3))}) expected = ds.copy() expected['x2'] = ('x', np.arange(3)) actual = ds['x'] - 1 - self.assertDataArrayEquiv(expected['x2'], actual) + self.assertDataArrayEqual(expected['x2'], actual) def test_item_math(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) @@ -255,9 +254,9 @@ def test_groupby_iter(self): for ((act_x, act_dv), (exp_x, exp_ds)) in \ zip(self.dv.groupby('y'), self.ds.groupby('y')): self.assertEqual(exp_x, act_x) - self.assertDataArrayEqual(exp_ds['foo'], act_dv) + self.assertDataArrayIdentical(exp_ds['foo'], act_dv) for ((_, exp_dv), act_dv) in zip(self.dv.groupby('x'), self.dv): - self.assertDataArrayEqual(exp_dv, act_dv) + self.assertDataArrayIdentical(exp_dv, act_dv) def test_groupby(self): agg_var = Variable(['y'], np.array(['a'] * 9 + ['c'] + ['b'] * 10)) @@ -271,7 +270,7 @@ def test_groupby(self): expected = self.dv grouped = self.dv.groupby(g, squeeze=squeeze) actual = grouped.apply(identity, shortcut=shortcut) - 
self.assertDataArrayEqual(expected, actual) + self.assertDataArrayIdentical(expected, actual) grouped = self.dv.groupby('abc', squeeze=True) expected_sum_all = Dataset( @@ -316,15 +315,15 @@ def test_concat(self): # from iteration: grouped = [g for _, g in foo.groupby('x')] stacked = DataArray.concat(grouped, self.ds['x']) - self.assertDataArrayEqual(foo.select(), stacked) + self.assertDataArrayIdentical(foo.select(), stacked) def test_align(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) with self.assertRaises(ValueError): self.dv + self.dv[:5] dv1, dv2 = align(self.dv, self.dv[:5], join='inner') - self.assertDataArrayEqual(dv1, self.dv[:5]) - self.assertDataArrayEqual(dv2, self.dv[:5]) + self.assertDataArrayIdentical(dv1, self.dv[:5]) + self.assertDataArrayIdentical(dv2, self.dv[:5]) def test_to_and_from_series(self): expected = self.dv.to_dataframe()['foo'] @@ -333,4 +332,4 @@ def test_to_and_from_series(self): self.assertArrayEqual(expected.index.values, actual.index.values) self.assertEqual('foo', actual.name) # test roundtrip - self.assertDataArrayEqual(self.dv, DataArray.from_series(actual)) + self.assertDataArrayIdentical(self.dv, DataArray.from_series(actual)) diff --git a/test/test_dataset.py b/test/test_dataset.py index 4d5e3c2d0d5..87f867cd0c2 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -50,7 +50,7 @@ class InaccessibleVariableDataStore(backends.InMemoryDataStore): def __init__(self): self.dimensions = OrderedDict() self._variables = OrderedDict() - self.attributes = OrderedDict() + self.attrs = OrderedDict() def set_variable(self, name, variable): self._variables[name] = variable @@ -61,7 +61,7 @@ def variables(self): return utils.FrozenOrderedDict( (k, Variable(v.dimensions, InaccessibleArray(v.values), - v.attributes)) + v.attrs)) for k, v in self._variables.iteritems()) @@ -110,7 +110,7 @@ def test_init(self): # verify handling of DataArrays expected = Dataset({'x': var1, 'z': var3}) actual = Dataset({'z': 
expected['z']}) - self.assertDatasetEqual(expected, actual) + self.assertDatasetIdentical(expected, actual) def test_variable(self): a = Dataset() @@ -159,7 +159,7 @@ def test_equals_and_identical(self): self.assertTrue(data.identical(data)) data2 = create_test_data(seed=42) - data2.attributes['foobar'] = 'baz' + data2.attrs['foobar'] = 'baz' self.assertTrue(data.equals(data2)) self.assertFalse(data.identical(data2)) @@ -182,7 +182,7 @@ def test_indexed(self): # Verify that the data is what we expect for v in data.variables: self.assertEqual(data[v].dimensions, ret[v].dimensions) - self.assertEqual(data[v].attributes, ret[v].attributes) + self.assertEqual(data[v].attrs, ret[v].attrs) slice_list = [slice(None)] * data[v].values.ndim for d, s in slicers.iteritems(): if d in data[v].dimensions: @@ -236,28 +236,28 @@ def test_reindex_like(self): data = create_test_data() expected = data.indexed(dim1=slice(10), time=slice(13)) actual = data.reindex_like(expected) - self.assertDatasetEqual(actual, expected) + self.assertDatasetIdentical(actual, expected) expected = data.copy(deep=True) expected['dim3'] = ('dim3', list('cdefghijkl')) expected['var3'][:-2] = expected['var3'][2:] expected['var3'][-2:] = np.nan actual = data.reindex_like(expected) - self.assertDatasetEqual(actual, expected) + self.assertDatasetIdentical(actual, expected) def test_reindex(self): data = create_test_data() - self.assertDatasetEqual(data, data.reindex()) + self.assertDatasetIdentical(data, data.reindex()) expected = data.indexed(dim1=slice(10)) actual = data.reindex(dim1=data['dim1'][:10]) - self.assertDatasetEqual(actual, expected) + self.assertDatasetIdentical(actual, expected) actual = data.reindex(dim1=data['dim1'][:10].values) - self.assertDatasetEqual(actual, expected) + self.assertDatasetIdentical(actual, expected) actual = data.reindex(dim1=data['dim1'][:10].as_index) - self.assertDatasetEqual(actual, expected) + self.assertDatasetIdentical(actual, expected) def test_align(self): left = 
create_test_data() @@ -271,28 +271,28 @@ def test_align(self): left2, right2 = align(left, right, join='inner') self.assertArrayEqual(left2['dim3'], intersection) - self.assertDatasetEqual(left2, right2) + self.assertDatasetIdentical(left2, right2) left2, right2 = align(left, right, join='outer') self.assertVariableEqual(left2['dim3'], right2['dim3']) self.assertArrayEqual(left2['dim3'], union) - self.assertDatasetEqual(left2.labeled(dim3=intersection), - right2.labeled(dim3=intersection)) + self.assertDatasetIdentical(left2.labeled(dim3=intersection), + right2.labeled(dim3=intersection)) self.assertTrue(np.isnan(left2['var3'][-2:]).all()) self.assertTrue(np.isnan(right2['var3'][:2]).all()) left2, right2 = align(left, right, join='left') self.assertVariableEqual(left2['dim3'], right2['dim3']) self.assertVariableEqual(left2['dim3'], left['dim3']) - self.assertDatasetEqual(left2.labeled(dim3=intersection), - right2.labeled(dim3=intersection)) + self.assertDatasetIdentical(left2.labeled(dim3=intersection), + right2.labeled(dim3=intersection)) self.assertTrue(np.isnan(right2['var3'][:2]).all()) left2, right2 = align(left, right, join='right') self.assertVariableEqual(left2['dim3'], right2['dim3']) self.assertVariableEqual(left2['dim3'], right['dim3']) - self.assertDatasetEqual(left2.labeled(dim3=intersection), - right2.labeled(dim3=intersection)) + self.assertDatasetIdentical(left2.labeled(dim3=intersection), + right2.labeled(dim3=intersection)) self.assertTrue(np.isnan(left2['var3'][-2:]).all()) def test_variable_indexing(self): @@ -335,7 +335,7 @@ def test_copy(self): data = create_test_data() copied = data.copy(deep=False) - self.assertDatasetEqual(data, copied) + self.assertDatasetIdentical(data, copied) for k in data: v0 = data.variables[k] v1 = copied.variables[k] @@ -344,7 +344,7 @@ def test_copy(self): self.assertNotIn('foo', data) copied = data.copy(deep=True) - self.assertDatasetEqual(data, copied) + self.assertDatasetIdentical(data, copied) for k in data: v0 
= data.variables[k] v1 = copied.variables[k] @@ -365,8 +365,8 @@ def test_rename(self): if name in dims: dims[dims.index(name)] = newname - self.assertVariableEqual(Variable(dims, v.values, v.attributes), - renamed.variables[k]) + self.assertVariableEqual(Variable(dims, v.values, v.attrs), + renamed.variables[k]) self.assertEqual(v.encoding, renamed.variables[k].encoding) self.assertEqual(type(v), type(renamed.variables[k])) @@ -390,7 +390,7 @@ def test_update(self): expected_vars = OrderedDict(create_test_data(seed=0).variables) expected_vars['var2'] = var2 expected = Dataset(expected_vars) - self.assertDatasetEqual(expected, actual) + self.assertDatasetIdentical(expected, actual) # test in-place data2 = data.update(data, inplace=True) self.assertIs(data2, data) @@ -430,7 +430,7 @@ def test_virtual_variables(self): # test slicing the virtual variable -- it should still be virtual actual = data['time.dayofyear'][:10].dataset expected = data.indexed(time=slice(10)) - self.assertDatasetEqual(expected, actual) + self.assertDatasetIdentical(expected, actual) @unittest.expectedFailure def test_slice_virtual_variable(self): @@ -446,12 +446,12 @@ def test_setitem(self): data1['A'] = var data2 = data1.copy() data2['A'] = var - self.assertDatasetEqual(data1, data2) + self.assertDatasetIdentical(data1, data2) # assign a dataset array dv = 2 * data2['A'] data1['B'] = dv.variable data2['B'] = dv - self.assertDatasetEqual(data1, data2) + self.assertDatasetIdentical(data1, data2) # assign an array with self.assertRaisesRegexp(TypeError, 'variables must be of type'): data2['C'] = var.values @@ -478,7 +478,7 @@ def get_args(v): return [set(args[0]) & set(v.dimensions)] if args else [] expected = Dataset({k: v.squeeze(*get_args(v)) for k, v in data.variables.iteritems()}) - self.assertDatasetEqual(expected, data.squeeze(*args)) + self.assertDatasetIdentical(expected, data.squeeze(*args)) # invalid squeeze with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'): 
data.squeeze('y') @@ -517,30 +517,32 @@ def test_concat(self): split_data = [data.indexed(dim1=slice(10)), data.indexed(dim1=slice(10, None))] - self.assertDatasetEqual(data, Dataset.concat(split_data, 'dim1')) + self.assertDatasetIdentical(data, Dataset.concat(split_data, 'dim1')) def rectify_dim_order(dataset): # return a new dataset with all variable dimensions tranposed into # the order in which they are found in `data` return Dataset({k: v.transpose(*data[k].dimensions) for k, v in dataset.variables.iteritems()}, - dataset.attributes) + dataset.attrs) for dim in ['dim1', 'dim2', 'dim3']: datasets = [g for _, g in data.groupby(dim, squeeze=False)] - self.assertDatasetEqual(data, Dataset.concat(datasets, dim)) - self.assertDatasetEqual(data, Dataset.concat(datasets, data[dim])) - self.assertDatasetEqual(data, Dataset.concat(datasets, data[dim], - mode='minimal')) + self.assertDatasetIdentical(data, Dataset.concat(datasets, dim)) + self.assertDatasetIdentical( + data, Dataset.concat(datasets, data[dim])) + self.assertDatasetIdentical( + data, Dataset.concat(datasets, data[dim], mode='minimal')) datasets = [g for _, g in data.groupby(dim, squeeze=True)] concat_over = [k for k, v in data.variables.iteritems() if dim in v.dimensions and k != dim] - actual = Dataset.concat(datasets, data[dim], concat_over=concat_over) - self.assertDatasetEqual(data, rectify_dim_order(actual)) + actual = Dataset.concat(datasets, data[dim], + concat_over=concat_over) + self.assertDatasetIdentical(data, rectify_dim_order(actual)) actual = Dataset.concat(datasets, data[dim], mode='different') - self.assertDatasetEqual(data, rectify_dim_order(actual)) + self.assertDatasetIdentical(data, rectify_dim_order(actual)) # Now add a new variable that doesn't depend on any of the current # dims and make sure the mode argument behaves as expected @@ -563,21 +565,33 @@ def rectify_dim_order(dataset): datasets = [g for _, g in data.groupby('dim1', squeeze=False)] expected = data.copy() 
expected['dim1'] = dimension - self.assertDatasetEqual(expected, Dataset.concat(datasets, dimension)) + self.assertDatasetIdentical( + expected, Dataset.concat(datasets, dimension)) + + # TODO: factor this into several distinct tests + data = create_test_data() + split_data = [data.indexed(dim1=slice(10)), + data.indexed(dim1=slice(10, None))] with self.assertRaisesRegexp(ValueError, 'must supply at least one'): Dataset.concat([], 'dim1') + with self.assertRaisesRegexp(ValueError, 'not all elements in'): Dataset.concat(split_data, 'dim1', concat_over=['not_found']) + with self.assertRaisesRegexp(ValueError, 'global attributes not'): data0, data1 = deepcopy(split_data) - data1.attributes['foo'] = 'bar' - Dataset.concat([data0, data1], 'dim1') + data1.attrs['foo'] = 'bar' + Dataset.concat([data0, data1], 'dim1', compat='identical') + self.assertDatasetIdentical( + data, Dataset.concat([data0, data1], 'dim1', compat='equals')) + with self.assertRaisesRegexp(ValueError, 'encountered unexpected'): data0, data1 = deepcopy(split_data) data1['foo'] = ('bar', np.random.randn(10)) Dataset.concat([data0, data1], 'dim1') - with self.assertRaisesRegexp(ValueError, 'identical across datasets'): + + with self.assertRaisesRegexp(ValueError, 'not equal across datasets'): data0, data1 = deepcopy(split_data) data1['dim2'] = 2 * data1['dim2'] Dataset.concat([data0, data1], 'dim1') @@ -594,7 +608,7 @@ def test_to_and_from_dataframe(self): self.assertTrue(expected.equals(actual)) # check roundtrip - self.assertDatasetEqual(ds, Dataset.from_dataframe(actual)) + self.assertDatasetIdentical(ds, Dataset.from_dataframe(actual)) # test a case with a MultiIndex w = np.random.randn(2, 3) @@ -608,12 +622,12 @@ def test_to_and_from_dataframe(self): self.assertTrue(expected.equals(actual)) # check roundtrip - self.assertDatasetEqual(ds, Dataset.from_dataframe(actual)) + self.assertDatasetIdentical(ds, Dataset.from_dataframe(actual)) def test_pickle(self): data = create_test_data() roundtripped = 
pickle.loads(pickle.dumps(data)) - self.assertDatasetEqual(data, roundtripped) + self.assertDatasetIdentical(data, roundtripped) def test_lazy_load(self): store = InaccessibleVariableDataStore() diff --git a/test/test_variable.py b/test/test_variable.py index 6d227c818d1..e593212d5cf 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -24,7 +24,7 @@ def test_properties(self): self.assertEqual(v.size, 10) self.assertEqual(v.ndim, 1) self.assertEqual(len(v), 10) - self.assertEqual(v.attributes, {'foo': u'bar'}) + self.assertEqual(v.attrs, {'foo': u'bar'}) def test_0d_data(self): d = datetime(2000, 1, 1) @@ -52,7 +52,7 @@ def test_0d_data(self): def test_pandas_data(self): v = self.cls(['x'], pd.Series([0, 1, 2], index=[3, 2, 1])) - self.assertVariableEqual(v, v[[0, 1, 2]]) + self.assertVariableIdentical(v, v[[0, 1, 2]]) v = self.cls(['x'], pd.Index([0, 1, 2])) self.assertEqual(v[0].values, v.values[0]) @@ -61,13 +61,13 @@ def test_1d_math(self): y = np.ones(5) v = self.cls(['x'], x) # unary ops - self.assertVariableEqual(v, +v) - self.assertVariableEqual(v, abs(v)) + self.assertVariableIdentical(v, +v) + self.assertVariableIdentical(v, abs(v)) self.assertArrayEqual((-v).values, -x) # bianry ops with numbers - self.assertVariableEqual(v, v + 0) - self.assertVariableEqual(v, 0 + v) - self.assertVariableEqual(v, v * 1) + self.assertVariableIdentical(v, v + 0) + self.assertVariableIdentical(v, 0 + v) + self.assertVariableIdentical(v, v * 1) self.assertArrayEqual((v > 2).values, x > 2) self.assertArrayEqual((0 == v).values, 0 == x) self.assertArrayEqual((v - 1).values, x - 1) @@ -77,15 +77,13 @@ def test_1d_math(self): self.assertArrayEqual((x * v).values, x ** 2) self.assertArrayEqual(v - y, v - 1) self.assertArrayEqual(y - v, 1 - v) - # verify math-safe attributes + # verify attributes are dropped v2 = self.cls(['x'], x, {'units': 'meters'}) - self.assertVariableEqual(v, +v2) - v3 = self.cls(['x'], x, {'something': 'else'}) - self.assertVariableEqual(v3, 
+v3) + self.assertVariableIdentical(v, +v2) # binary ops with all variables self.assertArrayEqual(v + v, 2 * v) w = self.cls(['x'], y, {'foo': 'bar'}) - self.assertVariableEqual(v + w, self.cls(['x'], x + y)) + self.assertVariableIdentical(v + w, self.cls(['x'], x + y)) self.assertArrayEqual((v * w).values, x * y) # something complicated self.assertArrayEqual((v ** 2 * w - 1 + x).values, x ** 2 * y - 1 + x) @@ -104,8 +102,8 @@ def test_1d_reduce(self): x = np.arange(5) v = self.cls(['x'], x) actual = v.sum() - expected = Variable((), 10, {'cell_methods': 'x: sum'}) - self.assertVariableEqual(expected, actual) + expected = Variable((), 10) + self.assertVariableIdentical(expected, actual) self.assertIs(type(actual), Variable) def test_array_interface(self): @@ -114,10 +112,10 @@ def test_array_interface(self): self.assertArrayEqual(np.asarray(v), x) # test patched in methods self.assertArrayEqual(v.astype(float), x.astype(float)) - self.assertVariableEqual(v.argsort(), v) - self.assertVariableEqual(v.clip(2, 3), self.cls('x', x.clip(2, 3))) + self.assertVariableIdentical(v.argsort(), v) + self.assertVariableIdentical(v.clip(2, 3), self.cls('x', x.clip(2, 3))) # test ufuncs - self.assertVariableEqual(np.sin(v), self.cls(['x'], np.sin(x))) + self.assertVariableIdentical(np.sin(v), self.cls(['x'], np.sin(x))) self.assertIsInstance(np.sin(v), Variable) self.assertNotIsInstance(np.sin(v), Coordinate) @@ -126,11 +124,11 @@ def test_concat(self): y = np.ones(5) v = self.cls(['a'], x) w = self.cls(['a'], y) - self.assertVariableEqual(Variable(['b', 'a'], np.array([x, y])), + self.assertVariableIdentical(Variable(['b', 'a'], np.array([x, y])), Variable.concat([v, w], 'b')) - self.assertVariableEqual(Variable(['b', 'a'], np.array([x, y])), + self.assertVariableIdentical(Variable(['b', 'a'], np.array([x, y])), Variable.concat((v, w), 'b')) - self.assertVariableEqual(Variable(['b', 'a'], np.array([x, y])), + self.assertVariableIdentical(Variable(['b', 'a'], np.array([x, y])), 
Variable.concat((v, w), 'b', length=2)) with self.assertRaisesRegexp(ValueError, 'actual length'): Variable.concat([v, w], 'b', length=1) @@ -140,12 +138,12 @@ def test_concat(self): Variable.concat([v, Variable(['c'], y)], 'b') # test concatenating along a dimension v = Variable(['time', 'x'], np.random.random((10, 8))) - self.assertVariableEqual(v, Variable.concat([v[:5], v[5:]], 'time')) - self.assertVariableEqual(v, Variable.concat([v[:5], v[5], v[6:]], 'time')) - self.assertVariableEqual(v, Variable.concat([v[0], v[1:]], 'time')) + self.assertVariableIdentical(v, Variable.concat([v[:5], v[5:]], 'time')) + self.assertVariableIdentical(v, Variable.concat([v[:5], v[5], v[6:]], 'time')) + self.assertVariableIdentical(v, Variable.concat([v[0], v[1:]], 'time')) # test dimension order - self.assertVariableEqual(v, Variable.concat([v[:, :5], v[:, 5:]], 'x')) - self.assertVariableEqual(v.transpose(), + self.assertVariableIdentical(v, Variable.concat([v[:, :5], v[:, 5:]], 'x')) + self.assertVariableIdentical(v.transpose(), Variable.concat([v[:, 0], v[:, 1:]], 'x')) def test_copy(self): @@ -153,7 +151,7 @@ def test_copy(self): for deep in [True, False]: w = v.copy(deep=deep) self.assertIs(type(v), type(w)) - self.assertVariableEqual(v, w) + self.assertVariableIdentical(v, w) self.assertEqual(v.dtype, w.dtype) if self.cls is Variable: if deep: @@ -202,20 +200,20 @@ def test_as_variable(self): data = np.arange(10) expected = Variable('x', data) - self.assertVariableEqual(expected, as_variable(expected)) + self.assertVariableIdentical(expected, as_variable(expected)) ds = Dataset({'x': expected}) - self.assertVariableEqual(expected, as_variable(ds['x'])) + self.assertVariableIdentical(expected, as_variable(ds['x'])) self.assertNotIsInstance(ds['x'], Variable) self.assertIsInstance(as_variable(ds['x']), Variable) self.assertIsInstance(as_variable(ds['x'], strict=False), DataArray) FakeVariable = namedtuple('FakeVariable', 'values dimensions') fake_xarray = 
FakeVariable(expected.values, expected.dimensions) - self.assertVariableEqual(expected, as_variable(fake_xarray)) + self.assertVariableIdentical(expected, as_variable(fake_xarray)) xarray_tuple = (expected.dimensions, expected.values) - self.assertVariableEqual(expected, as_variable(xarray_tuple)) + self.assertVariableIdentical(expected, as_variable(xarray_tuple)) with self.assertRaisesRegexp(TypeError, 'cannot convert numpy'): as_variable(data) @@ -238,24 +236,25 @@ def test_items(self): data = np.random.random((10, 11)) v = Variable(['x', 'y'], data) # test slicing - self.assertVariableEqual(v, v[:]) - self.assertVariableEqual(v, v[...]) - self.assertVariableEqual(Variable(['y'], data[0]), v[0]) - self.assertVariableEqual(Variable(['x'], data[:, 0]), v[:, 0]) - self.assertVariableEqual(Variable(['x', 'y'], data[:3, :2]), v[:3, :2]) + self.assertVariableIdentical(v, v[:]) + self.assertVariableIdentical(v, v[...]) + self.assertVariableIdentical(Variable(['y'], data[0]), v[0]) + self.assertVariableIdentical(Variable(['x'], data[:, 0]), v[:, 0]) + self.assertVariableIdentical(Variable(['x', 'y'], data[:3, :2]), + v[:3, :2]) # test array indexing x = Variable(['x'], np.arange(10)) y = Variable(['y'], np.arange(11)) - self.assertVariableEqual(v, v[x.values]) - self.assertVariableEqual(v, v[x]) - self.assertVariableEqual(v[:3], v[x < 3]) - self.assertVariableEqual(v[:, 3:], v[:, y >= 3]) - self.assertVariableEqual(v[:3, 3:], v[x < 3, y >= 3]) - self.assertVariableEqual(v[:3, :2], v[x[:3], y[:2]]) - self.assertVariableEqual(v[:3, :2], v[range(3), range(2)]) + self.assertVariableIdentical(v, v[x.values]) + self.assertVariableIdentical(v, v[x]) + self.assertVariableIdentical(v[:3], v[x < 3]) + self.assertVariableIdentical(v[:, 3:], v[:, y >= 3]) + self.assertVariableIdentical(v[:3, 3:], v[x < 3, y >= 3]) + self.assertVariableIdentical(v[:3, :2], v[x[:3], y[:2]]) + self.assertVariableIdentical(v[:3, :2], v[range(3), range(2)]) # test iteration for n, item in enumerate(v): - 
self.assertVariableEqual(Variable(['y'], data[n]), item) + self.assertVariableIdentical(Variable(['y'], data[n]), item) with self.assertRaisesRegexp(TypeError, 'iteration over a 0-d'): iter(Variable([], 0)) # test setting @@ -264,38 +263,38 @@ def test_items(self): def test_indexed(self): v = Variable(['time', 'x'], self.d) - self.assertVariableEqual(v.indexed(time=slice(None)), v) - self.assertVariableEqual(v.indexed(time=0), v[0]) - self.assertVariableEqual(v.indexed(time=slice(0, 3)), v[:3]) - self.assertVariableEqual(v.indexed(x=0), v[:, 0]) + self.assertVariableIdentical(v.indexed(time=slice(None)), v) + self.assertVariableIdentical(v.indexed(time=0), v[0]) + self.assertVariableIdentical(v.indexed(time=slice(0, 3)), v[:3]) + self.assertVariableIdentical(v.indexed(x=0), v[:, 0]) with self.assertRaisesRegexp(ValueError, 'do not exist'): v.indexed(not_a_dim=0) def test_transpose(self): v = Variable(['time', 'x'], self.d) v2 = Variable(['x', 'time'], self.d.T) - self.assertVariableEqual(v, v2.transpose()) - self.assertVariableEqual(v.transpose(), v.T) + self.assertVariableIdentical(v, v2.transpose()) + self.assertVariableIdentical(v.transpose(), v.T) x = np.random.randn(2, 3, 4, 5) w = Variable(['a', 'b', 'c', 'd'], x) w2 = Variable(['d', 'b', 'c', 'a'], np.einsum('abcd->dbca', x)) self.assertEqual(w2.shape, (5, 3, 4, 2)) - self.assertVariableEqual(w2, w.transpose('d', 'b', 'c', 'a')) - self.assertVariableEqual(w, w2.transpose('a', 'b', 'c', 'd')) + self.assertVariableIdentical(w2, w.transpose('d', 'b', 'c', 'a')) + self.assertVariableIdentical(w, w2.transpose('a', 'b', 'c', 'd')) w3 = Variable(['b', 'c', 'd', 'a'], np.einsum('abcd->bcda', x)) - self.assertVariableEqual(w, w3.transpose('a', 'b', 'c', 'd')) + self.assertVariableIdentical(w, w3.transpose('a', 'b', 'c', 'd')) def test_squeeze(self): v = Variable(['x', 'y'], [[1]]) - self.assertVariableEqual(Variable([], 1), v.squeeze()) - self.assertVariableEqual(Variable(['y'], [1]), v.squeeze('x')) - 
self.assertVariableEqual(Variable(['y'], [1]), v.squeeze(['x'])) - self.assertVariableEqual(Variable(['x'], [1]), v.squeeze('y')) - self.assertVariableEqual(Variable([], 1), v.squeeze(['x', 'y'])) + self.assertVariableIdentical(Variable([], 1), v.squeeze()) + self.assertVariableIdentical(Variable(['y'], [1]), v.squeeze('x')) + self.assertVariableIdentical(Variable(['y'], [1]), v.squeeze(['x'])) + self.assertVariableIdentical(Variable(['x'], [1]), v.squeeze('y')) + self.assertVariableIdentical(Variable([], 1), v.squeeze(['x', 'y'])) v = Variable(['x', 'y'], [[1, 2]]) - self.assertVariableEqual(Variable(['y'], [1, 2]), v.squeeze()) - self.assertVariableEqual(Variable(['y'], [1, 2]), v.squeeze('x')) + self.assertVariableIdentical(Variable(['y'], [1, 2]), v.squeeze()) + self.assertVariableIdentical(Variable(['y'], [1, 2]), v.squeeze('x')) with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'): v.squeeze('y') @@ -312,30 +311,30 @@ def test_broadcasting_math(self): x = np.random.randn(2, 3) v = Variable(['a', 'b'], x) # 1d to 2d broadcasting - self.assertVariableEqual( + self.assertVariableIdentical( v * v, Variable(['a', 'b'], np.einsum('ab,ab->ab', x, x))) - self.assertVariableEqual( + self.assertVariableIdentical( v * v[0], Variable(['a', 'b'], np.einsum('ab,b->ab', x, x[0]))) - self.assertVariableEqual( + self.assertVariableIdentical( v[0] * v, Variable(['b', 'a'], np.einsum('b,ab->ba', x[0], x))) - self.assertVariableEqual( + self.assertVariableIdentical( v[0] * v[:, 0], Variable(['b', 'a'], np.einsum('b,a->ba', x[0], x[:, 0]))) # higher dim broadcasting y = np.random.randn(3, 4, 5) w = Variable(['b', 'c', 'd'], y) - self.assertVariableEqual( + self.assertVariableIdentical( v * w, Variable(['a', 'b', 'c', 'd'], - np.einsum('ab,bcd->abcd', x, y))) - self.assertVariableEqual( + np.einsum('ab,bcd->abcd', x, y))) + self.assertVariableIdentical( w * v, Variable(['b', 'c', 'd', 'a'], - np.einsum('bcd,ab->bcda', y, x))) - self.assertVariableEqual( + 
np.einsum('bcd,ab->bcda', y, x))) + self.assertVariableIdentical( v * w[0], Variable(['a', 'b', 'c', 'd'], - np.einsum('ab,cd->abcd', x, y[0]))) + np.einsum('ab,cd->abcd', x, y[0]))) def test_broadcasting_failures(self): a = Variable(['x'], np.arange(10)) @@ -357,22 +356,19 @@ def test_inplace_math(self): self.assertArrayEqual(v.values, np.arange(5) + 1) def test_reduce(self): - v = Variable(['x', 'y'], self.d) - self.assertVariableEqual(v.reduce(np.std, 'x'), - Variable(['y'], self.d.std(axis=0), - {'cell_methods': 'x: std'})) - self.assertVariableEqual(v.reduce(np.std, axis=0), - v.reduce(np.std, dimension='x')) - self.assertVariableEqual(v.reduce(np.std, ['y', 'x']), - Variable([], self.d.std(axis=(0, 1)), - {'cell_methods': 'x: y: std'})) - self.assertVariableEqual(v.reduce(np.std), - Variable([], self.d.std(), - {'cell_methods': 'x: y: std'})) - self.assertVariableEqual(v.reduce(np.mean, 'x').reduce(np.std, 'y'), - Variable([], self.d.mean(axis=0).std(), - {'cell_methods': 'x: mean y: std'})) - self.assertVariableEqual(v.mean('x'), v.reduce(np.mean, 'x')) + v = Variable(['x', 'y'], self.d, {'ignored': 'attributes'}) + self.assertVariableIdentical(v.reduce(np.std, 'x'), + Variable(['y'], self.d.std(axis=0))) + self.assertVariableIdentical(v.reduce(np.std, axis=0), + v.reduce(np.std, dimension='x')) + self.assertVariableIdentical(v.reduce(np.std, ['y', 'x']), + Variable([], self.d.std(axis=(0, 1)))) + self.assertVariableIdentical(v.reduce(np.std), + Variable([], self.d.std())) + self.assertVariableIdentical( + v.reduce(np.mean, 'x').reduce(np.std, 'y'), + Variable([], self.d.mean(axis=0).std())) + self.assertVariableIdentical(v.mean('x'), v.reduce(np.mean, 'x')) class TestCoordinate(TestCase, VariableSubclassTestCases): diff --git a/xray/backends/netCDF4_.py b/xray/backends/netCDF4_.py index e4ff9c88bf6..ff15de9f038 100644 --- a/xray/backends/netCDF4_.py +++ b/xray/backends/netCDF4_.py @@ -82,7 +82,7 @@ def convert_variable(var): for k, v in 
self.ds.variables.iteritems()) @property - def attributes(self): + def attrs(self): return FrozenOrderedDict((k, self.ds.getncattr(k)) for k in self.ds.ncattrs()) @@ -100,7 +100,7 @@ def set_attribute(self, key, value): def set_variable(self, name, variable): variable = encode_cf_variable(variable) self.set_necessary_dimensions(variable) - fill_value = variable.attributes.pop('_FillValue', None) + fill_value = variable.attrs.pop('_FillValue', None) encoding = variable.encoding self.ds.createVariable( varname=name, @@ -121,7 +121,7 @@ def set_variable(self, name, variable): nc4_var[:] = variable.values else: nc4_var[:] = variable.values[:] - nc4_var.setncatts(variable.attributes) + nc4_var.setncatts(variable.attrs) def del_attribute(self, key): self.ds.delncattr(key) diff --git a/xray/backends/pydap_.py b/xray/backends/pydap_.py index 68c01fe2201..1c258e19c7c 100644 --- a/xray/backends/pydap_.py +++ b/xray/backends/pydap_.py @@ -51,5 +51,5 @@ def variables(self): for k, v in self.ds.iteritems()) @property - def attributes(self): + def attrs(self): return Frozen(self.ds.attributes) diff --git a/xray/backends/scipy_.py b/xray/backends/scipy_.py index c7c429b719e..29b2f4a84fc 100644 --- a/xray/backends/scipy_.py +++ b/xray/backends/scipy_.py @@ -43,7 +43,7 @@ def variables(self): for k, v in self.ds.variables.iteritems()) @property - def attributes(self): + def attrs(self): return Frozen(self.ds._attributes) @property @@ -83,7 +83,7 @@ def set_variable(self, name, variable): scipy_var.assignValue(data) else: scipy_var[:] = data[:] - for k, v in variable.attributes.iteritems(): + for k, v in variable.attrs.iteritems(): self._validate_attr_key(k) setattr(scipy_var, k, self._cast_attr_value(v)) diff --git a/xray/common.py b/xray/common.py index 9eb4b62301e..cd1ad03dbd0 100644 --- a/xray/common.py +++ b/xray/common.py @@ -101,9 +101,9 @@ def _get_axis_num(self, dim): def _summarize_attributes(data): - if data.attributes: + if data.attrs: attr_summary = '\n'.join(' %s: %s' 
% (k, v) for k, v - in data.attributes.iteritems()) + in data.attrs.iteritems()) else: attr_summary = ' Empty' return attr_summary diff --git a/xray/conventions.py b/xray/conventions.py index cc88dbf809f..95f5ecb6068 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -400,7 +400,7 @@ def encode_cf_variable(var): """ dimensions = var.dimensions data = var.values - attributes = var.attributes.copy() + attributes = var.attrs.copy() encoding = var.encoding.copy() if (np.issubdtype(data.dtype, np.datetime64) @@ -465,7 +465,7 @@ def decode_cf_variable(var, mask_and_scale=True): # use _data instead of data so as not to trigger loading data data = var._data dimensions = var.dimensions - attributes = var.attributes.copy() + attributes = var.attrs.copy() encoding = var.encoding.copy() def pop_to(source, dest, k): diff --git a/xray/data_array.py b/xray/data_array.py index 717b368b375..7bfafc01348 100644 --- a/xray/data_array.py +++ b/xray/data_array.py @@ -54,7 +54,7 @@ class DataArray(AbstractArray): dimensions (known in numpy as "broadcasting") based on dimension names, regardless of their original order. - Keep track of arbitrary metadata in the form of a Python dictionary: - ``x.attributes`` + ``x.attrs`` - Convert to a pandas Series: ``x.to_series()``. 
Getting items from or doing mathematical operations with a DataArray @@ -220,19 +220,21 @@ def loc(self): @property def attributes(self): """Dictionary storing arbitrary metadata with this array.""" - return self.variable.attributes + utils.alias_warning('attributes', 'attrs', 3) + return self.variable.attrs @attributes.setter def attributes(self, value): - self.variable.attributes = value + utils.alias_warning('attributes', 'attrs', 3) + self.variable.attrs = value @property def attrs(self): - return self.variable.attributes + return self.variable.attrs @attrs.setter def attrs(self, value): - self.variable.attributes = value + self.variable.attrs = value @property def encoding(self): diff --git a/xray/dataset.py b/xray/dataset.py index 91b0ab604b3..22d7927b91d 100644 --- a/xray/dataset.py +++ b/xray/dataset.py @@ -176,7 +176,7 @@ def _calculate_dimensions(variables): return dimensions -def _get_dataset_vars_and_attr(obj): +def _get_dataset_vars_and_attrs(obj): """Returns the variables and attributes associated with a dataset Like `as_dataset`, handles DataArrays, Datasets and dictionaries of @@ -187,10 +187,16 @@ def _get_dataset_vars_and_attr(obj): if hasattr(obj, 'dataset'): obj = obj.dataset variables = getattr(obj, 'variables', obj) - attributes = getattr(obj, 'attributes', {}) + attributes = getattr(obj, 'attrs', {}) return variables, attributes +def _assert_compat_valid(compat): + if compat not in ['equals', 'identical']: + raise ValueError("compat=%r invalid: must be 'equals' or " + "'identical'" % compat) + + def as_dataset(obj): """Cast the given object to a Dataset. @@ -283,7 +289,7 @@ def load_store(cls, store, decode_cf=True): # time variables to datetime indices. 
variables = OrderedDict((k, conventions.decode_cf_variable(v)) for k, v in variables.iteritems()) - return cls(variables, store.attributes) + return cls(variables, store.attrs) @property def variables(self): @@ -302,21 +308,23 @@ def variables(self): @property def attributes(self): - """Dictionary of global attributes on this dataset - """ + utils.alias_warning('attributes', 'attrs', 3) return self._attributes @attributes.setter def attributes(self, value): + utils.alias_warning('attributes', 'attrs', 3) self._attributes = OrderedDict(value) @property def attrs(self): + """Dictionary of global attributes on this dataset + """ return self._attributes @attrs.setter def attrs(self, value): - self.attributes = value + self._attributes = OrderedDict(value) @property def dimensions(self): @@ -339,7 +347,7 @@ def copy(self, deep=False): for k, v in self.variables.iteritems()) else: variables = self.variables - return type(self)(variables, self.attributes) + return type(self)(variables, self.attrs) def __copy__(self): return self.copy(deep=False) @@ -443,7 +451,7 @@ def identical(self, other): the same global attributes. 
""" try: - return (utils.dict_equal(self.attributes, other.attributes) + return (utils.dict_equal(self.attrs, other.attrs) and len(self) == len(other) and all(k1 == k2 and v1.identical(v2) for (k1, v1), (k2, v2) @@ -469,7 +477,7 @@ def noncoordinates(self): def dump_to_store(self, store): """Store dataset contents to a backends.*DataStore object.""" store.set_variables(self.variables) - store.set_attributes(self.attributes) + store.set_attributes(self.attrs) store.sync() def to_netcdf(self, filepath, **kwdargs): @@ -535,7 +543,7 @@ def indexed(self, **indexers): var_indexers = {k: v for k, v in indexers.iteritems() if k in var.dimensions} variables[name] = var.indexed(**var_indexers) - return type(self)(variables, self.attributes) + return type(self)(variables, self.attrs) indexed_by = utils.function_alias(indexed, 'indexed_by') @@ -700,8 +708,7 @@ def get_fill_value_and_dtype(dtype): if not (hasattr(new_var, 'dimensions') and hasattr(new_var, 'values')): new_var = variable.Coordinate(var.dimensions, new_var, - var.attributes, - var.encoding) + var.attrs, var.encoding) elif copy: new_var = variable.as_variable(new_var).copy() else: @@ -717,7 +724,7 @@ def get_fill_value_and_dtype(dtype): data[:] = fill_value # create a new Variable so we can use orthogonal indexing new_var = variable.Variable( - var.dimensions, data, var.attributes) + var.dimensions, data, var.attrs) new_var[assign_to] = var[assign_from].values elif any_not_full_slices(assign_from): # type coercion is not necessary as there are no missing @@ -729,7 +736,7 @@ def get_fill_value_and_dtype(dtype): # we neither created a new ndarray nor used fancy indexing new_var = var.copy() if copy else var variables[name] = new_var - return type(self)(variables, self.attributes) + return type(self)(variables, self.attrs) def rename(self, name_dict): """Returns a new object with renamed variables and dimensions. 
@@ -756,7 +763,7 @@ def rename(self, name_dict):
             var = v.copy(deep=False)
             var.dimensions = dims
             variables[name] = var
-        return type(self)(variables, self.attributes)
+        return type(self)(variables, self.attrs)
 
     def update(self, other, inplace=True):
         """Update this dataset's variables and attributes with those from
@@ -781,21 +788,22 @@ def update(self, other, inplace=True):
             If any dimensions would inconsistent sizes between different
             variables in the updated dataset.
         """
-        other_variables, other_attributes = _get_dataset_vars_and_attr(other)
+        other_variables, other_attrs = _get_dataset_vars_and_attrs(other)
         new_variables = _expand_variables(other_variables)
 
         obj = self if inplace else self.copy()
         obj._update_vars_and_dims(new_variables, needs_copy=inplace)
-        obj.attributes.update(other_attributes)
+        obj.attrs.update(other_attrs)
         return obj
 
-    def merge(self, other, inplace=False, overwrite_vars=None,
-              attribute_conflicts='ignore'):
-        """Merge two datasets into a single new dataset.
+    def merge(self, other, inplace=False, overwrite_vars=set(),
+              compat='equals'):
+        """Merge the variables of two datasets into a single new dataset.
 
-        This method generally not allow for overriding data. Variables and
-        dimensions are checked for conflicts. However, conflicting attributes
-        are removed.
+        This method generally does not allow for overriding data, with the
+        exception of attributes, which are ignored on the second dataset.
+        Variables with the same name are checked for conflicts via the equals
+        or identical methods.
 
         Parameters
         ----------
@@ -804,12 +812,13 @@ def merge(self, other, inplace=False, overwrite_vars=None,
         inplace : bool, optional
             If True, merge the other dataset into this dataset in-place.
             Otherwise, return a new dataset object.
- overwrite_vars : list, optional - If provided, update variables of these names without checking for + overwrite_vars : str or sequence, optional + If provided, update variables of these name(s) without checking for conflicts in this dataset. - attribute_conflicts : str, optional - How to handle attribute conflicts on datasets and variables. The - only currently supported option is 'ignore'. + compat : {'equals', 'identical'}, optional + String indicating how to compare variables of the same name for + potential conflicts. 'equals' means that all values and dimensions + must be the same; 'identical' means attributes must also be equal. Returns ------- @@ -819,40 +828,28 @@ def merge(self, other, inplace=False, overwrite_vars=None, Raises ------ ValueError - If any variables or dimensions conflict. Conflicting attributes + If any variables conflict. Conflicting variables attributes are silently dropped. - - Warning - ------- - The current interface and defaults for handling for conflicting - attributes is not ideal and very preliminary. Expect this behavior to - change in future pre-release versions of xray. 
See the discussion - on GitHub: https://github.com/akleeman/xray/issues/25 """ - if attribute_conflicts != 'ignore': - raise NotImplementedError - - other_variables, other_attributes = _get_dataset_vars_and_attr(other) + _assert_compat_valid(compat) + other_variables, other_attrs = _get_dataset_vars_and_attrs(other) # determine variables to check for conflicts - if overwrite_vars is None: + if not overwrite_vars: potential_conflicts = self.variables else: if isinstance(overwrite_vars, basestring): overwrite_vars = {overwrite_vars} + else: + overwrite_vars = set(overwrite_vars) potential_conflicts = {k: v for k, v in self.variables.iteritems() - if k not in overwrite_vars} + if k not in overwrite_vars} # update variables new_variables = _expand_variables(other_variables, potential_conflicts, - 'equals') + compat) obj = self if inplace else self.copy() obj._update_vars_and_dims(new_variables, needs_copy=inplace) - - # remove conflicting attributes - for k, v in other_attributes.iteritems(): - if k in obj.attributes and v != obj.attributes[k]: - del obj.attributes[k] return obj def select(self, *names): @@ -871,7 +868,7 @@ def select(self, *names): the names variables and their coordinates are included. """ variables = OrderedDict((k, self[k]) for k in names) - return type(self)(variables, self.attributes) + return type(self)(variables, self.attrs) def unselect(self, *names): """Returns a new dataset without the named variables. @@ -896,7 +893,7 @@ def unselect(self, *names): if any(name in v.dimensions for name in names)} variables = OrderedDict((k, v) for k, v in self.variables.iteritems() if k not in drop) - return type(self)(variables, self.attributes) + return type(self)(variables, self.attrs) def groupby(self, group, squeeze=True): """Group this dataset by unique values of the indicated group. 
@@ -913,8 +910,7 @@ def groupby(self, group, squeeze=True): Returns ------- - grouped : GroupBy - A `GroupBy` object patterned after `pandas.GroupBy` that can be + grouped : GroupBy A `GroupBy` object patterned after `pandas.GroupBy` that can be iterated over in the form of `(unique_value, grouped_array)` pairs. """ if isinstance(group, basestring): @@ -950,7 +946,7 @@ def squeeze(self, dimension=None): @classmethod def concat(cls, datasets, dimension='concat_dimension', indexers=None, - mode='different', concat_over=None): + mode='different', concat_over=None, compat='equals'): """Concatenate datasets along a new or existing dimension. Parameters @@ -980,6 +976,12 @@ def concat(cls, datasets, dimension='concat_dimension', indexers=None, concat_over : None or str or iterable of str, optional Names of additional variables to concatenate, in which "dimension" does not already appear as a dimension. + compat : {'equals', 'identical'}, optional + String indicating how to compare non-concatenated variables and + dataset global attributes for potential conflicts. 'equals' means + that all variable values and dimensions must be the same; + 'identical' means that variable attributes and global attributes + must also be equal. 
Returns ------- @@ -990,6 +992,8 @@ def concat(cls, datasets, dimension='concat_dimension', indexers=None, -------- DataArray.concat """ + _assert_compat_valid(compat) + # don't bother trying to work with datasets as a generator instead of a # list; the gains would be minimal datasets = list(map(as_dataset, datasets)) @@ -1039,7 +1043,7 @@ def differs(vname, v): concat_over.add(k) # create the new dataset and add constant variables - concatenated = cls({}, datasets[0].attributes) + concatenated = cls({}, datasets[0].attrs) for k, v in datasets[0].iteritems(): if k not in concat_over: concatenated[k] = v @@ -1047,16 +1051,17 @@ def differs(vname, v): # check that global attributes and non-concatenated variables are fixed # across all datasets for ds in datasets[1:]: - # TODO: remove attribute checks? (identical -> equals) - if not utils.dict_equal(ds.attributes, concatenated.attributes): + if (compat == 'identical' + and not utils.dict_equal(ds.attrs, concatenated.attrs)): raise ValueError('dataset global attributes not equal') for k, v in ds.variables.iteritems(): if k not in concatenated and k not in concat_over: raise ValueError('encountered unexpected variable %r' % k) elif (k in concatenated and k != dim_name and - not v.identical(concatenated[k])): + not getattr(v, compat)(concatenated[k])): + verb = 'equal' if compat == 'equals' else compat raise ValueError( - 'variable %r not identical across datasets' % k) + 'variable %r not %s across datasets' % (k, verb)) # stack up each variable to fill-out the dataset for k in concat_over: diff --git a/xray/groupby.py b/xray/groupby.py index c788658d8f9..ae58dd88c47 100644 --- a/xray/groupby.py +++ b/xray/groupby.py @@ -173,7 +173,7 @@ def _iter_grouped_shortcut(self): def _combine_shortcut(self, applied, concat_dim, indexers): stacked = variable.Variable.concat( applied, concat_dim, indexers, shortcut=True) - stacked.attributes.update(self.obj.attributes) + stacked.attrs.update(self.obj.attrs) name = self.obj.name ds = 
self.obj.dataset.unselect(name) diff --git a/xray/utils.py b/xray/utils.py index bbc217ad2cc..33eb7cc7aef 100644 --- a/xray/utils.py +++ b/xray/utils.py @@ -177,29 +177,6 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): return np.isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all() -def variable_allclose(v1, v2, rtol=1e-05, atol=1e-08): - """True if two objects have the same dimensions, attributes and data; - otherwise False. - - This function is necessary because `v1 == v2` for XArrays and DataArrays - does element-wise comparisions (like numpy.ndarrays). - """ - def data_equiv(arr1, arr2): - exact_dtypes = [np.datetime64, np.timedelta64, np.string_] - if any(any(np.issubdtype(arr.dtype, t) for t in exact_dtypes) - or arr.dtype == object for arr in [arr1, arr2]): - return np.array_equal(arr1, arr2) - else: - return allclose_or_equiv(arr1, arr2, rtol=rtol, atol=atol) - - v1, v2 = map(variable.as_variable, [v1, v2]) - return (v1.dimensions == v2.dimensions - and dict_equal(v1.attributes, v2.attributes) - and (v1._data is v2._data or data_equiv(v1.values, v2.values))) - -xarray_allclose = function_alias(variable_allclose, 'xarray_allclose') - - def array_equiv(arr1, arr2): """Like np.array_equal, but also allows values to be NaN in both arrays """ diff --git a/xray/variable.py b/xray/variable.py index 89a46393597..1da91b3b894 100644 --- a/xray/variable.py +++ b/xray/variable.py @@ -220,7 +220,7 @@ def data(self, value): def to_coord(self): """Return this variable as a Coordinate""" - return Coordinate(self.dimensions, self._data, self.attributes, + return Coordinate(self.dimensions, self._data, self.attrs, encoding=self.encoding) @property @@ -269,7 +269,7 @@ def __getitem__(self, key): assert values.ndim == len(dimensions) else: assert len(dimensions) == 0 - return type(self)(dimensions, values, self.attributes, self.encoding) + return type(self)(dimensions, values, self.attrs, self.encoding) def __setitem__(self, key, value): """__setitem__ is 
overloaded to access the underlying numpy values with @@ -281,16 +281,18 @@ def __setitem__(self, key, value): @property def attributes(self): - """Dictionary of local attributes on this variable. - """ + utils.alias_warning('attributes', 'attrs', 3) return self._attributes @attributes.setter def attributes(self, value): + utils.alias_warning('attributes', 'attrs', 3) self._attributes = value @property def attrs(self): + """Dictionary of local attributes on this variable. + """ return self._attributes @attrs.setter @@ -307,8 +309,7 @@ def copy(self, deep=True): # note: # dimensions is already an immutable tuple # attributes and encoding will be copied when the new Array is created - return type(self)(self.dimensions, data, self.attributes, - self.encoding) + return type(self)(self.dimensions, data, self.attrs, self.encoding) def __copy__(self): return self.copy(deep=False) @@ -378,7 +379,7 @@ def transpose(self, *dimensions): dimensions = self.dimensions[::-1] axes = self.get_axis_num(dimensions) data = self.values.transpose(*axes) - return type(self)(dimensions, data, self.attributes, self.encoding) + return type(self)(dimensions, data, self.attrs, self.encoding) def squeeze(self, dimension=None): """Return a new Variable object with squeezed data. 
@@ -437,28 +438,16 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): raise ValueError("cannot supply both 'axis' and 'dimension' " "arguments") - # reduce the data if dimension is not None: axis = self.get_axis_num(dimension) data = func(self.values, axis=axis, **kwargs) - # construct the new Variable removed_axes = (range(self.ndim) if axis is None else np.atleast_1d(axis) % self.ndim) - dims = [dim for axis, dim in enumerate(self.dimensions) - if axis not in removed_axes] - var = Variable(dims, data, _math_safe_attributes(self.attributes)) - - # update 'cell_methods' according to CF conventions - removed_dims = [dim for axis, dim in enumerate(self.dimensions) - if axis in removed_axes] - summary = '%s: %s' % (': '.join(removed_dims), func.__name__) - if 'cell_methods' in var.attributes: - var.attributes['cell_methods'] += ' ' + summary - else: - var.attributes['cell_methods'] = summary + dims = [dim for n, dim in enumerate(self.dimensions) + if n not in removed_axes] - return var + return Variable(dims, data) @classmethod def concat(cls, variables, dimension='stacked_dimension', @@ -538,7 +527,7 @@ def concat(cls, variables, dimension='stacked_dimension', dims = (dimension,) + first_var.dimensions concatenated = cls(dims, np.empty(shape, dtype=first_var.dtype)) - concatenated.attributes.update(first_var.attributes) + concatenated.attrs.update(first_var.attrs) alt_dims = tuple(d for d in dims if d != dimension) @@ -552,8 +541,7 @@ def concat(cls, variables, dimension='stacked_dimension', var = var.transpose(*concatenated.dimensions) elif var.dimensions != alt_dims: raise ValueError('inconsistent dimensions') - utils.remove_incompatible_items(concatenated.attributes, - var.attributes) + utils.remove_incompatible_items(concatenated.attrs, var.attrs) key = tuple(indexer if n == axis else slice(None) for n in range(concatenated.ndim)) @@ -583,20 +571,19 @@ def identical(self, other): """Like equals, but also checks attributes. 
""" try: - return (utils.dict_equal(self.attributes, other.attributes) + return (utils.dict_equal(self.attrs, other.attrs) and self.equals(other)) except AttributeError: return False def __array_wrap__(self, obj, context=None): - return Variable(self.dimensions, obj, self.attributes) + return Variable(self.dimensions, obj) @staticmethod def _unary_op(f): @functools.wraps(f) def func(self, *args, **kwargs): - return Variable(self.dimensions, f(self.values, *args, **kwargs), - _math_safe_attributes(self.attributes)) + return Variable(self.dimensions, f(self.values, *args, **kwargs)) return func @staticmethod @@ -609,12 +596,7 @@ def func(self, other): new_data = (f(self_data, other_data) if not reflexive else f(other_data, self_data)) - new_attr = _math_safe_attributes(self.attributes) - # TODO: reconsider handling of conflicting attributes - if hasattr(other, 'attributes'): - new_attr = utils.ordered_dict_intersection( - new_attr, _math_safe_attributes(other.attributes)) - return Variable(dims, new_data, new_attr) + return Variable(dims, new_data) return func @staticmethod @@ -626,9 +608,6 @@ def func(self, other): raise ValueError('dimensions cannot change for in-place ' 'operations') self.values = f(self_data, other_data) - if hasattr(other, 'attributes'): - utils.remove_incompatible_items( - self.attributes, _math_safe_attributes(other.attributes)) return self return func @@ -653,9 +632,9 @@ def __init__(self, *args, **kwargs): def __getitem__(self, key): values = self._data[key] if not hasattr(values, 'ndim') or values.ndim == 0: - return Variable((), values, self.attributes, self.encoding) + return Variable((), values, self.attrs, self.encoding) else: - return type(self)(self.dimensions, values, self.attributes, + return type(self)(self.dimensions, values, self.attrs, self.encoding) def __setitem__(self, key, value): @@ -670,8 +649,7 @@ def copy(self, deep=True): # there is no need to copy the index values here even if deep=True # since pandas.Index objects are 
immutable data = PandasIndexAdapter(self) if deep else self._data - return type(self)(self.dimensions, data, self.attributes, - self.encoding) + return type(self)(self.dimensions, data, self.attrs, self.encoding) @property def as_index(self): @@ -714,11 +692,6 @@ def is_numeric(self): return self.as_index.is_numeric() -def _math_safe_attributes(attributes): - return OrderedDict((k, v) for k, v in attributes.iteritems() - if k not in ['units']) - - def broadcast_variables(first, second): """Given two Variables, return two Variables with matching dimensions and numpy broadcast compatible data. @@ -760,14 +733,13 @@ def broadcast_variables(first, second): # expand first_data's dimensions so it's broadcast compatible after # adding second's dimensions at the end first_data = first.values[(Ellipsis,) + (None,) * len(second_only_dims)] - new_first = Variable(dimensions, first_data, first.attributes, - first.encoding) + new_first = Variable(dimensions, first_data, first.attrs, first.encoding) # expand and reorder second_data so the dimensions line up first_only_dims = [d for d in dimensions if d not in second.dimensions] second_dims = list(second.dimensions) + first_only_dims second_data = second.values[(Ellipsis,) + (None,) * len(first_only_dims)] - new_second = Variable(second_dims, second_data, first.attributes, - second.encoding).transpose(*dimensions) + new_second = Variable(second_dims, second_data, second.attrs, + second.encoding).transpose(*dimensions) return new_first, new_second